Source code for langchain_google_community.sheets.get_spreadsheet_info
"""Get metadata information from Google Sheets."""
import json
from typing import List, Optional, Type
from langchain_core.callbacks import CallbackManagerForToolRun
from pydantic import BaseModel, Field
from .base import SheetsBaseTool
from .utils import validate_spreadsheet_id
[docs]
class GetSpreadsheetInfoSchema(BaseModel):
"""Input schema for GetSpreadsheetInfo."""
spreadsheet_id: str = Field(
...,
description="The ID of the Google Spreadsheet to get information about. "
"Can be extracted from URL or provided directly.",
)
include_grid_data: bool = Field(
default=False,
description=(
"Whether to include detailed grid data (cell properties, formatting). "
"Note: This can significantly increase response size."
),
)
include_formatting: bool = Field(
default=False,
description="Whether to include cell formatting information when "
"include_grid_data is True.",
)
include_validation: bool = Field(
default=False,
description="Whether to include data validation rules when "
"include_grid_data is True.",
)
ranges: Optional[List[str]] = Field(
default=None,
description="Specific ranges to get information about. "
"If None, gets info for all sheets.",
)
fields: Optional[str] = Field(
default=None,
description="Specific fields to return (e.g., "
"'sheets.properties.title,sheets.properties.sheetId'). "
"If None, returns all available fields.",
)
[docs]
class SheetsGetSpreadsheetInfoTool(SheetsBaseTool):
"""Tool that retrieves comprehensive metadata information from Google Sheets.
This tool provides detailed metadata extraction capabilities from Google Sheets,
allowing you to understand spreadsheet structure, sheet properties, named ranges,
and other organizational information. It's essential for exploring spreadsheet
contents before reading data and understanding the overall structure.
Instantiate:
.. code-block:: python
from langchain_google_community.sheets import SheetsGetSpreadsheetInfoTool
tool = SheetsGetSpreadsheetInfoTool(
api_key="your_api_key",
include_grid_data=False,
include_formatting=False,
include_validation=False,
)
Invoke directly:
.. code-block:: python
result = tool.run(
{
"spreadsheet_id": "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms",
"include_grid_data": False,
"fields": "properties,sheets.properties",
}
)
Invoke with agent:
.. code-block:: python
agent.invoke({"input": "Get information about the spreadsheet structure"})
Returns:
JSON string containing:
- Spreadsheet properties: Title, locale, timezone, etc.
- Sheet information: Names, IDs, dimensions, properties
- Named ranges: Defined ranges and their locations
- Grid data: Detailed cell information (optional)
- Metadata: Processing information and data structure
Information Types Available:
- Basic info: Title, locale, timezone, creation date
- Sheet details: Names, IDs, row/column counts, properties
- Named ranges: Defined ranges and their sheet references
- Grid data: Cell properties, formatting, validation rules
- Developer metadata: Custom properties and annotations
Example Response:
{
"spreadsheet_id": "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms",
"properties": {
"title": "Student Data",
"locale": "en_US",
"timeZone": "America/New_York",
"createdTime": "2024-01-15T10:30:00Z"
},
"sheets": [
{
"properties": {
"sheetId": 0,
"title": "Students",
"rowCount": 100,
"columnCount": 10,
"gridProperties": {
"rowCount": 100,
"columnCount": 10
}
}
}
],
"named_ranges": [
{
"name": "StudentList",
"range": "Students!A1:E100"
}
],
"metadata": {
"total_sheets": 1,
"total_named_ranges": 1,
"processing_time": "0.2s"
}
}
Raises:
ValueError: If spreadsheet_id is invalid
Exception: For API errors or connection issues
"""
name: str = "sheets_get_spreadsheet_info"
description: str = (
"Retrieve comprehensive metadata information from Google Sheets including "
"spreadsheet properties, sheet details, named ranges, and organizational "
"structure. Essential for understanding spreadsheet contents before reading "
"data and exploring spreadsheet structure."
)
args_schema: Type[GetSpreadsheetInfoSchema] = GetSpreadsheetInfoSchema
def _run(
self,
spreadsheet_id: str,
include_grid_data: bool = False,
include_formatting: bool = False,
include_validation: bool = False,
ranges: Optional[List[str]] = None,
fields: Optional[str] = None,
run_manager: Optional[CallbackManagerForToolRun] = None,
) -> str:
"""Run the tool to get spreadsheet information."""
try:
# Validate spreadsheet ID
validated_spreadsheet_id = validate_spreadsheet_id(spreadsheet_id)
# Get the service (handles both OAuth2 and API key auth)
service = self._get_service()
# Build the request parameters
request_params = {
"spreadsheetId": validated_spreadsheet_id,
"includeGridData": include_grid_data,
}
if ranges:
request_params["ranges"] = ranges
if fields:
request_params["fields"] = fields
# Get spreadsheet information
response = service.spreadsheets().get(**request_params).execute()
# Process the response
processed_info = self._process_spreadsheet_info(
response, include_formatting, include_validation
)
return json.dumps(processed_info, indent=2, default=str)
except Exception as error:
raise Exception(f"Error getting spreadsheet info: {error}") from error
def _process_spreadsheet_info(
self,
response: dict,
include_formatting: bool = False,
include_validation: bool = False,
) -> dict:
"""Process the raw spreadsheet info response."""
# Extract basic spreadsheet info
spreadsheet_info = {
"spreadsheet_id": response.get("spreadsheetId"),
"title": response.get("properties", {}).get("title"),
"locale": response.get("properties", {}).get("locale"),
"time_zone": response.get("properties", {}).get("timeZone"),
"auto_recalc": response.get("properties", {}).get("autoRecalc"),
"default_format": response.get("properties", {}).get("defaultFormat", {}),
"sheets": [],
"named_ranges": [],
"developer_metadata": [],
}
# Process sheets information
sheets = response.get("sheets", [])
for sheet in sheets:
sheet_info = {
"sheet_id": sheet.get("properties", {}).get("sheetId"),
"title": sheet.get("properties", {}).get("title"),
"sheet_type": sheet.get("properties", {}).get("sheetType"),
"grid_properties": sheet.get("properties", {}).get(
"gridProperties", {}
),
"tab_color": sheet.get("properties", {}).get("tabColor", {}),
"hidden": sheet.get("properties", {}).get("hidden", False),
"right_to_left": sheet.get("properties", {}).get("rightToLeft", False),
}
# Add grid data if available
if "data" in sheet:
sheet_info["grid_data"] = self._process_grid_data(sheet["data"])
# Add formatting info if requested
if include_formatting and "data" in sheet:
sheet_info["formatting"] = self._extract_formatting(sheet["data"])
# Add validation info if requested
if include_validation and "data" in sheet:
sheet_info["validation"] = self._extract_validation(sheet["data"])
spreadsheet_info["sheets"].append(sheet_info)
# Process named ranges
named_ranges = response.get("namedRanges", [])
for named_range in named_ranges:
range_info = {
"name": named_range.get("name"),
"range": named_range.get("range"),
"named_range_id": named_range.get("namedRangeId"),
}
spreadsheet_info["named_ranges"].append(range_info)
# Process developer metadata
developer_metadata = response.get("developerMetadata", [])
for metadata in developer_metadata:
metadata_info = {
"metadata_id": metadata.get("metadataId"),
"key": metadata.get("key"),
"value": metadata.get("value"),
"visibility": metadata.get("visibility"),
"location": metadata.get("location", {}),
}
spreadsheet_info["developer_metadata"].append(metadata_info)
return spreadsheet_info
def _process_grid_data(self, grid_data: List[dict]) -> List[List[str]]:
"""Process grid data using simplified patterns from the guide."""
if not grid_data:
return []
# Get the first GridData (usually contains all data)
grid = grid_data[0]
# Extract simple data using the safe extraction pattern
result = []
for row_data in grid.get("rowData", []):
row_values = []
for cell_data in row_data.get("values", []):
# Use the safe extraction pattern
value = self._safe_get_cell_value(cell_data)
row_values.append(value)
result.append(row_values)
return result
def _extract_formatting(self, grid_data: List[dict]) -> List[List[dict]]:
"""Extract cell formatting information."""
if not grid_data:
return []
grid = grid_data[0]
formatting_info = []
for row_data in grid.get("rowData", []):
row_formatting = []
for cell_data in row_data.get("values", []):
cell_formatting = {
"user_entered_format": cell_data.get("userEnteredFormat", {}),
"effective_format": cell_data.get("effectiveFormat", {}),
}
row_formatting.append(cell_formatting)
formatting_info.append(row_formatting)
return formatting_info
def _extract_validation(self, grid_data: List[dict]) -> List[List[dict]]:
"""Extract data validation rules."""
if not grid_data:
return []
grid = grid_data[0]
validation_info = []
for row_data in grid.get("rowData", []):
row_validation = []
for cell_data in row_data.get("values", []):
validation_rule = cell_data.get("dataValidation", {})
if validation_rule:
row_validation.append(validation_rule)
else:
row_validation.append({})
validation_info.append(row_validation)
return validation_info