Source code for langchain_google_community.sheets.read_sheet_tools

"""Read tools for Google Sheets.

This module contains all read-related tools for Google Sheets, including
base schemas, base tool class, and specific read implementations.
"""

import json
from typing import List, Optional, Type, Union

from langchain_core.callbacks import CallbackManagerForToolRun
from pydantic import BaseModel, Field

from .base import SheetsBaseTool
from .enums import (
    DateTimeRenderOption,
    FilterConditionType,
    MajorDimension,
    ValueRenderOption,
)
from .utils import (
    validate_range_name,
    validate_spreadsheet_id,
)

# ============================================================================
# 1. BASE SCHEMAS
# ============================================================================


[docs] class ReadBaseSchema(BaseModel): """Base schema for all read operations. Contains common fields that are shared across all read tools. """ spreadsheet_id: str = Field( description="The ID of the Google Spreadsheet to read from." ) value_render_option: ValueRenderOption = Field( default=ValueRenderOption.FORMATTED_VALUE, description="How values should be rendered in the output.", ) date_time_render_option: DateTimeRenderOption = Field( default=DateTimeRenderOption.SERIAL_NUMBER, description="How dates, times, and durations should be rendered.", ) convert_to_records: bool = Field( default=False, description="Whether to convert data to records (list of dictionaries) " "using first row as headers.", ) numericise_values: bool = Field( default=True, description="Whether to automatically convert string numbers to numeric types.", )
# ============================================================================ # 2. BASE READ TOOL # ============================================================================
[docs] class BaseReadTool(SheetsBaseTool): """Base class for Google Sheets read operations. Provides shared functionality for data processing, value extraction, and record conversion that is common across all read tools. """ def _numericise(self, value: str) -> Union[str, int, float]: """Convert string values to numbers when possible. Args: value: String value to convert Returns: Union[str, int, float]: Converted value or original string """ if not isinstance(value, str): return value if value == "": return "" # Remove commas and try to convert cleaned_value = value.replace(",", "") try: return int(cleaned_value) except ValueError: try: return float(cleaned_value) except ValueError: return value def _to_records(self, headers: List[str], values: List[List]) -> List[dict]: """Convert 2D array to list of dictionaries. Args: headers: List of column headers values: 2D array of data values Returns: List[dict]: List of dictionaries with headers as keys """ return [dict(zip(headers, row)) for row in values] def _process_data( self, values: List[List[str]], convert_to_records: bool, numericise_values: bool, ) -> Union[List[List], List[dict]]: """Process raw data from Google Sheets. Args: values: 2D array of raw values from Google Sheets convert_to_records: Whether to convert to records format numericise_values: Whether to convert string numbers to numeric types Returns: Union[List[List], List[dict]]: Processed data as 2D array or records """ if not values: return [] # Convert to records if requested if convert_to_records and len(values) > 1: headers = values[0] data_rows = values[1:] if numericise_values: processed_rows = [] for row in data_rows: processed_row = [self._numericise(cell) for cell in row] processed_rows.append(processed_row) return self._to_records(headers, processed_rows) else: return self._to_records(headers, data_rows) # Process as 2D array if numericise_values: processed_values = [] for row in values: processed_row = [self._numericise(cell) for cell in row] processed_values.append(processed_row) return processed_values return values def _extract_simple_data(self, grid_data: List[dict]) -> List[List[str]]: """Extract simple 2D array from complex GridData structure. Args: grid_data: GridData from Google Sheets API Returns: List[List[str]]: Simple 2D array of values """ if not grid_data: return [] # Get the first GridData (usually contains all data) grid = grid_data[0] # Extract simple data using safe extraction result = [] for row_data in grid.get("rowData", []): row_values = [] for cell_data in row_data.get("values", []): value = self._safe_get_cell_value(cell_data) row_values.append(value) result.append(row_values) return result
# ============================================================================ # 3. READ SHEET DATA (Schema + Tool) # ============================================================================
[docs] class ReadSheetDataSchema(ReadBaseSchema): """Input schema for ReadSheetData.""" range_name: str = Field( description="A1 notation range to read from the spreadsheet." )
[docs] class SheetsReadDataTool(BaseReadTool): """Tool that reads data from a single range in Google Sheets. This tool provides comprehensive data reading capabilities from Google Sheets, supporting various rendering options, data transformation, and flexible output formats. It's designed for extracting specific data from single sheet ranges with full control over how the data is processed and returned. Instantiate: .. code-block:: python from langchain_google_community.sheets import SheetsReadDataTool tool = SheetsReadDataTool( api_key="your_api_key", value_render_option=ValueRenderOption.FORMATTED_VALUE, convert_to_records=True, numericise_values=True, ) Invoke directly: .. code-block:: python result = tool.run( { "spreadsheet_id": "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms", "range_name": "A1:E10", "convert_to_records": True, } ) Invoke with agent: .. code-block:: python agent.invoke( {"input": "Read the first 10 rows from the student data spreadsheet"} ) Returns: JSON string containing: - Raw data: 2D array of values as returned by Google Sheets API - Records format: List of dictionaries with first row as headers - Metadata: Information about the data structure and processing - Error handling: Detailed error messages for troubleshooting Data Processing Options: - value_render_option: Control how cell values are rendered * FORMATTED_VALUE: Human-readable format (default) * UNFORMATTED_VALUE: Raw values without formatting * FORMULA: Cell formulas as text - date_time_render_option: Control how dates/times are rendered * SERIAL_NUMBER: Excel-style serial numbers (default) * FORMATTED_STRING: Human-readable date/time strings - convert_to_records: Transform 2D array to list of dictionaries - numericise_values: Automatically convert numeric strings to numbers Example Response: { "spreadsheet_id": "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms", "range_name": "A1:E3", "values": [ ["Name", "Age", "Grade", "Subject", "Score"], ["Alice", "16", "10th", "Math", "95"], ["Bob", "17", "11th", "Science", "87"] ], "records": [ { "Name": "Alice", "Age": 16, "Grade": "10th", "Subject": "Math", "Score": 95 }, { "Name": "Bob", "Age": 17, "Grade": "11th", "Subject": "Science", "Score": 87 } ], "metadata": { "total_rows": 3, "total_columns": 5, "has_headers": true, "data_types": ["string", "number", "string", "string", "number"] } } Raises: ValueError: If spreadsheet_id or range_name is invalid Exception: For API errors or connection issues """ name: str = "sheets_read_data" description: str = ( "Read data from a single range in Google Sheets with comprehensive " "rendering options and data transformation capabilities. Supports " "formatted/unformatted values, record conversion, and numeric processing. " "Perfect for extracting specific data from single sheet ranges." ) args_schema: Type[BaseModel] = ReadSheetDataSchema def _run( self, spreadsheet_id: str, range_name: str, value_render_option: ValueRenderOption = ValueRenderOption.FORMATTED_VALUE, date_time_render_option: DateTimeRenderOption = ( DateTimeRenderOption.SERIAL_NUMBER ), convert_to_records: bool = False, numericise_values: bool = True, run_manager: Optional[CallbackManagerForToolRun] = None, ) -> str: """Read data from a Google Spreadsheet.""" try: # Validate inputs validate_spreadsheet_id(spreadsheet_id) validate_range_name(range_name) # Get the service service = self._get_service() # Make the request request = ( service.spreadsheets() .values() .get( spreadsheetId=spreadsheet_id, range=range_name, valueRenderOption=value_render_option.value, dateTimeRenderOption=date_time_render_option.value, ) ) response = request.execute() values = response.get("values", []) # Process the data processed_data = self._process_data( values, convert_to_records, numericise_values ) result = { "range": response.get("range", range_name), "values": processed_data, "major_dimension": response.get("majorDimension", "ROWS"), "render_options": { "value_render_option": value_render_option.value, "date_time_render_option": date_time_render_option.value, }, "processing_options": { "convert_to_records": convert_to_records, "numericise_values": numericise_values, }, } return json.dumps(result, indent=2, default=str) except Exception as error: raise Exception(f"Error reading sheet data: {error}") from error
# ============================================================================ # 4. BATCH READ SHEET DATA (Schema + Tool) # ============================================================================
[docs] class BatchReadSheetDataSchema(ReadBaseSchema): """Input schema for BatchReadSheetData.""" ranges: List[str] = Field( description="List of A1 notation ranges to read from the spreadsheet." ) major_dimension: MajorDimension = Field( default=MajorDimension.ROWS, description="The major dimension that results should use.", )
[docs] class SheetsBatchReadDataTool(BaseReadTool): """Tool that reads data from multiple ranges in Google Sheets efficiently. This tool provides efficient batch reading capabilities from Google Sheets, allowing you to read multiple ranges in a single API call. It's optimized for scenarios where you need to extract data from multiple sheets, ranges, or sections of a spreadsheet simultaneously, reducing API calls and improving performance. Instantiate: .. code-block:: python from langchain_google_community.sheets import SheetsBatchReadDataTool tool = SheetsBatchReadDataTool( api_key="your_api_key", value_render_option=ValueRenderOption.FORMATTED_VALUE, convert_to_records=True, numericise_values=True, ) Invoke directly: .. code-block:: python result = tool.run( { "spreadsheet_id": "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms", "ranges": ["A1:C5", "F1:H5", "Sheet2!A1:D10"], "convert_to_records": True, } ) Invoke with agent: .. code-block:: python agent.invoke({"input": "Read data from multiple ranges in the spreadsheet"}) Returns: JSON string containing: - Batch results: Dictionary with range names as keys - Individual range data: Each range processed according to options - Metadata: Information about each range and processing results - Error handling: Detailed error messages for failed ranges Performance Benefits: - Single API call: Reduces network overhead and rate limiting - Parallel processing: All ranges processed simultaneously - Efficient batching: Optimized for multiple data extraction scenarios - Consistent formatting: All ranges processed with same options Example Response: { "spreadsheet_id": "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms", "ranges": ["A1:C3", "F1:H3"], "results": { "A1:C3": { "values": [ ["Name", "Age", "Grade"], ["Alice", "16", "10th"], ["Bob", "17", "11th"] ], "records": [ {"Name": "Alice", "Age": 16, "Grade": "10th"}, {"Name": "Bob", "Age": 17, "Grade": "11th"} ] }, "F1:H3": { "values": [ ["Subject", "Score", "Date"], ["Math", "95", "2024-01-15"], ["Science", "87", "2024-01-16"] ], "records": [ {"Subject": "Math", "Score": 95, "Date": "2024-01-15"}, {"Subject": "Science", "Score": 87, "Date": "2024-01-16"} ] } }, "metadata": { "total_ranges": 2, "successful_ranges": 2, "failed_ranges": 0, "processing_time": "0.5s" } } Raises: ValueError: If spreadsheet_id is invalid or ranges list is empty Exception: For API errors or connection issues """ name: str = "sheets_batch_read_data" description: str = ( "Read data from multiple ranges in Google Sheets efficiently using " "batch API calls. Supports multiple A1 notation ranges, various rendering " "options, and data transformation. Optimized for extracting data from " "multiple sheets or ranges simultaneously." ) args_schema: Type[BaseModel] = BatchReadSheetDataSchema def _run( self, spreadsheet_id: str, ranges: List[str], value_render_option: ValueRenderOption = ValueRenderOption.FORMATTED_VALUE, date_time_render_option: DateTimeRenderOption = ( DateTimeRenderOption.SERIAL_NUMBER ), major_dimension: MajorDimension = MajorDimension.ROWS, convert_to_records: bool = False, numericise_values: bool = True, run_manager: Optional[CallbackManagerForToolRun] = None, ) -> str: """Read data from multiple ranges in a Google Spreadsheet.""" try: # Validate inputs validate_spreadsheet_id(spreadsheet_id) if not ranges: raise ValueError("At least one range must be specified") for range_name in ranges: validate_range_name(range_name) # Get the service service = self._get_service() # Make the batch request request = ( service.spreadsheets() .values() .batchGet( spreadsheetId=spreadsheet_id, ranges=ranges, valueRenderOption=value_render_option.value, dateTimeRenderOption=date_time_render_option.value, majorDimension=major_dimension.value, ) ) response = request.execute() # Process the response with enhanced metadata results = [] successful_ranges = 0 failed_ranges = 0 value_ranges = response.get("valueRanges", []) for i, value_range in enumerate(value_ranges): range_name = value_range.get("range", f"range_{i}") values = value_range.get("values", []) error = None try: processed_data = self._process_data( values, convert_to_records, numericise_values ) successful_ranges += 1 except Exception as e: processed_data = None error = str(e) failed_ranges += 1 results.append( { "range": range_name, "data": processed_data, "error": error, } ) batch_metadata = { "spreadsheet_id": spreadsheet_id, "requested_ranges": ranges, "value_render_option": value_render_option.value, "date_time_render_option": date_time_render_option.value, "major_dimension": major_dimension.value, "convert_to_records": convert_to_records, "numericise_values": numericise_values, "total_ranges": len(ranges), "successful_ranges": successful_ranges, "failed_ranges": failed_ranges, "results": results, } return json.dumps(batch_metadata, indent=2, default=str) except Exception as error: raise Exception(f"Error batch reading sheet data: {error}") from error
# ============================================================================ # 5. FILTERED READ SHEET DATA (Schema + Tool) # ============================================================================
[docs] class DataFilterSchema(BaseModel): """Schema for data filter criteria.""" column_index: int = Field( ..., description="The column index (0-based) to filter on.", ) condition: FilterConditionType = Field( ..., description="The condition type to apply for filtering.", ) value: str = Field( ..., description="The value to compare against.", )
[docs] class FilteredReadSheetDataSchema(ReadBaseSchema): """Input schema for FilteredReadSheetData.""" data_filters: List[DataFilterSchema] = Field( ..., description="List of data filters to apply to the spreadsheet.", ) include_grid_data: bool = Field( default=False, description=( "Whether to include detailed grid data (cell properties, formatting). " "Note: This can significantly increase response size." ), )
[docs] class SheetsFilteredReadDataTool(BaseReadTool): """Tool that reads data from Google Sheets with advanced filtering capabilities. This tool provides advanced data filtering capabilities using Google Sheets' getByDataFilter API method. It allows you to apply complex filtering conditions to extract specific data based on criteria, supporting various data types and comparison operators. Requires OAuth2 authentication for full functionality. Instantiate: .. code-block:: python from langchain_google_community.sheets import SheetsFilteredReadDataTool tool = SheetsFilteredReadDataTool( credentials_path="path/to/credentials.json", include_grid_data=True, convert_to_records=True, numericise_values=True, ) Invoke directly: .. code-block:: python result = tool.run( { "spreadsheet_id": "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms", "data_filters": [ { "column_index": 1, "condition": "NUMBER_GREATER", "value": "50", } ], "include_grid_data": True, } ) Invoke with agent: .. code-block:: python agent.invoke({"input": "Find all students with scores above 80"}) Returns: JSON string containing: - Filtered data: Only rows matching the filter criteria - Grid data: Detailed cell information with formatting (optional) - Metadata: Information about filtering results and data structure - Error handling: Detailed error messages for troubleshooting Filter Conditions Available: - Number conditions: GREATER, LESS, EQUAL, BETWEEN, etc. - Text conditions: CONTAINS, STARTS_WITH, ENDS_WITH, EQUAL, etc. - Date conditions: IS_AFTER, IS_BEFORE, IS_ON_OR_AFTER, etc. - Boolean conditions: IS_TRUE, IS_FALSE Authentication Requirements: - Requires OAuth2 credentials (not API key) - Full access to spreadsheet data - Supports private and shared spreadsheets Example Response: { "spreadsheet_id": "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms", "data_filters": [ { "column_index": 1, "condition": "NUMBER_GREATER", "value": "50" } ], "filtered_data": [ ["Alice", "95", "Math", "A"], ["Charlie", "87", "Science", "B"] ], "records": [ {"Name": "Alice", "Score": 95, "Subject": "Math", "Grade": "A"}, {"Name": "Charlie", "Score": 87, "Subject": "Science", "Grade": "B"} ], "metadata": { "total_matching_rows": 2, "total_rows_scanned": 10, "filter_applied": "Score > 50", "processing_time": "0.3s" } } Raises: ValueError: If spreadsheet_id is invalid or data_filters is empty Exception: For API errors, authentication issues, or connection problems """ name: str = "sheets_filtered_read_data" description: str = ( "Read data from Google Sheets with advanced filtering capabilities using " "getByDataFilter API. Supports complex filtering conditions, grid data " "extraction, and various data types. Requires OAuth2 authentication for " "full functionality. Perfect for extracting specific data based on criteria." ) args_schema: Type[BaseModel] = FilteredReadSheetDataSchema def _convert_to_dict_list(self, items: list) -> list: """Convert a list of items to dictionaries. Handles both Pydantic models and dicts. Args: items: List of items that may be Pydantic models or dictionaries Returns: list: List of dictionaries """ result = [] for item in items: if hasattr(item, "model_dump"): # It's a Pydantic model, convert to dict result.append(item.model_dump()) else: # It's already a dict result.append(item) return result def _run( self, spreadsheet_id: str, data_filters: List[dict], include_grid_data: bool = False, value_render_option: ValueRenderOption = ValueRenderOption.FORMATTED_VALUE, date_time_render_option: DateTimeRenderOption = ( DateTimeRenderOption.SERIAL_NUMBER ), convert_to_records: bool = False, numericise_values: bool = True, run_manager: Optional[CallbackManagerForToolRun] = None, ) -> str: """Read data from Google Sheets with filtering.""" try: # Validate inputs validate_spreadsheet_id(spreadsheet_id) if not data_filters: raise ValueError("At least one data filter must be specified") # Get the service service = self._get_service() # Convert DataFilterSchema objects to dictionaries data_filters_dict = self._convert_to_dict_list(data_filters) # Prepare the request body request_body = { "dataFilters": data_filters_dict, "includeGridData": include_grid_data, } # Make the request request = service.spreadsheets().getByDataFilter( spreadsheetId=spreadsheet_id, body=request_body ) response = request.execute() # Process the response result = self._process_filtered_response( response, convert_to_records, numericise_values ) return json.dumps(result, indent=2, default=str) except Exception as error: raise Exception(f"Error filtered reading sheet data: {error}") from error def _process_filtered_response( self, response: dict, convert_to_records: bool, numericise_values: bool ) -> dict: """Process the filtered response from Google Sheets API.""" result = { "spreadsheet_id": response.get("spreadsheetId"), "properties": response.get("properties", {}), "sheets": [], } for sheet in response.get("sheets", []): sheet_info = { "properties": sheet.get("properties", {}), "data": [], } # Process grid data if present for grid_data in sheet.get("data", []): if grid_data.get("rowData"): # Extract simple data simple_data = self._extract_simple_data([grid_data]) if convert_to_records and len(simple_data) > 1: headers = simple_data[0] data_rows = simple_data[1:] if numericise_values: processed_rows = [] for row in data_rows: processed_row = [self._numericise(cell) for cell in row] processed_rows.append(processed_row) sheet_info["data"].append( self._to_records(headers, processed_rows) ) else: sheet_info["data"].append( self._to_records(headers, data_rows) ) else: if numericise_values: processed_data = [] for row in simple_data: processed_row = [self._numericise(cell) for cell in row] processed_data.append(processed_row) sheet_info["data"].append(processed_data) else: sheet_info["data"].append(simple_data) else: sheet_info["data"].append([]) result["sheets"].append(sheet_info) return result