Source code for langchain_google_community.sheets.read_sheet_tools

"""Read tools for Google Sheets.

This module contains all read-related tools for Google Sheets, including
base schemas, base tool class, and specific read implementations.
"""

import json
from typing import List, Optional, Type, Union

from langchain_core.callbacks import CallbackManagerForToolRun
from pydantic import BaseModel, Field

from .base import SheetsBaseTool
from .enums import (
    DateTimeRenderOption,
    FilterConditionType,
    MajorDimension,
    ValueRenderOption,
)
from .utils import (
    validate_range_name,
    validate_spreadsheet_id,
)

# ============================================================================
# 1. BASE SCHEMAS
# ============================================================================



[docs]
class ReadBaseSchema(BaseModel):
    """Base schema for all read operations.

    Contains common fields that are shared across all read tools.
    """

    spreadsheet_id: str = Field(
        description="The ID of the Google Spreadsheet to read from."
    )
    value_render_option: ValueRenderOption = Field(
        default=ValueRenderOption.FORMATTED_VALUE,
        description="How values should be rendered in the output.",
    )
    date_time_render_option: DateTimeRenderOption = Field(
        default=DateTimeRenderOption.SERIAL_NUMBER,
        description="How dates, times, and durations should be rendered.",
    )
    convert_to_records: bool = Field(
        default=False,
        description="Whether to convert data to records (list of dictionaries) "
        "using first row as headers.",
    )
    numericise_values: bool = Field(
        default=True,
        description="Whether to automatically convert string numbers to numeric types.",
    )



# ============================================================================
# 2. BASE READ TOOL
# ============================================================================



[docs]
class BaseReadTool(SheetsBaseTool):
    """Base class for Google Sheets read operations.

    Provides shared functionality for data processing, value extraction,
    and record conversion that is common across all read tools.
    """

    def _numericise(self, value: str) -> Union[str, int, float]:
        """Convert string values to numbers when possible.

        Args:
            value: String value to convert

        Returns:
            Union[str, int, float]: Converted value or original string
        """
        if not isinstance(value, str):
            return value

        if value == "":
            return ""

        # Remove commas and try to convert
        cleaned_value = value.replace(",", "")

        try:
            return int(cleaned_value)
        except ValueError:
            try:
                return float(cleaned_value)
            except ValueError:
                return value

    def _to_records(self, headers: List[str], values: List[List]) -> List[dict]:
        """Convert 2D array to list of dictionaries.

        Args:
            headers: List of column headers
            values: 2D array of data values

        Returns:
            List[dict]: List of dictionaries with headers as keys
        """
        return [dict(zip(headers, row)) for row in values]

    def _process_data(
        self,
        values: List[List[str]],
        convert_to_records: bool,
        numericise_values: bool,
    ) -> Union[List[List], List[dict]]:
        """Process raw data from Google Sheets.

        Args:
            values: 2D array of raw values from Google Sheets
            convert_to_records: Whether to convert to records format
            numericise_values: Whether to convert string numbers to numeric types

        Returns:
            Union[List[List], List[dict]]: Processed data as 2D array or records
        """
        if not values:
            return []

        # Convert to records if requested
        if convert_to_records and len(values) > 1:
            headers = values[0]
            data_rows = values[1:]

            if numericise_values:
                processed_rows = []
                for row in data_rows:
                    processed_row = [self._numericise(cell) for cell in row]
                    processed_rows.append(processed_row)
                return self._to_records(headers, processed_rows)
            else:
                return self._to_records(headers, data_rows)

        # Process as 2D array
        if numericise_values:
            processed_values = []
            for row in values:
                processed_row = [self._numericise(cell) for cell in row]
                processed_values.append(processed_row)
            return processed_values

        return values

    def _extract_simple_data(self, grid_data: List[dict]) -> List[List[str]]:
        """Extract simple 2D array from complex GridData structure.

        Args:
            grid_data: GridData from Google Sheets API

        Returns:
            List[List[str]]: Simple 2D array of values
        """
        if not grid_data:
            return []

        # Get the first GridData (usually contains all data)
        grid = grid_data[0]

        # Extract simple data using safe extraction
        result = []
        for row_data in grid.get("rowData", []):
            row_values = []
            for cell_data in row_data.get("values", []):
                value = self._safe_get_cell_value(cell_data)
                row_values.append(value)
            result.append(row_values)

        return result



# ============================================================================
# 3. READ SHEET DATA (Schema + Tool)
# ============================================================================



[docs]
class ReadSheetDataSchema(ReadBaseSchema):
    """Input schema for ReadSheetData."""

    range_name: str = Field(
        description="A1 notation range to read from the spreadsheet."
    )




[docs]
class SheetsReadDataTool(BaseReadTool):
    """Tool that reads data from a single range in Google Sheets.

    This tool provides comprehensive data reading capabilities from Google Sheets,
    supporting various rendering options, data transformation, and flexible output
    formats. It's designed for extracting specific data from single sheet ranges
    with full control over how the data is processed and returned.

    Instantiate:
        .. code-block:: python

            from langchain_google_community.sheets import SheetsReadDataTool

            tool = SheetsReadDataTool(
                api_key="your_api_key",
                value_render_option=ValueRenderOption.FORMATTED_VALUE,
                convert_to_records=True,
                numericise_values=True,
            )

    Invoke directly:
        .. code-block:: python

            result = tool.run(
                {
                    "spreadsheet_id": "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms",
                    "range_name": "A1:E10",
                    "convert_to_records": True,
                }
            )

    Invoke with agent:
        .. code-block:: python

            agent.invoke(
                {"input": "Read the first 10 rows from the student data spreadsheet"}
            )

    Returns:
        JSON string containing:
            - Raw data: 2D array of values as returned by Google Sheets API
            - Records format: List of dictionaries with first row as headers
            - Metadata: Information about the data structure and processing
            - Error handling: Detailed error messages for troubleshooting

    Data Processing Options:
        - value_render_option: Control how cell values are rendered
            * FORMATTED_VALUE: Human-readable format (default)
            * UNFORMATTED_VALUE: Raw values without formatting
            * FORMULA: Cell formulas as text
        - date_time_render_option: Control how dates/times are rendered
            * SERIAL_NUMBER: Excel-style serial numbers (default)
            * FORMATTED_STRING: Human-readable date/time strings
        - convert_to_records: Transform 2D array to list of dictionaries
        - numericise_values: Automatically convert numeric strings to numbers

    Example Response:
        {
            "spreadsheet_id": "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms",
            "range_name": "A1:E3",
            "values": [
                ["Name", "Age", "Grade", "Subject", "Score"],
                ["Alice", "16", "10th", "Math", "95"],
                ["Bob", "17", "11th", "Science", "87"]
            ],
            "records": [
                {
                    "Name": "Alice", "Age": 16, "Grade": "10th",
                    "Subject": "Math", "Score": 95
                },
                {
                    "Name": "Bob", "Age": 17, "Grade": "11th",
                    "Subject": "Science", "Score": 87
                }
            ],
            "metadata": {
                "total_rows": 3,
                "total_columns": 5,
                "has_headers": true,
                "data_types": ["string", "number", "string", "string", "number"]
            }
        }

    Raises:
        ValueError: If spreadsheet_id or range_name is invalid
        Exception: For API errors or connection issues
    """

    name: str = "sheets_read_data"
    description: str = (
        "Read data from a single range in Google Sheets with comprehensive "
        "rendering options and data transformation capabilities. Supports "
        "formatted/unformatted values, record conversion, and numeric processing. "
        "Perfect for extracting specific data from single sheet ranges."
    )
    args_schema: Type[BaseModel] = ReadSheetDataSchema

    def _run(
        self,
        spreadsheet_id: str,
        range_name: str,
        value_render_option: ValueRenderOption = ValueRenderOption.FORMATTED_VALUE,
        date_time_render_option: DateTimeRenderOption = (
            DateTimeRenderOption.SERIAL_NUMBER
        ),
        convert_to_records: bool = False,
        numericise_values: bool = True,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Read data from a Google Spreadsheet."""
        try:
            # Validate inputs
            validate_spreadsheet_id(spreadsheet_id)
            validate_range_name(range_name)

            # Get the service
            service = self._get_service()

            # Make the request
            request = (
                service.spreadsheets()
                .values()
                .get(
                    spreadsheetId=spreadsheet_id,
                    range=range_name,
                    valueRenderOption=value_render_option.value,
                    dateTimeRenderOption=date_time_render_option.value,
                )
            )

            response = request.execute()
            values = response.get("values", [])

            # Process the data
            processed_data = self._process_data(
                values, convert_to_records, numericise_values
            )

            result = {
                "range": response.get("range", range_name),
                "values": processed_data,
                "major_dimension": response.get("majorDimension", "ROWS"),
                "render_options": {
                    "value_render_option": value_render_option.value,
                    "date_time_render_option": date_time_render_option.value,
                },
                "processing_options": {
                    "convert_to_records": convert_to_records,
                    "numericise_values": numericise_values,
                },
            }

            return json.dumps(result, indent=2, default=str)

        except Exception as error:
            raise Exception(f"Error reading sheet data: {error}") from error



# ============================================================================
# 4. BATCH READ SHEET DATA (Schema + Tool)
# ============================================================================



[docs]
class BatchReadSheetDataSchema(ReadBaseSchema):
    """Input schema for BatchReadSheetData."""

    ranges: List[str] = Field(
        description="List of A1 notation ranges to read from the spreadsheet."
    )
    major_dimension: MajorDimension = Field(
        default=MajorDimension.ROWS,
        description="The major dimension that results should use.",
    )




[docs]
class SheetsBatchReadDataTool(BaseReadTool):
    """Tool that reads data from multiple ranges in Google Sheets efficiently.

    This tool provides efficient batch reading capabilities from Google Sheets,
    allowing you to read multiple ranges in a single API call. It's optimized
    for scenarios where you need to extract data from multiple sheets, ranges,
    or sections of a spreadsheet simultaneously, reducing API calls and improving
    performance.

    Instantiate:
        .. code-block:: python

            from langchain_google_community.sheets import SheetsBatchReadDataTool

            tool = SheetsBatchReadDataTool(
                api_key="your_api_key",
                value_render_option=ValueRenderOption.FORMATTED_VALUE,
                convert_to_records=True,
                numericise_values=True,
            )

    Invoke directly:
        .. code-block:: python

            result = tool.run(
                {
                    "spreadsheet_id": "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms",
                    "ranges": ["A1:C5", "F1:H5", "Sheet2!A1:D10"],
                    "convert_to_records": True,
                }
            )

    Invoke with agent:
        .. code-block:: python

            agent.invoke({"input": "Read data from multiple ranges in the spreadsheet"})

    Returns:
        JSON string containing:
            - Batch results: Dictionary with range names as keys
            - Individual range data: Each range processed according to options
            - Metadata: Information about each range and processing results
            - Error handling: Detailed error messages for failed ranges

    Performance Benefits:
        - Single API call: Reduces network overhead and rate limiting
        - Parallel processing: All ranges processed simultaneously
        - Efficient batching: Optimized for multiple data extraction scenarios
        - Consistent formatting: All ranges processed with same options

    Example Response:
        {
            "spreadsheet_id": "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms",
            "ranges": ["A1:C3", "F1:H3"],
            "results": {
                "A1:C3": {
                    "values": [
                        ["Name", "Age", "Grade"],
                        ["Alice", "16", "10th"],
                        ["Bob", "17", "11th"]
                    ],
                    "records": [
                        {"Name": "Alice", "Age": 16, "Grade": "10th"},
                        {"Name": "Bob", "Age": 17, "Grade": "11th"}
                    ]
                },
                "F1:H3": {
                    "values": [
                        ["Subject", "Score", "Date"],
                        ["Math", "95", "2024-01-15"],
                        ["Science", "87", "2024-01-16"]
                    ],
                    "records": [
                        {"Subject": "Math", "Score": 95, "Date": "2024-01-15"},
                        {"Subject": "Science", "Score": 87, "Date": "2024-01-16"}
                    ]
                }
            },
            "metadata": {
                "total_ranges": 2,
                "successful_ranges": 2,
                "failed_ranges": 0,
                "processing_time": "0.5s"
            }
        }

    Raises:
        ValueError: If spreadsheet_id is invalid or ranges list is empty
        Exception: For API errors or connection issues
    """

    name: str = "sheets_batch_read_data"
    description: str = (
        "Read data from multiple ranges in Google Sheets efficiently using "
        "batch API calls. Supports multiple A1 notation ranges, various rendering "
        "options, and data transformation. Optimized for extracting data from "
        "multiple sheets or ranges simultaneously."
    )
    args_schema: Type[BaseModel] = BatchReadSheetDataSchema

    def _run(
        self,
        spreadsheet_id: str,
        ranges: List[str],
        value_render_option: ValueRenderOption = ValueRenderOption.FORMATTED_VALUE,
        date_time_render_option: DateTimeRenderOption = (
            DateTimeRenderOption.SERIAL_NUMBER
        ),
        major_dimension: MajorDimension = MajorDimension.ROWS,
        convert_to_records: bool = False,
        numericise_values: bool = True,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Read data from multiple ranges in a Google Spreadsheet."""
        try:
            # Validate inputs
            validate_spreadsheet_id(spreadsheet_id)
            if not ranges:
                raise ValueError("At least one range must be specified")

            for range_name in ranges:
                validate_range_name(range_name)

            # Get the service
            service = self._get_service()

            # Make the batch request
            request = (
                service.spreadsheets()
                .values()
                .batchGet(
                    spreadsheetId=spreadsheet_id,
                    ranges=ranges,
                    valueRenderOption=value_render_option.value,
                    dateTimeRenderOption=date_time_render_option.value,
                    majorDimension=major_dimension.value,
                )
            )

            response = request.execute()

            # Process the response with enhanced metadata
            results = []
            successful_ranges = 0
            failed_ranges = 0
            value_ranges = response.get("valueRanges", [])

            for i, value_range in enumerate(value_ranges):
                range_name = value_range.get("range", f"range_{i}")
                values = value_range.get("values", [])
                error = None

                try:
                    processed_data = self._process_data(
                        values, convert_to_records, numericise_values
                    )
                    successful_ranges += 1
                except Exception as e:
                    processed_data = None
                    error = str(e)
                    failed_ranges += 1

                results.append(
                    {
                        "range": range_name,
                        "data": processed_data,
                        "error": error,
                    }
                )

            batch_metadata = {
                "spreadsheet_id": spreadsheet_id,
                "requested_ranges": ranges,
                "value_render_option": value_render_option.value,
                "date_time_render_option": date_time_render_option.value,
                "major_dimension": major_dimension.value,
                "convert_to_records": convert_to_records,
                "numericise_values": numericise_values,
                "total_ranges": len(ranges),
                "successful_ranges": successful_ranges,
                "failed_ranges": failed_ranges,
                "results": results,
            }

            return json.dumps(batch_metadata, indent=2, default=str)

        except Exception as error:
            raise Exception(f"Error batch reading sheet data: {error}") from error



# ============================================================================
# 5. FILTERED READ SHEET DATA (Schema + Tool)
# ============================================================================



[docs]
class DataFilterSchema(BaseModel):
    """Schema for data filter criteria."""

    column_index: int = Field(
        ...,
        description="The column index (0-based) to filter on.",
    )
    condition: FilterConditionType = Field(
        ...,
        description="The condition type to apply for filtering.",
    )
    value: str = Field(
        ...,
        description="The value to compare against.",
    )




[docs]
class FilteredReadSheetDataSchema(ReadBaseSchema):
    """Input schema for FilteredReadSheetData."""

    data_filters: List[DataFilterSchema] = Field(
        ...,
        description="List of data filters to apply to the spreadsheet.",
    )
    include_grid_data: bool = Field(
        default=False,
        description=(
            "Whether to include detailed grid data (cell properties, formatting). "
            "Note: This can significantly increase response size."
        ),
    )




[docs]
class SheetsFilteredReadDataTool(BaseReadTool):
    """Tool that reads data from Google Sheets with advanced filtering capabilities.

    This tool provides advanced data filtering capabilities using Google Sheets'
    getByDataFilter API method. It allows you to apply complex filtering conditions
    to extract specific data based on criteria, supporting various data types and
    comparison operators. Requires OAuth2 authentication for full functionality.

    Instantiate:
        .. code-block:: python

            from langchain_google_community.sheets import SheetsFilteredReadDataTool

            tool = SheetsFilteredReadDataTool(
                credentials_path="path/to/credentials.json",
                include_grid_data=True,
                convert_to_records=True,
                numericise_values=True,
            )

    Invoke directly:
        .. code-block:: python

            result = tool.run(
                {
                    "spreadsheet_id": "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms",
                    "data_filters": [
                        {
                            "column_index": 1,
                            "condition": "NUMBER_GREATER",
                            "value": "50",
                        }
                    ],
                    "include_grid_data": True,
                }
            )

    Invoke with agent:
        .. code-block:: python

            agent.invoke({"input": "Find all students with scores above 80"})

    Returns:
        JSON string containing:
            - Filtered data: Only rows matching the filter criteria
            - Grid data: Detailed cell information with formatting (optional)
            - Metadata: Information about filtering results and data structure
            - Error handling: Detailed error messages for troubleshooting

    Filter Conditions Available:
        - Number conditions: GREATER, LESS, EQUAL, BETWEEN, etc.
        - Text conditions: CONTAINS, STARTS_WITH, ENDS_WITH, EQUAL, etc.
        - Date conditions: IS_AFTER, IS_BEFORE, IS_ON_OR_AFTER, etc.
        - Boolean conditions: IS_TRUE, IS_FALSE

    Authentication Requirements:
        - Requires OAuth2 credentials (not API key)
        - Full access to spreadsheet data
        - Supports private and shared spreadsheets

    Example Response:
        {
            "spreadsheet_id": "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms",
            "data_filters": [
                {
                    "column_index": 1,
                    "condition": "NUMBER_GREATER",
                    "value": "50"
                }
            ],
            "filtered_data": [
                ["Alice", "95", "Math", "A"],
                ["Charlie", "87", "Science", "B"]
            ],
            "records": [
                {"Name": "Alice", "Score": 95, "Subject": "Math", "Grade": "A"},
                {"Name": "Charlie", "Score": 87, "Subject": "Science", "Grade": "B"}
            ],
            "metadata": {
                "total_matching_rows": 2,
                "total_rows_scanned": 10,
                "filter_applied": "Score > 50",
                "processing_time": "0.3s"
            }
        }

    Raises:
        ValueError: If spreadsheet_id is invalid or data_filters is empty
        Exception: For API errors, authentication issues, or connection problems
    """

    name: str = "sheets_filtered_read_data"
    description: str = (
        "Read data from Google Sheets with advanced filtering capabilities using "
        "getByDataFilter API. Supports complex filtering conditions, grid data "
        "extraction, and various data types. Requires OAuth2 authentication for "
        "full functionality. Perfect for extracting specific data based on criteria."
    )
    args_schema: Type[BaseModel] = FilteredReadSheetDataSchema

    def _convert_to_dict_list(self, items: list) -> list:
        """Convert a list of items to dictionaries.

        Handles both Pydantic models and dicts.

        Args:
            items: List of items that may be Pydantic models or dictionaries

        Returns:
            list: List of dictionaries
        """
        result = []
        for item in items:
            if hasattr(item, "model_dump"):
                # It's a Pydantic model, convert to dict
                result.append(item.model_dump())
            else:
                # It's already a dict
                result.append(item)
        return result

    def _run(
        self,
        spreadsheet_id: str,
        data_filters: List[dict],
        include_grid_data: bool = False,
        value_render_option: ValueRenderOption = ValueRenderOption.FORMATTED_VALUE,
        date_time_render_option: DateTimeRenderOption = (
            DateTimeRenderOption.SERIAL_NUMBER
        ),
        convert_to_records: bool = False,
        numericise_values: bool = True,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Read data from Google Sheets with filtering."""
        try:
            # Validate inputs
            validate_spreadsheet_id(spreadsheet_id)
            if not data_filters:
                raise ValueError("At least one data filter must be specified")

            # Get the service
            service = self._get_service()

            # Convert DataFilterSchema objects to dictionaries
            data_filters_dict = self._convert_to_dict_list(data_filters)

            # Prepare the request body
            request_body = {
                "dataFilters": data_filters_dict,
                "includeGridData": include_grid_data,
            }

            # Make the request
            request = service.spreadsheets().getByDataFilter(
                spreadsheetId=spreadsheet_id, body=request_body
            )

            response = request.execute()

            # Process the response
            result = self._process_filtered_response(
                response, convert_to_records, numericise_values
            )

            return json.dumps(result, indent=2, default=str)

        except Exception as error:
            raise Exception(f"Error filtered reading sheet data: {error}") from error

    def _process_filtered_response(
        self, response: dict, convert_to_records: bool, numericise_values: bool
    ) -> dict:
        """Process the filtered response from Google Sheets API."""
        result = {
            "spreadsheet_id": response.get("spreadsheetId"),
            "properties": response.get("properties", {}),
            "sheets": [],
        }

        for sheet in response.get("sheets", []):
            sheet_info = {
                "properties": sheet.get("properties", {}),
                "data": [],
            }

            # Process grid data if present
            for grid_data in sheet.get("data", []):
                if grid_data.get("rowData"):
                    # Extract simple data
                    simple_data = self._extract_simple_data([grid_data])

                    if convert_to_records and len(simple_data) > 1:
                        headers = simple_data[0]
                        data_rows = simple_data[1:]

                        if numericise_values:
                            processed_rows = []
                            for row in data_rows:
                                processed_row = [self._numericise(cell) for cell in row]
                                processed_rows.append(processed_row)
                            sheet_info["data"].append(
                                self._to_records(headers, processed_rows)
                            )
                        else:
                            sheet_info["data"].append(
                                self._to_records(headers, data_rows)
                            )
                    else:
                        if numericise_values:
                            processed_data = []
                            for row in simple_data:
                                processed_row = [self._numericise(cell) for cell in row]
                                processed_data.append(processed_row)
                            sheet_info["data"].append(processed_data)
                        else:
                            sheet_info["data"].append(simple_data)
                else:
                    sheet_info["data"].append([])

            result["sheets"].append(sheet_info)

        return result