Source code for langchain_google_community.bq_storage_vectorstores.utils
from typing import Any, Dict
from google.cloud.exceptions import NotFound
def validate_column_in_bq_schema(
columns: dict, column_name: str, expected_types: list, expected_modes: list
) -> None:
"""Validates a column within a BigQuery schema.
Args:
columns: A dictionary of BigQuery SchemaField objects representing
the table schema.
column_name: The name of the column to validate.
expected_types: A list of acceptable data types for the column.
expected_modes: A list of acceptable modes for the column.
Raises:
ValueError: If the column doesn't exist, has an unacceptable type,
or has an unacceptable mode.
"""
if column_name not in columns:
raise ValueError(f"Column {column_name} is missing from the schema.")
column = columns[column_name]
if column.field_type not in expected_types:
raise ValueError(
f"Column {column_name} must be one of the following types: {expected_types}"
)
if column.mode not in expected_modes:
raise ValueError(
f"Column {column_name} must be one of the following modes: {expected_modes}"
)
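# Usage sketch (illustrative only, not part of the original module): the
# ``columns`` mapping is typically built from a ``google.cloud.bigquery.Table``
# schema; the table id and column name below are assumptions.
#
#     from google.cloud import bigquery
#
#     client = bigquery.Client()
#     table = client.get_table("my-project.my_dataset.my_table")
#     columns = {field.name: field for field in table.schema}
#     validate_column_in_bq_schema(
#         columns, "content", expected_types=["STRING"], expected_modes=["NULLABLE"]
#     )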
def doc_match_filter(document: Dict[str, Any], filter: Dict[str, Any]) -> bool:
    """Returns True if ``document`` matches every field in ``filter``.

    Filter fields that are missing from the document are ignored.
    """
    for column, value in filter.items():
        # ignore fields that are not part of the document
        if document.get(column, value) != value:
            return False
    return True
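# Usage sketch (illustrative only): matching is exact per field, and filter
# fields that the document lacks are skipped rather than treated as mismatches.
#
#     doc = {"doc_id": "1", "source": "wiki", "length": 42}
#     doc_match_filter(doc, {"source": "wiki"})             # True
#     doc_match_filter(doc, {"source": "blog"})             # False
#     doc_match_filter(doc, {"missing_field": "anything"})  # True (field ignored)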
def cast_proto_type(column: str, value: Any) -> Any:
    """Casts ``value`` based on the ``column`` type-name prefix.

    Values with an unrecognised prefix are returned unchanged.
    """
    if column.startswith("int"):
        return int(value)
    elif column.startswith("double"):
        return float(value)
    elif column.startswith("bool"):
        return bool(value)
    return value
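# Usage sketch (illustrative only): the prefix of the type name decides the
# cast; unrecognised prefixes pass the value through unchanged.
#
#     cast_proto_type("int64_value", "7")       # 7
#     cast_proto_type("double_value", "0.5")    # 0.5
#     cast_proto_type("bool_value", 1)          # True
#     cast_proto_type("string_value", "hi")     # "hi" (no cast)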
def check_bq_dataset_exists(client: Any, dataset_id: str) -> bool:
    """Returns True if the BigQuery dataset exists, False otherwise.

    Raises:
        TypeError: If ``client`` is not a ``bigquery.Client`` instance.
    """
    from google.cloud import bigquery  # type: ignore[attr-defined]

    if not isinstance(client, bigquery.Client):
        raise TypeError("client must be an instance of bigquery.Client")
    try:
        client.get_dataset(dataset_id)  # Make an API request.
        return True
    except NotFound:
        return False
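# Usage sketch (illustrative only, assumes a reachable BigQuery project; the
# project and dataset ids are placeholders):
#
#     from google.cloud import bigquery
#
#     client = bigquery.Client(project="my-project")
#     if not check_bq_dataset_exists(client, "my_dataset"):
#         client.create_dataset("my_dataset")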