[docs]classSQLDatabaseLoader(BaseLoader):""" Load documents by querying database tables supported by SQLAlchemy. For talking to the database, the document loader uses the `SQLDatabase` utility from the LangChain integration toolkit. Each document represents one row of the result. """
[docs]def__init__(self,query:Union[str,Select],db:SQLDatabase,*,parameters:Optional[Dict[str,Any]]=None,page_content_mapper:Optional[Callable[...,str]]=None,metadata_mapper:Optional[Callable[...,Dict[str,Any]]]=None,source_columns:Optional[Sequence[str]]=None,include_rownum_into_metadata:bool=False,include_query_into_metadata:bool=False,):""" Args: query: The query to execute. db: A LangChain `SQLDatabase`, wrapping an SQLAlchemy engine. sqlalchemy_kwargs: More keyword arguments for SQLAlchemy's `create_engine`. parameters: Optional. Parameters to pass to the query. page_content_mapper: Optional. Function to convert a row into a string to use as the `page_content` of the document. By default, the loader serializes the whole row into a string, including all columns. metadata_mapper: Optional. Function to convert a row into a dictionary to use as the `metadata` of the document. By default, no columns are selected into the metadata dictionary. source_columns: Optional. The names of the columns to use as the `source` within the metadata dictionary. include_rownum_into_metadata: Optional. Whether to include the row number into the metadata dictionary. Default: False. include_query_into_metadata: Optional. Whether to include the query expression into the metadata dictionary. Default: False. """self.query=queryself.db:SQLDatabase=dbself.parameters=parametersor{}self.page_content_mapper=(page_content_mapperorself.page_content_default_mapper)self.metadata_mapper=metadata_mapperorself.metadata_default_mapperself.source_columns=source_columnsself.include_rownum_into_metadata=include_rownum_into_metadataself.include_query_into_metadata=include_query_into_metadata
[docs]deflazy_load(self)->Iterator[Document]:try:importsqlalchemyassaexceptImportError:raiseImportError("Could not import sqlalchemy python package. ""Please install it with `pip install sqlalchemy`.")# Querying in `cursor` fetch mode will return an SQLAlchemy `Result` instance.result:sa.Result[Any]# Invoke the database query.ifisinstance(self.query,sa.SelectBase):result=self.db._execute(# type: ignore[assignment]self.query,fetch="cursor",parameters=self.parameters)query_sql=str(self.query.compile(bind=self.db._engine))elifisinstance(self.query,str):result=self.db._execute(# type: ignore[assignment]sa.text(self.query),fetch="cursor",parameters=self.parameters)query_sql=self.queryelse:raiseTypeError(f"Unable to process query of unknown type: {self.query}")# Iterate database result rows and generate list of documents.fori,rowinenumerate(result.mappings()):page_content=self.page_content_mapper(row)metadata=self.metadata_mapper(row)ifself.include_rownum_into_metadata:metadata["row"]=iifself.include_query_into_metadata:metadata["query"]=query_sqlsource_values=[]forcolumn,valueinrow.items():ifself.source_columnsandcolumninself.source_columns:source_values.append(value)ifsource_values:metadata["source"]=",".join(source_values)yieldDocument(page_content=page_content,metadata=metadata)
[docs]@staticmethoddefpage_content_default_mapper(row:RowMapping,column_names:Optional[List[str]]=None)->str:""" A reasonable default function to convert a record into a "page content" string. """ifcolumn_namesisNone:column_names=list(row.keys())return"\n".join(f"{column}: {value}"forcolumn,valueinrow.items()ifcolumnincolumn_names)
[docs]@staticmethoddefmetadata_default_mapper(row:RowMapping,column_names:Optional[List[str]]=None)->Dict[str,Any]:""" A reasonable default function to convert a record into a "metadata" dictionary. """ifcolumn_namesisNone:return{}metadata:Dict[str,Any]={}forcolumn,valueinrow.items():ifcolumnincolumn_names:metadata[column]=valuereturnmetadata