class DuckDBLoader(BaseLoader):
    """Load documents from the result of a `DuckDB` query.

    Each document represents one row of the result. The
    `page_content_columns` are written into the `page_content` of the
    document, one ``"<column>: <value>"`` line per column. The
    `metadata_columns` are written into the `metadata` of the document,
    keyed by column name.

    By default, all columns are written into the `page_content` and none
    into the `metadata`.
    """

    def __init__(
        self,
        query: str,
        database: str = ":memory:",
        read_only: bool = False,
        config: Optional[Dict[str, str]] = None,
        page_content_columns: Optional[List[str]] = None,
        metadata_columns: Optional[List[str]] = None,
    ):
        """Store the query and connection settings; no connection is opened here.

        Args:
            query: The query to execute.
            database: The database to connect to. Defaults to ":memory:".
            read_only: Whether to open the database in read-only mode.
                Defaults to False.
            config: A dictionary of configuration options to pass to the
                database. Optional; ``None`` is stored as an empty dict.
            page_content_columns: The columns to write into the
                `page_content` of the document. Optional; ``None`` means
                every column of the result.
            metadata_columns: The columns to write into the `metadata` of
                the document. Optional; ``None`` means no columns.
        """
        self.query = query
        self.database = database
        self.read_only = read_only
        self.config = config or {}
        self.page_content_columns = page_content_columns
        self.metadata_columns = metadata_columns

    @staticmethod
    def _column_positions(field_names: List[str], columns: List[str]) -> List[int]:
        """Return the position in `field_names` of each name in `columns`.

        Raises:
            ValueError: If a requested column is not part of the result.
        """
        positions = []
        for column in columns:
            try:
                # `index` returns the first match, so duplicate result
                # column names resolve to the leftmost one.
                positions.append(field_names.index(column))
            except ValueError:
                raise ValueError(
                    f"Column {column!r} is not in the query result; "
                    f"available columns: {field_names}"
                ) from None
        return positions

    def load(self) -> List[Document]:
        """Execute the query and convert every result row into a document.

        Returns:
            One `Document` per result row, in the order the rows were
            fetched.

        Raises:
            ImportError: If the `duckdb` package is not installed.
            ValueError: If the result has rows and a name in
                `page_content_columns` or `metadata_columns` is not a
                column of the result.
        """
        try:
            import duckdb
        except ImportError as e:
            raise ImportError(
                "Could not import duckdb python package. "
                "Please install it with `pip install duckdb`."
            ) from e

        # Everything needed from the connection is materialized inside the
        # `with` block so the connection is released before conversion.
        with duckdb.connect(
            database=self.database, read_only=self.read_only, config=self.config
        ) as con:
            query_result = con.execute(self.query)
            rows = query_result.fetchall()
            description = cast(list, query_result.description)

        # The first item of each description entry is the column name.
        field_names = [c[0] for c in description]

        if self.page_content_columns is None:
            page_content_columns = field_names
        else:
            page_content_columns = self.page_content_columns

        if self.metadata_columns is None:
            metadata_columns = []
        else:
            metadata_columns = self.metadata_columns

        if not rows:
            # No rows means no documents. Column names are only resolved
            # against the result when there is data to read.
            return []

        # Resolve each column's position once instead of searching
        # `field_names` again for every column of every row.
        content_fields = list(
            zip(
                page_content_columns,
                self._column_positions(field_names, page_content_columns),
            )
        )
        metadata_fields = list(
            zip(
                metadata_columns,
                self._column_positions(field_names, metadata_columns),
            )
        )

        docs = []
        for row in rows:
            page_content = "\n".join(
                f"{column}: {row[position]}" for column, position in content_fields
            )
            metadata = {column: row[position] for column, position in metadata_fields}
            docs.append(Document(page_content=page_content, metadata=metadata))

        return docs