class GCSDirectoryLoader(BaseLoader):
    """Load documents from every blob under a prefix in a GCS bucket."""

    def __init__(
        self,
        project_name: str,
        bucket: str,
        prefix: str = "",
        loader_func: Optional[Callable[[str], BaseLoader]] = None,
    ) -> None:
        """Initialize with a project, a bucket and an optional blob prefix.

        Args:
            project_name: The ID of the project for the GCS bucket.
            bucket: The name of the GCS bucket.
            prefix: Passed as the ``prefix`` filter when listing blobs in the
                bucket. Defaults to the empty string.
            loader_func: A loader function that instantiates a loader based
                on a file_path argument. It is forwarded unchanged to each
                GCSFileLoader; if nothing is provided, the GCSFileLoader
                would use its default loader.
        """
        self.project_name = project_name
        self.bucket = bucket
        self.prefix = prefix
        self._loader_func = loader_func

    def load(self) -> List[Document]:
        """Load documents.

        Lists the blobs in ``self.bucket`` filtered by ``self.prefix``, loads
        each one through a GCSFileLoader and concatenates the results in
        listing order.

        Returns:
            The documents produced by all per-blob loaders.

        Raises:
            ImportError: If the ``google-cloud-storage`` package is not
                installed. The original import failure is attached as the
                cause.
        """
        # Imported lazily so the module can be imported without the optional
        # GCS dependency installed.
        try:
            from google.cloud import storage  # type: ignore[attr-defined]
        except ImportError as exc:
            # Chain explicitly so the traceback shows the underlying import
            # failure as the direct cause of this error.
            raise ImportError(
                "Could not import google-cloud-storage python package. "
                "Please, install gcs dependency group: "
                "`pip install langchain-google-community[gcs]`"
            ) from exc

        client = storage.Client(
            project=self.project_name,
            client_info=get_client_info(module="google-cloud-storage"),
        )

        docs: List[Document] = []
        for blob in client.list_blobs(self.bucket, prefix=self.prefix):
            # we shall just skip directories since GCSFileLoader creates
            # intermediate directories on the fly
            if blob.name.endswith("/"):
                continue
            loader = GCSFileLoader(
                self.project_name,
                self.bucket,
                blob.name,
                loader_func=self._loader_func,
            )
            docs.extend(loader.load())
        return docs