@deprecated(
    since="0.0.32",
    removal="1.0",
    alternative_import="langchain_google_community.GCSDirectoryLoader",
)
class GCSDirectoryLoader(BaseLoader):
    """Load documents from every blob under a prefix of a GCS bucket.

    Each blob returned by the bucket listing is handed to a ``GCSFileLoader``
    and the resulting documents are concatenated into a single list.

    Deprecated: the decorator points to
    ``langchain_google_community.GCSDirectoryLoader`` as the replacement.
    """

    def __init__(
        self,
        project_name: str,
        bucket: str,
        prefix: str = "",
        loader_func: Optional[Callable[[str], BaseLoader]] = None,
        continue_on_failure: bool = False,
    ) -> None:
        """Initialize with the project, bucket and blob-name prefix.

        Args:
            project_name: The ID of the project for the GCS bucket.
            bucket: The name of the GCS bucket.
            prefix: Only blobs whose name starts with this prefix are listed.
                Defaults to ``""``.
            loader_func: A loader function that instantiates a loader based
                on a file_path argument. It is forwarded unchanged to each
                ``GCSFileLoader``; if nothing is provided, the
                ``GCSFileLoader`` would use its default loader.
            continue_on_failure: If ``True``, a failure while processing a
                single blob is logged as a warning and that blob is skipped.
                If ``False``, the exception propagates to the caller.
        """
        self.project_name = project_name
        self.bucket = bucket
        self.prefix = prefix
        self._loader_func = loader_func
        self.continue_on_failure = continue_on_failure

    def load(self) -> List[Document]:
        """Load documents from all non-directory blobs under the prefix.

        Returns:
            The documents produced by every blob that loaded successfully,
            in the order the blobs were listed.

        Raises:
            ImportError: If ``google-cloud-storage`` is not installed.
            Exception: Whatever a per-blob load raised, when
                ``continue_on_failure`` is ``False``.
        """
        # Imported lazily so the dependency is only required when loading.
        try:
            from google.cloud import storage
        except ImportError as err:
            # Chain explicitly so the original import failure is reported as
            # the direct cause of this more helpful error.
            raise ImportError(
                "Could not import google-cloud-storage python package. "
                "Please install it with `pip install google-cloud-storage`."
            ) from err

        client = storage.Client(
            project=self.project_name,
            client_info=get_client_info(module="google-cloud-storage"),
        )

        docs = []
        for blob in client.list_blobs(self.bucket, prefix=self.prefix):
            # Skip directory placeholders: GCSFileLoader creates intermediate
            # directories on the fly.
            if blob.name.endswith("/"):
                continue

            # Each blob gets its own try/except so one bad file can be
            # skipped without aborting the whole listing.
            try:
                loader = GCSFileLoader(
                    self.project_name,
                    self.bucket,
                    blob.name,
                    loader_func=self._loader_func,
                )
                docs.extend(loader.load())
            except Exception as e:
                if not self.continue_on_failure:
                    # Bare raise re-raises the active exception unchanged.
                    raise
                logger.warning(f"Problem processing blob {blob.name}, message: {e}")

        return docs