class OBSFileLoader(BaseLoader):
    """Load from the `Huawei OBS file`."""

    def __init__(
        self,
        bucket: str,
        key: str,
        client: Any = None,
        endpoint: str = "",
        config: Optional[dict] = None,
    ) -> None:
        """Initialize the OBSFileLoader with the specified settings.

        Args:
            bucket (str): The name of the OBS bucket to be used.
            key (str): The name of the object in the OBS bucket.
            client (ObsClient, optional): An instance of the ObsClient to connect to OBS.
            endpoint (str, optional): The endpoint URL of your OBS bucket. This parameter is mandatory if `client` is not provided.
            config (dict, optional): The parameters for connecting to OBS, provided as a dictionary. This parameter is ignored if `client` is provided. The dictionary could have the following keys:
                - "ak" (str, optional): Your OBS access key (required if `get_token_from_ecs` is False and bucket policy is not public read).
                - "sk" (str, optional): Your OBS secret key (required if `get_token_from_ecs` is False and bucket policy is not public read).
                - "token" (str, optional): Your security token (required if using temporary credentials).
                - "get_token_from_ecs" (bool, optional): Whether to retrieve the security token from ECS. Defaults to False if not provided. If set to True, `ak`, `sk`, and `token` will be ignored.

        Raises:
            ImportError: If the `esdk-obs-python` package is not installed.
            ValueError: If `client` is not provided, but `endpoint` is missing.
            TypeError: If the provided `client` is not an instance of ObsClient.

        Note:
            Before using this class, make sure you have registered with OBS and have the necessary credentials. The `ak`, `sk`, and `endpoint` values are mandatory unless `get_token_from_ecs` is True or the bucket policy is public read. `token` is required when using temporary credentials.

        Example:
            To create a new OBSFileLoader with a new client (`endpoint` is
            required whenever no `client` is passed):
            ```
            config = {
                "ak": "your-access-key",
                "sk": "your-secret-key"
            }
            obs_loader = OBSFileLoader("your-bucket-name", "your-object-key", endpoint="your-endpoint-url", config=config)
            ```

            To create a new OBSFileLoader with an existing client:
            ```
            from obs import ObsClient

            # Assuming you have an existing ObsClient object 'obs_client'
            obs_loader = OBSFileLoader("your-bucket-name", "your-object-key", client=obs_client)
            ```

            To create a new OBSFileLoader without an existing client or
            credentials (e.g. for a bucket with a public-read policy):
            ```
            obs_loader = OBSFileLoader("your-bucket-name", "your-object-key", endpoint="your-endpoint-url")
            ```
        """  # noqa: E501
        # Imported lazily so the SDK is only required when this loader is used.
        try:
            from obs import ObsClient
        except ImportError as e:
            raise ImportError(
                "Could not import esdk-obs-python python package. "
                "Please install it with `pip install esdk-obs-python`."
            ) from e

        if not client:
            # No client supplied: build one from `endpoint` and `config`.
            if not endpoint:
                raise ValueError("Either OBSClient or endpoint must be provided.")
            if not config:
                config = dict()
            if config.get("get_token_from_ecs"):
                # Credentials come from ECS; ak/sk/token in `config` are ignored.
                client = ObsClient(server=endpoint, security_provider_policy="ECS")
            else:
                client = ObsClient(
                    access_key_id=config.get("ak"),
                    secret_access_key=config.get("sk"),
                    security_token=config.get("token"),
                    server=endpoint,
                )

        # Also validates a caller-supplied client.
        if not isinstance(client, ObsClient):
            raise TypeError("Client must be ObsClient type")

        self.client = client
        self.bucket = bucket
        self.key = key

    def load(self) -> List[Document]:
        """Load documents.

        Downloads the object `key` from `bucket` into a temporary directory
        and delegates parsing of the downloaded file to
        `UnstructuredFileLoader`. The temporary directory is removed when the
        method returns.

        Returns:
            List[Document]: The documents produced by `UnstructuredFileLoader`.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            # Mirror the bucket/key layout under the temp dir; the key may
            # contain "/" separators, so create the parent directories first.
            file_path = f"{temp_dir}/{self.bucket}/{self.key}"
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            # Download the file to a destination
            self.client.downloadFile(
                bucketName=self.bucket, objectKey=self.key, downloadFile=file_path
            )
            # Load inside the `with` block, while the downloaded file still exists.
            loader = UnstructuredFileLoader(file_path)
            return loader.load()