Source code for langchain_community.document_loaders.onedrive
"""Loads data from OneDrive"""from__future__importannotationsimportloggingfromtypingimportTYPE_CHECKING,Iterator,List,Optional,Sequence,Unionfromlangchain_core.documentsimportDocumentfromlangchain_core.pydantic_v1importFieldfromlangchain_community.document_loaders.base_o365import(O365BaseLoader,_FileType,)fromlangchain_community.document_loaders.parsers.registryimportget_parserifTYPE_CHECKING:fromO365.driveimportDrive,Folderlogger=logging.getLogger(__name__)
class OneDriveLoader(O365BaseLoader):
    """Load from `Microsoft OneDrive`.

    Documents are read from the drive identified by ``drive_id``. The source
    can be narrowed to a folder (``folder_path``), to explicit objects
    (``object_ids``), or both; when both are set, folder documents are
    yielded first, followed by the documents for ``object_ids``.
    """

    drive_id: str = Field(...)
    """ The ID of the OneDrive drive to load data from."""
    folder_path: Optional[str] = None
    """ The path to the folder to load data from."""
    object_ids: Optional[List[str]] = None
    """ The IDs of the objects to load data from."""

    @property
    def _file_types(self) -> Sequence[_FileType]:
        """Return supported file types."""
        return _FileType.DOC, _FileType.DOCX, _FileType.PDF

    @property
    def _scopes(self) -> List[str]:
        """Return required scopes."""
        return ["offline_access", "Files.Read.All"]

    def _get_folder_from_path(self, drive: Drive) -> Union[Folder, Drive]:
        """Return the folder (or drive) located at ``self.folder_path``.

        The path is split on ``/`` and empty segments are ignored, so leading,
        trailing and doubled slashes are tolerated. Each segment is resolved
        against the children of the previously resolved container.

        A child whose name equals the segment is preferred. Only when no child
        matches exactly does the lookup fall back to the first child whose
        name *contains* the segment (the historical behaviour), so a segment
        such as ``doc`` no longer resolves to ``documents`` when a folder
        named ``doc`` exists.

        Args:
            drive (Drive): The root drive from which the folder path is
                relative.

        Returns:
            Union[Folder, Drive]: The folder located at the path, or ``drive``
            itself when ``folder_path`` is ``None`` or has no non-empty
            segments.

        Raises:
            FileNotFoundError: If a path segment cannot be resolved.
        """
        if self.folder_path is None:
            return drive

        segments = [part for part in self.folder_path.split("/") if part != ""]

        current = drive
        for segment in segments:
            # Only children that can themselves be listed are candidates: an
            # item without ``get_items`` cannot be descended into or loaded
            # from, and must not shadow a folder with a matching name.
            children = [
                child for child in current.get_items() if hasattr(child, "get_items")
            ]
            try:
                exact = [child for child in children if child.name == segment]
                # Fall back to the legacy substring match only when no child
                # carries exactly the requested name.
                matches = exact or [
                    child for child in children if segment in child.name
                ]
                current = matches[0]
            except (IndexError, AttributeError) as err:
                raise FileNotFoundError(
                    "Path {} not exist.".format(self.folder_path)
                ) from err
        return current

    def lazy_load(self) -> Iterator[Document]:
        """Load documents lazily. Use this when working at a large scale.

        Yields the documents parsed from ``folder_path`` (when set), followed
        by the documents parsed from ``object_ids`` (when set). If neither is
        set, nothing is yielded.

        Raises:
            ImportError: If the ``O365`` package is not installed.
            ValueError: If ``drive_id`` does not resolve to a ``Drive``.
            FileNotFoundError: If ``folder_path`` cannot be resolved.
        """
        try:
            from O365.drive import Drive
        except ImportError as err:
            raise ImportError(
                "O365 package not found, please install it with `pip install o365`"
            ) from err

        # NOTE(review): ``_auth``, ``_load_from_folder`` and
        # ``_load_from_object_ids`` are not defined in this class; presumably
        # they are inherited from O365BaseLoader.
        drive = self._auth().storage().get_drive(self.drive_id)
        if not isinstance(drive, Drive):
            raise ValueError(f"There isn't a Drive with id {self.drive_id}.")

        blob_parser = get_parser("default")

        if self.folder_path:
            folder = self._get_folder_from_path(drive)
            for blob in self._load_from_folder(folder):
                yield from blob_parser.lazy_parse(blob)

        if self.object_ids:
            for blob in self._load_from_object_ids(drive, self.object_ids):
                yield from blob_parser.lazy_parse(blob)