    def lazy_load(self) -> Iterator[Document]:
        """Load documents lazily with concurrent parsing."""
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=self.num_workers
        ) as executor:
            futures = {
                executor.submit(self.blob_parser.lazy_parse, blob)
                for blob in self.blob_loader.yield_blobs()  # type: ignore[attr-defined]
            }
            for future in concurrent.futures.as_completed(futures):
                yield from future.result()
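The method submits one parsing task per blob to a thread pool and yields documents as each future completes, so output order follows completion order rather than the order in which blobs were yielded. A minimal usage sketch, assuming the loader is importable from langchain_community.document_loaders.concurrent and that the "./docs" directory is just an illustrative path:

# Minimal sketch; the import path and "./docs" directory are assumptions
# made for illustration, not part of the module source above.
from langchain_community.document_loaders.concurrent import ConcurrentLoader

loader = ConcurrentLoader.from_filesystem("./docs", glob="**/*.md", num_workers=8)

for doc in loader.lazy_load():
    # Documents stream in as each parsing future finishes, so the order is
    # not guaranteed to match the order of the underlying files.
    print(doc.metadata.get("source"), len(doc.page_content))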
    @classmethod
    def from_filesystem(
        cls,
        path: _PathLike,
        *,
        glob: str = "**/[!.]*",
        exclude: Sequence[str] = (),
        suffixes: Optional[Sequence[str]] = None,
        show_progress: bool = False,
        parser: Union[DEFAULT, BaseBlobParser] = "default",
        num_workers: int = 4,
        parser_kwargs: Optional[dict] = None,
    ) -> ConcurrentLoader:
        """Create a concurrent generic document loader using a filesystem blob loader.

        Args:
            path: The path to the directory to load documents from.
            glob: The glob pattern to use to find documents.
            suffixes: The suffixes to use to filter documents. If None, all files
                matching the glob will be loaded.
            exclude: A list of patterns to exclude from the loader.
            show_progress: Whether to show a progress bar or not (requires tqdm).
                Proxies to the file system loader.
            parser: A blob parser which knows how to parse blobs into documents.
            num_workers: Max number of concurrent workers to use.
            parser_kwargs: Keyword arguments to pass to the parser.
        """
        blob_loader = FileSystemBlobLoader(  # type: ignore[attr-defined, misc]
            path,
            glob=glob,
            exclude=exclude,
            suffixes=suffixes,
            show_progress=show_progress,
        )
        if isinstance(parser, str):
            if parser == "default" and cls.get_parser != GenericLoader.get_parser:
                # There is an implementation of get_parser on the class, use it.
                blob_parser = cls.get_parser(**(parser_kwargs or {}))
            else:
                blob_parser = get_parser(parser)
        else:
            blob_parser = parser
        return cls(blob_loader, blob_parser, num_workers=num_workers)
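Because parser accepts either the "default" string or any BaseBlobParser instance, the resolution logic above can be bypassed entirely by passing a parser object. A hedged sketch under that assumption; the import paths (including TextParser from langchain_community.document_loaders.parsers.txt) and the "./data" directory are assumptions for illustration:

# Sketch of passing an explicit blob parser; import paths are assumptions
# based on the langchain_community package layout.
from langchain_community.document_loaders.concurrent import ConcurrentLoader
from langchain_community.document_loaders.parsers.txt import TextParser

loader = ConcurrentLoader.from_filesystem(
    "./data",                    # hypothetical directory
    suffixes=[".txt", ".md"],
    exclude=["**/build/**"],
    parser=TextParser(),         # skips the "default" / get_parser branch above
    num_workers=4,
)
docs = list(loader.lazy_load())  # collects eagerly; lazy_load() also streams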