class AzureAIDocumentIntelligenceLoader(BaseLoader):
    """Load a PDF with Azure Document Intelligence.

    The loader delegates all parsing to an
    ``AzureAIDocumentIntelligenceParser`` built in ``__init__``. The source
    document is either a local file (``file_path``) or a remote resource
    (``url_path``); when both are given, ``file_path`` takes precedence.
    """

    def __init__(
        self,
        api_endpoint: str,
        api_key: str,
        file_path: Optional[str] = None,
        url_path: Optional[str] = None,
        api_version: Optional[str] = None,
        api_model: str = "prebuilt-layout",
        mode: str = "markdown",
        *,
        analysis_features: Optional[List[str]] = None,
    ) -> None:
        """
        Initialize the object for file processing with Azure Document Intelligence
        (formerly Form Recognizer).

        This constructor initializes an AzureAIDocumentIntelligenceParser object
        to be used for parsing files using the Azure Document Intelligence API.
        The load method generates Documents whose content representations are
        determined by the mode parameter.

        Parameters:
        -----------
        api_endpoint: str
            The API endpoint to use for DocumentIntelligenceClient construction.
        api_key: str
            The API key to use for DocumentIntelligenceClient construction.
        file_path : Optional[str]
            The path to the file that needs to be loaded.
            Either file_path or url_path must be specified. If both are
            specified, file_path is used.
        url_path : Optional[str]
            The URL to the file that needs to be loaded.
            Either file_path or url_path must be specified.
        api_version: Optional[str]
            The API version for DocumentIntelligenceClient. Setting None to use
            the default value from `azure-ai-documentintelligence` package.
        api_model: str
            Unique document model name. Default value is "prebuilt-layout".
            Note that overriding this default value may result in unsupported
            behavior.
        mode: str
            The type of content representation of the generated Documents.
            Use either "single", "page", or "markdown".
            Default value is "markdown".
        analysis_features: Optional[List[str]]
            List of optional analysis features, each feature should be passed
            as a str that conforms to the enum `DocumentAnalysisFeature` in
            `azure-ai-documentintelligence` package. Default value is None.

        Raises:
        -------
        ValueError
            If neither file_path nor url_path is provided.

        Examples:
        ---------
        >>> obj = AzureAIDocumentIntelligenceLoader(
        ...     file_path="path/to/file",
        ...     api_endpoint="https://endpoint.azure.com",
        ...     api_key="APIKEY",
        ...     api_version="2023-10-31-preview",
        ...     api_model="prebuilt-layout",
        ...     mode="markdown"
        ... )
        """
        # Explicit raise rather than `assert`: assertions are removed under
        # `python -O`, which would let a source-less loader through and defer
        # the failure to `lazy_load` (parse_url would receive None).
        if file_path is None and url_path is None:
            raise ValueError("file_path or url_path must be provided")

        self.file_path = file_path
        self.url_path = url_path

        self.parser = AzureAIDocumentIntelligenceParser(  # type: ignore[misc]
            api_endpoint=api_endpoint,
            api_key=api_key,
            api_version=api_version,
            api_model=api_model,
            mode=mode,
            analysis_features=analysis_features,
        )

    def lazy_load(
        self,
    ) -> Iterator[Document]:
        """Lazily yield the Documents parsed from the configured source.

        A local ``file_path`` is wrapped in a ``Blob`` and handed to the
        parser's ``parse`` method; otherwise ``url_path`` is passed to the
        parser's ``parse_url`` method.
        """
        if self.file_path is not None:
            blob = Blob.from_path(self.file_path)  # type: ignore[attr-defined]
            yield from self.parser.parse(blob)
        else:
            # __init__ guarantees url_path is set whenever file_path is None.
            yield from self.parser.parse_url(self.url_path)  # type: ignore[arg-type]