Source code for langchain_community.tools.edenai.ocr_identityparser

from __future__ import annotations

import logging
from typing import Optional, Type

from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import BaseModel, Field, HttpUrl

from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool

logger = logging.getLogger(__name__)


# Argument schema for the identity-parsing tool: one URL-validated field.
# NOTE(review): documented with a comment rather than a class docstring so the
# model's generated schema stays exactly as before — confirm if a docstring is
# preferred.
class IDParsingInput(BaseModel):
    query: HttpUrl = Field(
        description="url of the document to parse",
    )
class EdenAiParsingIDTool(EdenaiTool):
    """Tool that queries the Eden AI Identity parsing API.

    For the API reference, check the Eden AI documentation:
    https://docs.edenai.co/reference/ocr_identity_parser_create.

    To use, you should have the environment variable ``EDENAI_API_KEY``
    set with your API token.
    You can find your token here:
    https://app.edenai.run/admin/account/settings
    """

    name: str = "edenai_identity_parsing"

    description: str = (
        "A wrapper around edenai Services Identity parsing. "
        "Useful for when you have to extract information from an ID Document "
        "Input should be the string url of the document to parse."
    )
    args_schema: Type[BaseModel] = IDParsingInput

    # Feature / sub-feature identifiers for this tool.
    feature: str = "ocr"
    subfeature: str = "identity_parser"

    language: Optional[str] = None
    """ language of the text passed to the model. """

    def _parse_response(self, response: list) -> str:
        """Flatten the provider response into newline-separated text.

        Args:
            response: List of per-provider result entries. Each entry that is
                parsed must expose an ``"extracted_data"`` sequence; only its
                first element is used.

        Returns:
            The lines collected by ``_parse_json_multilevel`` joined with
            ``"\\n"``. The result is an empty string when nothing is selected.
        """
        if len(response) == 1:
            # A lone entry is always parsed, whatever its provider is.
            chosen = (response[0],)
        else:
            # Otherwise keep only the "eden-ai" entries. The generator stays
            # lazy so each entry is filtered right before it is parsed.
            chosen = (
                item for item in response if item.get("provider") == "eden-ai"
            )

        lines: list = []
        for item in chosen:
            # `_parse_json_multilevel` is not defined in this class
            # (presumably inherited from EdenaiTool); it fills `lines` in place.
            self._parse_json_multilevel(item["extracted_data"][0], lines)

        return "\n".join(lines)

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        # Same keys, in the same order, as the request payload has always had.
        payload = dict(
            file_url=query,
            language=self.language,
            attributes_as_list=False,
        )
        return self._call_eden_ai(payload)