Source code for langchain_community.tools.edenai.ocr_identityparser
from __future__ import annotations
import logging
from typing import Optional, Type
from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import BaseModel, Field, HttpUrl
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
logger = logging.getLogger(__name__)
# Argument schema for the identity-parsing tool: a single required URL
# pointing at the document to parse.
class IDParsingInput(BaseModel):
    query: HttpUrl = Field(
        ...,
        description="url of the document to parse",
    )
class EdenAiParsingIDTool(EdenaiTool):
    """Tool for extracting data from identity documents via Eden AI.

    The document URL is forwarded to the Eden AI identity parser; the API
    reference lives at
    https://docs.edenai.co/reference/ocr_identity_parser_create.

    To use it, set the ``EDENAI_API_KEY`` environment variable to your API
    token. The token can be found at
    https://app.edenai.run/admin/account/settings
    """

    name: str = "edenai_identity_parsing"

    description: str = (
        "A wrapper around edenai Services Identity parsing. "
        "Useful for when you have to extract information from an ID Document "
        "Input should be the string url of the document to parse."
    )
    args_schema: Type[BaseModel] = IDParsingInput

    # Feature / subfeature pair identifying the Eden AI service to call.
    feature: str = "ocr"
    subfeature: str = "identity_parser"

    language: Optional[str] = None
    """Language of the text passed to the model."""

    def _parse_response(self, response: list) -> str:
        """Turn the raw response entries into newline-separated text.

        A response holding exactly one entry is used as-is. Otherwise only
        entries whose ``provider`` equals ``"eden-ai"`` are considered. For
        every selected entry, the first element of its ``extracted_data`` is
        passed to ``_parse_json_multilevel`` together with an accumulator
        list, and the accumulator's items are joined with newlines.
        """
        lines: list = []

        if len(response) == 1:
            selected = iter(response)
        else:
            # Lazy filter keeps the check/parse interleaving per entry.
            selected = (
                item for item in response if item.get("provider") == "eden-ai"
            )

        for item in selected:
            self._parse_json_multilevel(item["extracted_data"][0], lines)

        return "\n".join(lines)

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        return self._call_eden_ai(
            {
                "file_url": query,
                "language": self.language,
                "attributes_as_list": False,
            }
        )