Source code for langchain_google_community.vision

from typing import Iterator, List, Optional

from langchain_core.document_loaders import BaseBlobParser, BaseLoader
from langchain_core.document_loaders.blob_loaders import Blob
from langchain_core.documents import Document

from langchain_google_community._utils import get_client_info


class CloudVisionParser(BaseBlobParser):
    """Blob parser that extracts text from an image with Google Cloud Vision.

    Every parsed blob yields exactly one ``Document``. Its ``page_content`` is
    the ``description`` of the first text annotation returned by text
    detection, or an empty string when no annotations come back.
    """

    def __init__(self, project: Optional[str] = None):
        """Create the Cloud Vision client used for text detection.

        Args:
            project: Optional project ID. When truthy, it is passed to the
                client as the ``quota_project_id`` client option; otherwise
                no client options are supplied.

        Raises:
            ImportError: If ``google.cloud.vision`` cannot be imported.
        """
        try:
            from google.cloud import vision  # type: ignore[attr-defined]
        except ImportError as e:
            # The install hint names the pip extra (the same hint that
            # CloudVisionLoader gives) so it is actionable for users who
            # installed the package rather than working in a Poetry checkout.
            raise ImportError(
                "Could not import google-cloud-vision python package. "
                "Please, install vision dependency group: "
                "`pip install langchain-google-community[vision]`"
            ) from e
        client_options = None
        if project:
            client_options = {"quota_project_id": project}
        self._client = vision.ImageAnnotatorClient(
            client_options=client_options,
            client_info=get_client_info(module="cloud-vision"),
        )

    def load(self, gcs_uri: str) -> Document:
        """Run text detection on one image URI and wrap the text in a Document.

        Args:
            gcs_uri: URI of the image, forwarded as ``image_uri`` to the
                Vision ``ImageSource``.

        Returns:
            A ``Document`` whose ``page_content`` is the first text
            annotation's description (``""`` if there are no annotations)
            and whose metadata is ``{"source": gcs_uri}``.
        """
        from google.cloud import vision  # type: ignore[attr-defined]

        image = vision.Image(source=vision.ImageSource(image_uri=gcs_uri))
        text_detection_response = self._client.text_detection(image=image)
        annotations = text_detection_response.text_annotations

        # Only the first annotation is used; an empty result maps to "".
        text = annotations[0].description if annotations else ""
        return Document(page_content=text, metadata={"source": gcs_uri})

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Yield a single Document for the image referenced by ``blob.path``.

        Args:
            blob: Blob whose ``path`` is used as the image URI.

        Yields:
            The ``Document`` produced by :meth:`load` for ``blob.path``.
        """
        # NOTE(review): blob.path is forwarded unchanged; presumably callers
        # always supply a string URI here - confirm for in-memory blobs.
        yield self.load(blob.path)  # type: ignore[arg-type]
class CloudVisionLoader(BaseLoader):
    """Document loader that reads the text of one image via Cloud Vision.

    The loader is bound to a single image location at construction time and
    returns a one-element list containing the detected text.
    """

    def __init__(self, file_path: str, project: Optional[str] = None):
        """Build the Vision client and remember which image to load.

        Args:
            file_path: Location of the image; later sent to Vision as the
                ``image_uri`` and recorded as the document's ``source``.
            project: Optional project ID. When truthy, it becomes the
                ``quota_project_id`` client option.

        Raises:
            ImportError: If ``google.cloud.vision`` cannot be imported.
        """
        try:
            from google.cloud import vision  # type: ignore[attr-defined]
        except ImportError as import_error:
            raise ImportError(
                "Could not import google-cloud-vision python package. "
                "Please, install vision dependency group: "
                "`pip install langchain-google-community[vision]`"
            ) from import_error

        # No client options at all unless a project was supplied.
        quota_options = {"quota_project_id": project} if project else None
        self._client = vision.ImageAnnotatorClient(
            client_options=quota_options,
            client_info=get_client_info(module="cloud-vision"),
        )
        self._file_path = file_path

    def load(self) -> List[Document]:
        """Detect text in the configured image and return it as one Document.

        Returns:
            A list with a single ``Document``. Its ``page_content`` is the
            description of the first text annotation, or ``""`` when the
            response carries no annotations; its metadata is
            ``{"source": <file_path>}``.
        """
        from google.cloud import vision  # type: ignore[attr-defined]

        uri = self._file_path
        source = vision.ImageSource(image_uri=uri)
        response = self._client.text_detection(image=vision.Image(source=source))
        found = response.text_annotations

        if found:
            extracted = found[0].description
        else:
            extracted = ""
        return [Document(page_content=extracted, metadata={"source": uri})]