Source code for langchain_box.utilities.box

"""Util that calls Box APIs."""

from enum import Enum
from typing import Any, Dict, List, Optional

import box_sdk_gen  # type: ignore
import requests
from langchain_core.documents import Document
from langchain_core.pydantic_v1 import BaseModel, root_validator
from langchain_core.utils import get_from_dict_or_env


class DocumentFiles(Enum):
    """DocumentFiles(Enum).

    An enum containing all of the supported extensions for files
    Box considers Documents. These files should have text
    representations.

    Member names are the upper-cased extension, so a file extension can be
    checked with ``extension.upper() in DocumentFiles.__members__`` (or the
    equivalent ``hasattr`` lookup).
    """

    DOC = "doc"
    DOCX = "docx"
    GDOC = "gdoc"
    GSHEET = "gsheet"
    NUMBERS = "numbers"
    ODS = "ods"
    ODT = "odt"
    PAGES = "pages"
    PDF = "pdf"
    RTF = "rtf"
    WPD = "wpd"
    XLS = "xls"
    XLSM = "xlsm"
    XLSX = "xlsx"
    AS = "as"
    AS3 = "as3"
    ASM = "asm"
    BAT = "bat"
    C = "c"
    CC = "cc"
    CMAKE = "cmake"
    CPP = "cpp"
    CS = "cs"
    CSS = "css"
    CSV = "csv"
    CXX = "cxx"
    DIFF = "diff"
    ERB = "erb"
    GROOVY = "groovy"
    H = "h"
    HAML = "haml"
    HH = "hh"
    HTM = "htm"
    HTML = "html"
    JAVA = "java"
    JS = "js"
    JSON = "json"
    LESS = "less"
    LOG = "log"
    M = "m"
    MAKE = "make"
    MD = "md"
    ML = "ml"
    MM = "mm"
    MSG = "msg"
    PHP = "php"
    PL = "pl"
    PROPERTIES = "properties"
    PY = "py"
    RB = "rb"
    RST = "rst"
    SASS = "sass"
    SCALA = "scala"
    SCM = "scm"
    SCRIPT = "script"
    SH = "sh"
    SML = "sml"
    SQL = "sql"
    TXT = "txt"
    VI = "vi"
    VIM = "vim"
    WEBDOC = "webdoc"
    XHTML = "xhtml"
    XLSB = "xlsb"
    XML = "xml"
    XSD = "xsd"
    XSL = "xsl"
    YAML = "yaml"
    GSLIDE = "gslide"
    # Historical misspelling. Because it shares a value with GSLIDE, Enum
    # turns it into an alias, so existing references keep working while a
    # name lookup for the real extension ("GSLIDE") now succeeds.
    GSLLIDE = "gslide"
    GSLIDES = "gslides"
    KEY = "key"
    ODP = "odp"
    PPT = "ppt"
    PPTX = "pptx"
    BOXNOTE = "boxnote"
class ImageFiles(Enum):
    """ImageFiles(Enum).

    Enumerates every file extension that Box considers an image. Each
    member's name is the upper-cased extension and its value is the
    lower-cased extension.
    """

    ARW = "arw"
    BMP = "bmp"
    CR2 = "cr2"
    DCM = "dcm"
    DICM = "dicm"
    DICOM = "dicom"
    DNG = "dng"
    EPS = "eps"
    EXR = "exr"
    GIF = "gif"
    HEIC = "heic"
    INDD = "indd"
    INDML = "indml"
    INDT = "indt"
    INX = "inx"
    JPEG = "jpeg"
    JPG = "jpg"
    NEF = "nef"
    PNG = "png"
    SVG = "svg"
    TIF = "tif"
    TIFF = "tiff"
    TGA = "tga"
    SVS = "svs"
class BoxAuthType(Enum):
    """BoxAuthType(Enum).

    An enum to tell BoxLoader how you wish to authenticate your Box
    connection.

    Options are:

    TOKEN - Use a developer token generated from the Box Developer
        Console. Only recommended for development.
        Provide ``box_developer_token``.
    CCG - Client Credentials Grant.
        Provide ``box_client_id``, ``box_client_secret``, and
        ``box_enterprise_id`` or optionally ``box_user_id``.
    JWT - Use JWT for authentication. Config should be stored on the file
        system accessible to your app.
        Provide ``box_jwt_path``. Optionally, provide ``box_user_id`` to
        act as a specific user.
    """

    TOKEN = "token"
    """Use a developer token or a token retrieved from ``box-sdk-gen``"""

    CCG = "ccg"
    """Use ``client_credentials`` type grant"""

    JWT = "jwt"
    """Use JWT bearer token auth"""
[docs]class BoxAuth(BaseModel): """**BoxAuth.** The ``box-langchain`` package offers some flexibility to authentication. The most basic authentication method is by using a developer token. This can be found in the `Box developer console <https://account.box.com/developers/console>`_ on the configuration screen. This token is purposely short-lived (1 hour) and is intended for development. With this token, you can add it to your environment as ``BOX_DEVELOPER_TOKEN``, you can pass it directly to the loader, or you can use the ``BoxAuth`` authentication helper class. `BoxAuth` supports the following authentication methods: * **Token** — either a developer token or any token generated through the Box SDK * **JWT** with a service account * **JWT** with a specified user * **CCG** with a service account * **CCG** with a specified user .. note:: If using JWT authentication, you will need to download the configuration from the Box developer console after generating your public/private key pair. Place this file in your application directory structure somewhere. You will use the path to this file when using the ``BoxAuth`` helper class. If you wish to use OAuth2 with the authorization_code flow, please use ``BoxAuthType.TOKEN`` with the token you have acquired. For more information, learn about how to `set up a Box application <https://developer.box.com/guides/getting-started/first-application/>`_, and check out the `Box authentication guide <https://developer.box.com/guides/authentication/select/>`_ for more about our different authentication options. Simple implementation: To instantiate, you must provide a ``langchain_box.utilities.BoxAuthType``. BoxAuthType is an enum to tell BoxLoader how you wish to autheticate your Box connection. Options are: TOKEN - Use a developer token generated from the Box Deevloper Token. Only recommended for development. Provide ``box_developer_token``. CCG - Client Credentials Grant. 
provide ``box_client_id``, ``box_client_secret``, and ``box_enterprise_id`` or optionally ``box_user_id``. JWT - Use JWT for authentication. Config should be stored on the file system accessible to your app. provide ``box_jwt_path``. Optionally, provide ``box_user_id`` to act as a specific user **Examples**: **Token** .. code-block:: python from langchain_box.document_loaders import BoxLoader from langchain_box.utilities import BoxAuth, BoxAuthType auth = BoxAuth( auth_type=BoxAuthType.TOKEN, box_developer_token=box_developer_token ) loader = BoxLoader( box_auth=auth, ... ) **JWT with a service account** .. code-block:: python from langchain_box.document_loaders import BoxLoader from langchain_box.utilities import BoxAuth, BoxAuthType auth = BoxAuth( auth_type=BoxAuthType.JWT, box_jwt_path=box_jwt_path ) loader = BoxLoader( box_auth=auth, ... ) **JWT with a specified user** .. code-block:: python from langchain_box.document_loaders import BoxLoader from langchain_box.utilities import BoxAuth, BoxAuthType auth = BoxAuth( auth_type=BoxAuthType.JWT, box_jwt_path=box_jwt_path, box_user_id=box_user_id ) loader = BoxLoader( box_auth=auth, ... ) **CCG with a service account** .. code-block:: python from langchain_box.document_loaders import BoxLoader from langchain_box.utilities import BoxAuth, BoxAuthType auth = BoxAuth( auth_type=BoxAuthType.CCG, box_client_id=box_client_id, box_client_secret=box_client_secret, box_enterprise_id=box_enterprise_id ) loader = BoxLoader( box_auth=auth, ... ) **CCG with a specified user** .. code-block:: python from langchain_box.document_loaders import BoxLoader from langchain_box.utilities import BoxAuth, BoxAuthType auth = BoxAuth( auth_type=BoxAuthType.CCG, box_client_id=box_client_id, box_client_secret=box_client_secret, box_user_id=box_user_id ) loader = BoxLoader( box_auth=auth, ... ) """ auth_type: BoxAuthType """``langchain_box.utilities.BoxAuthType``. 
Enum describing how to authenticate against Box""" box_developer_token: Optional[str] = None """ If using ``BoxAuthType.TOKEN``, provide your token here""" box_jwt_path: Optional[str] = None """If using ``BoxAuthType.JWT``, provide local path to your JWT configuration file""" box_client_id: Optional[str] = None """If using ``BoxAuthType.CCG``, provide your app's client ID""" box_client_secret: Optional[str] = None """If using ``BoxAuthType.CCG``, provide your app's client secret""" box_enterprise_id: Optional[str] = None """If using ``BoxAuthType.CCG``, provide your enterprise ID. Only required if you are not sending ``box_user_id``""" box_user_id: Optional[str] = None """If using ``BoxAuthType.CCG`` or ``BoxAuthType.JWT``, providing ``box_user_id`` will act on behalf of a specific user""" _box_client: Optional[box_sdk_gen.BoxClient] = None _custom_header: Dict = dict({"x-box-ai-library": "langchain"}) class Config: arbitrary_types_allowed = True use_enum_values = True extra = "allow" @root_validator() def validate_box_auth_inputs(cls, values: Dict[str, Any]) -> Dict[str, Any]: """Validate auth_type is set""" if not values.get("auth_type"): raise ValueError("Auth type must be set.") """Validate that TOKEN auth type provides box_developer_token.""" if values.get("auth_type") == "token": if not get_from_dict_or_env( values, "box_developer_token", "BOX_DEVELOPER_TOKEN" ): raise ValueError( f"{values.get('auth_type')} requires box_developer_token to be set" ) """Validate that JWT auth type provides box_jwt_path.""" if values.get("auth_type") == "jwt": if not get_from_dict_or_env(values, "box_jwt_path", "BOX_JWT_PATH"): raise ValueError( f"{values.get('auth_type')} requires box_jwt_path to be set" ) """Validate that CCG auth type provides box_client_id and box_client_secret and either box_enterprise_id or box_user_id.""" if values.get("auth_type") == "ccg": if ( not get_from_dict_or_env(values, "box_client_id", "BOX_CLIENT_ID") or not get_from_dict_or_env( values, 
"box_client_secret", "BOX_CLIENT_SECRET" ) or ( not values.get("box_enterprise_id") and not values.get("box_user_id") ) ): raise ValueError( f"{values.get('auth_type')} requires box_client_id, \ box_client_secret, and box_enterprise_id." ) return values def _authorize(self) -> None: match self.auth_type: case "token": try: auth = box_sdk_gen.BoxDeveloperTokenAuth( token=self.box_developer_token ) self._box_client = box_sdk_gen.BoxClient( auth=auth ).with_extra_headers(extra_headers=self._custom_header) except box_sdk_gen.BoxSDKError as bse: raise RuntimeError( f"Error getting client from developer token: {bse.message}" ) except Exception as ex: raise ValueError( f"Invalid Box developer token. Please verify your \ token and try again.\n{ex}" ) from ex case "jwt": try: jwt_config = box_sdk_gen.JWTConfig.from_config_file( config_file_path=self.box_jwt_path ) auth = box_sdk_gen.BoxJWTAuth(config=jwt_config) self._box_client = box_sdk_gen.BoxClient( auth=auth ).with_extra_headers(extra_headers=self._custom_header) if self.box_user_id is not None: user_auth = auth.with_user_subject(self.box_user_id) self._box_client = box_sdk_gen.BoxClient( auth=user_auth ).with_extra_headers(extra_headers=self._custom_header) except box_sdk_gen.BoxSDKError as bse: raise RuntimeError( f"Error getting client from jwt token: {bse.message}" ) except Exception as ex: raise ValueError( "Error authenticating. Please verify your JWT config \ and try again." 
) from ex case "ccg": try: if self.box_user_id is not None: ccg_config = box_sdk_gen.CCGConfig( client_id=self.box_client_id, client_secret=self.box_client_secret, user_id=self.box_user_id, ) else: ccg_config = box_sdk_gen.CCGConfig( client_id=self.box_client_id, client_secret=self.box_client_secret, enterprise_id=self.box_enterprise_id, ) auth = box_sdk_gen.BoxCCGAuth(config=ccg_config) self._box_client = box_sdk_gen.BoxClient( auth=auth ).with_extra_headers(extra_headers=self._custom_header) except box_sdk_gen.BoxSDKError as bse: raise RuntimeError( f"Error getting client from ccg token: {bse.message}" ) except Exception as ex: raise ValueError( "Error authenticating. Please verify you are providing a \ valid client id, secret and either a valid user ID or \ enterprise ID." ) from ex case _: raise ValueError( f"{self.auth_type} is not a valid auth_type. Value must be \ TOKEN, CCG, or JWT." )
[docs] def get_client(self) -> box_sdk_gen.BoxClient: """Instantiate the Box SDK.""" if self._box_client is None: self._authorize() return self._box_client
class _BoxAPIWrapper(BaseModel):
    """Wrapper for Box API."""

    box_developer_token: Optional[str] = None
    """String containing the Box Developer Token generated in the developer console"""

    box_auth: Optional[BoxAuth] = None
    """Configured langchain_box.utilities.BoxAuth object"""

    character_limit: Optional[int] = -1
    """character_limit is an int that caps the number of characters to
       return per document. ``None`` or a non-positive value disables the cap."""

    # Box client; set by the validator when ``box_auth`` is given, otherwise
    # created on first use by ``get_box_client``.
    _box: Optional[box_sdk_gen.BoxClient]

    class Config:
        arbitrary_types_allowed = True
        use_enum_values = True
        extra = "allow"

    @root_validator(allow_reuse=True)
    def validate_box_api_inputs(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Require either ``box_auth`` or a developer token.

        With ``box_auth`` the client is created immediately. Otherwise the
        developer token is resolved from the arguments or the
        ``BOX_DEVELOPER_TOKEN`` environment variable and written back into
        ``values`` so ``get_box_client`` can use it later.

        Raises:
            ValueError: if neither ``box_auth`` nor a developer token is
                available.
        """
        values["_box"] = None

        box_auth = values.get("box_auth")
        if not box_auth:
            token = get_from_dict_or_env(
                values, "box_developer_token", "BOX_DEVELOPER_TOKEN"
            )
            if not token:
                raise ValueError(
                    "You must configure either box_developer_token or box_auth"
                )
            values["box_developer_token"] = token
        else:
            values["_box"] = box_auth.get_client()

        return values

    @staticmethod
    def _is_document_extension(extension: Optional[str]) -> bool:
        """Return True when ``extension`` names a member of ``DocumentFiles``."""
        # A missing extension is never a supported document.
        return bool(extension) and extension.upper() in DocumentFiles.__members__

    def get_box_client(self) -> box_sdk_gen.BoxClient:
        """Create a client from ``box_developer_token``, store it and return it."""
        box_auth = BoxAuth(
            auth_type=BoxAuthType.TOKEN, box_developer_token=self.box_developer_token
        )

        self._box = box_auth.get_client()
        return self._box

    def _do_request(self, url: str) -> Any:
        """GET ``url`` with the client's bearer token and return the raw body.

        Raises:
            RuntimeError: if the Box SDK cannot provide an access token.
            requests.exceptions.HTTPError: on a non-success HTTP status.
        """
        try:
            access_token = self._box.auth.retrieve_token().access_token  # type: ignore[union-attr]
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"Error retrieving access token: {bse.message}"
            ) from bse

        resp = requests.get(url, headers={"Authorization": f"Bearer {access_token}"})
        resp.raise_for_status()
        return resp.content

    def _get_text_representation(
        self, file_id: str = ""
    ) -> tuple[Optional[str], Optional[str], Optional[str]]:
        """Fetch the ``extracted_text`` representation of a file.

        Returns:
            ``(file_name, content, url)``. ``file_name`` is the Box file name
            with dots and spaces replaced by underscores, and ``content`` is
            truncated to ``character_limit`` characters when that limit is
            positive. ``(None, None, None)`` is returned when the file lookup
            fails with an unexpected error, the file has no representations,
            none of them is ``extracted_text``, or downloading the text fails
            with an HTTP error.

        Raises:
            RuntimeError: if the file lookup raises ``BoxAPIError`` or
                ``BoxSDKError``.
        """
        if self._box is None:
            self.get_box_client()

        try:
            file = self._box.files.get_file_by_id(  # type: ignore[union-attr]
                file_id,
                x_rep_hints="[extracted_text]",
                fields=["name", "representations", "type"],
            )
        except box_sdk_gen.BoxAPIError as bae:
            raise RuntimeError(
                f"BoxAPIError: Error getting text rep: {bae.message}"
            ) from bae
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"BoxSDKError: Error getting text rep: {bse.message}"
            ) from bse
        except Exception:
            # Best effort: any other failure means "no text available".
            return None, None, None

        representations = file.representations
        entries = representations.entries if representations is not None else None
        if not entries:
            return None, None, None

        for entry in entries:
            if entry.representation != "extracted_text":
                continue

            # If the file representation doesn't exist, calling
            # info.url will generate text if possible
            if entry.status.state == "none":
                self._do_request(entry.info.url)

            url = entry.content.url_template.replace("{+asset_path}", "")
            file_name = file.name.replace(".", "_").replace(" ", "_")

            try:
                raw_content = self._do_request(url)
            except requests.exceptions.HTTPError:
                return None, None, None

            # Decode before truncating so the limit counts characters and a
            # multi-byte character is never cut in half.
            if isinstance(raw_content, bytes):
                raw_content = raw_content.decode("utf-8", errors="replace")

            if self.character_limit is not None and self.character_limit > 0:
                content = raw_content[: self.character_limit]
            else:
                content = raw_content

            return file_name, content, url

        return None, None, None

    def get_document_by_file_id(self, file_id: str) -> Optional[Document]:
        """Load a file from a Box id. Accepts file_id as str.
        Returns `Document`, or ``None`` when the item is not a file, its
        extension is not in ``DocumentFiles``, or no text could be fetched."""

        if self._box is None:
            self.get_box_client()

        file = self._box.files.get_file_by_id(  # type: ignore[union-attr]
            file_id, fields=["name", "type", "extension"]
        )

        if file.type != "file" or not self._is_document_extension(file.extension):
            return None

        file_name, content, url = self._get_text_representation(file_id=file_id)
        if file_name is None or content is None or url is None:
            return None

        metadata = {
            "source": f"{url}",
            "title": f"{file_name}",
        }
        return Document(page_content=content, metadata=metadata)

    def get_folder_items(self, folder_id: str) -> box_sdk_gen.Items:
        """Get all the items in a folder. Accepts folder_id as str.
        Returns the ``entries`` of the ``get_folder_items`` response.

        Raises:
            RuntimeError: if the SDK raises ``BoxAPIError`` or ``BoxSDKError``.
        """

        if self._box is None:
            self.get_box_client()

        try:
            folder_contents = self._box.folders.get_folder_items(  # type: ignore[union-attr]
                folder_id, fields=["id", "type", "name"]
            )
        except box_sdk_gen.BoxAPIError as bae:
            raise RuntimeError(
                f"BoxAPIError: Error getting folder content: {bae.message}"
            ) from bae
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"BoxSDKError: Error getting folder content: {bse.message}"
            ) from bse

        return folder_contents.entries

    def search_box(self, query: str) -> Optional[List[Document]]:
        """Search Box for ``query`` and load every matching document file.

        Returns:
            The loaded documents, or ``None`` when the search returns no
            entries. Hits that are not files, have an unsupported extension,
            or yield no text are skipped.

        Raises:
            RuntimeError: if the SDK raises ``BoxAPIError`` or ``BoxSDKError``.
        """
        if self._box is None:
            self.get_box_client()

        files = []

        try:
            results = self._box.search.search_for_content(  # type: ignore[union-attr]
                query=query, fields=["id", "type", "extension"]
            )

            if not results.entries:
                return None

            for file in results.entries:
                if (
                    file is not None
                    and file.type == "file"
                    and self._is_document_extension(file.extension)
                ):
                    doc = self.get_document_by_file_id(file.id)

                    if doc is not None:
                        files.append(doc)

            return files
        except box_sdk_gen.BoxAPIError as bae:
            raise RuntimeError(
                f"BoxAPIError: Error getting search results: {bae.message}"
            ) from bae
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"BoxSDKError: Error getting search results: {bse.message}"
            ) from bse

    def ask_box_ai(self, query: str, box_file_ids: List[str]) -> List[Document]:
        """Ask Box AI ``query`` about the given files.

        Uses single-item mode for one file ID and multiple-item mode for
        more than one.

        Returns:
            A one-element list holding the answer as a ``Document``.

        Raises:
            ValueError: if ``box_file_ids`` is empty.
            RuntimeError: if the SDK raises ``BoxAPIError`` or ``BoxSDKError``.
        """
        # Validate before creating a client so bad input fails fast.
        if not box_file_ids:
            raise ValueError("BOX_AI_ASK requires at least one file ID")

        if self._box is None:
            self.get_box_client()

        if len(box_file_ids) > 1:
            ai_mode = box_sdk_gen.CreateAiAskMode.MULTIPLE_ITEM_QA.value
        else:
            ai_mode = box_sdk_gen.CreateAiAskMode.SINGLE_ITEM_QA.value

        items = [
            box_sdk_gen.CreateAiAskItems(
                id=file_id, type=box_sdk_gen.CreateAiAskItemsTypeField.FILE.value
            )
            for file_id in box_file_ids
        ]

        try:
            response = self._box.ai.create_ai_ask(ai_mode, query, items)  # type: ignore[union-attr]
        except box_sdk_gen.BoxAPIError as bae:
            raise RuntimeError(
                f"BoxAPIError: Error getting Box AI result: {bae.message}"
            ) from bae
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"BoxSDKError: Error getting Box AI result: {bse.message}"
            ) from bse

        content = response.answer

        metadata = {"source": "Box AI", "title": f"Box AI {query}"}

        return [Document(page_content=content, metadata=metadata)]