def is_local(url: str) -> bool:
    """Tell whether ``url`` points at a file that exists locally.

    Args:
        url (str): A plain filesystem path or a URL.

    Returns:
        bool: True when the URL has no scheme or the ``file`` scheme and its
            parsed path component exists on disk; False otherwise.
    """
    parts = urlparse(url)
    # Any other scheme (http, https, ...) cannot designate a local file.
    if parts.scheme not in ("file", ""):
        return False
    return exists(parts.path)
def get_bytes_str(file_path: str) -> str:
    """Read a file and return its contents as a base64 string.

    Args:
        file_path (str): The path to the file.

    Returns:
        str: The file's bytes, base64-encoded and decoded as UTF-8 text.
    """
    with open(file_path, "rb") as stream:
        raw = stream.read()
    encoded = base64.b64encode(raw)
    return encoded.decode("utf-8")
class JinaEmbeddings(BaseModel, Embeddings):
    """Jina embedding models.

    An HTTP session carrying the bearer token is created while the model is
    validated and is reused for every request sent to ``JINA_API_URL``.
    """

    session: Any  #: :meta private:
    model_name: str = "jina-embeddings-v2-base-en"
    jina_api_key: Optional[SecretStr] = None

    # NOTE(review): an empty ``protected_namespaces`` presumably lets the
    # ``model_name`` field coexist with pydantic's reserved ``model_`` prefix.
    model_config = ConfigDict(protected_namespaces=())

    @model_validator(mode="before")
    @classmethod
    def validate_environment(cls, values: Dict) -> Any:
        """Validate that auth token exists in environment.

        The key is looked up with ``get_from_dict_or_env`` under
        ``jina_api_key`` / ``JINA_API_KEY`` first and, if that fails, under
        ``jina_auth_token`` / ``JINA_AUTH_TOKEN``.

        Args:
            values: The raw field values handed to the model.

        Returns:
            The same ``values`` mapping with a configured ``session`` added.

        Raises:
            ValueError: If neither lookup succeeds; the error from the first
                (``jina_api_key``) lookup is the one re-raised.
        """
        try:
            jina_api_key = convert_to_secret_str(
                get_from_dict_or_env(values, "jina_api_key", "JINA_API_KEY")
            )
        except ValueError as original_exc:
            try:
                jina_api_key = convert_to_secret_str(
                    get_from_dict_or_env(values, "jina_auth_token", "JINA_AUTH_TOKEN")
                )
            except ValueError:
                # Report the failure for the primary key name, not the fallback.
                raise original_exc
        session = requests.Session()
        session.headers.update(
            {
                "Authorization": f"Bearer {jina_api_key.get_secret_value()}",
                "Accept-Encoding": "identity",
                "Content-type": "application/json",
            }
        )
        values["session"] = session
        return values

    def _embed(self, input: Any) -> List[List[float]]:
        """Post ``input`` to the Jina embedding API and return the vectors.

        Args:
            input: The payload sent as the request's ``"input"`` field (a list
                of texts, or a list of ``{"bytes": ...}`` / ``{"url": ...}``
                dicts for images).

        Returns:
            The ``"embedding"`` of every returned item, ordered by the item's
            ``"index"``.

        Raises:
            RuntimeError: If the response has no ``"data"`` field. The message
                is the response's ``"detail"`` when present, otherwise the
                whole response payload.
        """
        # Call Jina AI Embedding API
        resp = self.session.post(  # type: ignore
            JINA_API_URL, json={"input": input, "model": self.model_name}
        ).json()
        if "data" not in resp:
            # Not every error payload is guaranteed to carry "detail"; fall
            # back to the full payload rather than masking it with a KeyError.
            detail = resp.get("detail", resp) if isinstance(resp, dict) else resp
            raise RuntimeError(detail)

        # Sort resulting embeddings by index
        ordered = sorted(resp["data"], key=lambda e: e["index"])  # type: ignore

        # Return just the embeddings
        return [item["embedding"] for item in ordered]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Call out to Jina's embedding endpoint.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        return self._embed(texts)

    def embed_query(self, text: str) -> List[float]:
        """Call out to Jina's embedding endpoint.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        return self._embed([text])[0]

    def embed_images(self, uris: List[str]) -> List[List[float]]:
        """Call out to Jina's image embedding endpoint.

        URIs that ``is_local`` recognises are read from disk and sent as
        base64 ``"bytes"``; every other URI is sent as a ``"url"``.

        Args:
            uris: The list of uris to embed.

        Returns:
            List of embeddings, one for each uri.
        """
        payload = []
        for uri in uris:
            if is_local(uri):
                # Open the parsed path, which is what is_local() checked for
                # existence; the raw URI (e.g. "file:///x.png") is not a path
                # that open() can read.
                payload.append({"bytes": get_bytes_str(urlparse(uri).path)})
            else:
                payload.append({"url": uri})
        return self._embed(payload)