Source code for langchain_community.embeddings.voyageai
from __future__ import annotations

import json
import logging
from typing import (
    Any,
    Callable,
    Dict,
    List,
    Optional,
    Tuple,
    Union,
    cast,
)

import requests
from langchain_core._api.deprecation import deprecated
from langchain_core.embeddings import Embeddings
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
from pydantic import BaseModel, ConfigDict, SecretStr, model_validator
from tenacity import (
    before_sleep_log,
    retry,
    stop_after_attempt,
    wait_exponential,
)

logger = logging.getLogger(__name__)


def _create_retry_decorator(embeddings: VoyageEmbeddings) -> Callable[[Any], Any]:
    """Build the tenacity ``retry`` decorator used for embedding requests.

    Args:
        embeddings: Instance whose ``max_retries`` bounds the number of attempts.

    Returns:
        A decorator that re-raises the last error once attempts are exhausted
        and logs at WARNING level before each sleep.
    """
    shortest_wait = 4
    longest_wait = 10
    # Exponential backoff (multiplier 1): waits start at 4 seconds and are
    # capped at 10 seconds for every later retry.
    attempt_limit = stop_after_attempt(embeddings.max_retries)
    backoff = wait_exponential(multiplier=1, min=shortest_wait, max=longest_wait)
    warn_before_sleep = before_sleep_log(logger, logging.WARNING)
    return retry(
        reraise=True,
        stop=attempt_limit,
        wait=backoff,
        before_sleep=warn_before_sleep,
    )


def _check_response(response: dict) -> dict:
    """Return ``response`` unchanged if it carries a ``"data"`` key.

    Raises:
        RuntimeError: If ``"data"`` is missing; the message embeds the
            JSON-serialised response.
    """
    if "data" in response:
        return response
    raise RuntimeError(f"Voyage API Error. Message: {json.dumps(response)}")
def embed_with_retry(embeddings: VoyageEmbeddings, **kwargs: Any) -> Any:
    """POST an embedding request, retrying according to ``embeddings.max_retries``.

    Args:
        embeddings: Instance used to configure the retry decorator.
        **kwargs: Forwarded verbatim to ``requests.post``.

    Returns:
        The decoded JSON body, after ``_check_response`` has accepted it.
    """

    # The nested function keeps its original name: the retry logger reports
    # the wrapped callable when it announces a retry.
    def _embed_with_retry(**request_kwargs: Any) -> Any:
        http_response = requests.post(**request_kwargs)
        return _check_response(http_response.json())

    retrying_call = _create_retry_decorator(embeddings)(_embed_with_retry)
    return retrying_call(**kwargs)
@deprecated(
    since="0.0.29",
    removal="1.0",
    alternative_import="langchain_voyageai.VoyageAIEmbeddings",
)
class VoyageEmbeddings(BaseModel, Embeddings):
    """Voyage embedding models.

    To use, you should have the environment variable ``VOYAGE_API_KEY`` set with
    your API key or pass it as a named parameter to the constructor.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import VoyageEmbeddings

            voyage = VoyageEmbeddings(voyage_api_key="your-api-key", model="voyage-2")
            text = "This is a test query."
            query_result = voyage.embed_query(text)
    """

    model: str
    voyage_api_base: str = "https://api.voyageai.com/v1/embeddings"
    voyage_api_key: Optional[SecretStr] = None
    batch_size: int
    """Maximum number of texts to embed in each API request."""
    max_retries: int = 6
    """Maximum number of retries to make when generating."""
    request_timeout: Optional[Union[float, Tuple[float, float]]] = None
    """Timeout in seconds for the API request."""
    show_progress_bar: bool = False
    """Whether to show a progress bar when embedding. Must have tqdm installed if set
        to True."""
    truncation: bool = True
    """Whether to truncate the input texts to fit within the context length.

        If True, over-length input texts will be truncated to fit within the context
        length, before vectorized by the embedding model. If False, an error will be
        raised if any given text exceeds the context length."""

    model_config = ConfigDict(
        extra="forbid",
    )

    @model_validator(mode="before")
    @classmethod
    def validate_environment(cls, values: Dict) -> Any:
        """Resolve the API key and fill in defaults before field validation.

        The key is read from ``values`` or the ``VOYAGE_API_KEY`` environment
        variable and stored as a secret string. A missing ``model`` falls back
        to ``"voyage-01"`` (with a warning), and a missing ``batch_size`` is
        chosen from the model name.
        """
        raw_key = get_from_dict_or_env(values, "voyage_api_key", "VOYAGE_API_KEY")
        values["voyage_api_key"] = convert_to_secret_str(raw_key)

        if "model" not in values:
            values["model"] = "voyage-01"
            logger.warning(
                "model will become a required arg for VoyageAIEmbeddings, "
                "we recommend to specify it when using this class. \n"
                "Currently the default is set to voyage-01."
            )

        if "batch_size" not in values:
            is_voyage_2 = "model" in values and (
                values["model"] in ["voyage-2", "voyage-02"]
            )
            values["batch_size"] = 72 if is_voyage_2 else 7

        return values

    def _invocation_params(
        self, input: List[str], input_type: Optional[str] = None
    ) -> Dict:
        """Assemble the keyword arguments for one ``requests.post`` call.

        Args:
            input: Texts to send in this request.
            input_type: Value forwarded as the payload's ``input_type``.

        Returns:
            Dict with ``url``, ``headers``, ``json`` and ``timeout`` entries.
        """
        secret = cast(SecretStr, self.voyage_api_key).get_secret_value()
        auth_headers = {"Authorization": f"Bearer {secret}"}
        payload = {
            "model": self.model,
            "input": input,
            "input_type": input_type,
            "truncation": self.truncation,
        }
        params: Dict = {
            "url": self.voyage_api_base,
            "headers": auth_headers,
            "json": payload,
            "timeout": self.request_timeout,
        }
        return params

    def _get_embeddings(
        self,
        texts: List[str],
        batch_size: Optional[int] = None,
        input_type: Optional[str] = None,
    ) -> List[List[float]]:
        """Embed ``texts`` in consecutive batches and return all vectors.

        Args:
            texts: Texts to embed.
            batch_size: Texts per request; defaults to ``self.batch_size``.
            input_type: ``None``, ``"query"`` or ``"document"``.

        Returns:
            The ``"embedding"`` entries of every response, in request order.

        Raises:
            ImportError: If a progress bar is requested but tqdm is missing.
            ValueError: If ``input_type`` is set to an unsupported value.
        """
        collected: List[List[float]] = []
        chunk_len = self.batch_size if batch_size is None else batch_size

        if not self.show_progress_bar:
            _iter = range(0, len(texts), chunk_len)
        else:
            try:
                from tqdm.auto import tqdm
            except ImportError as e:
                raise ImportError(
                    "Must have tqdm installed if `show_progress_bar` is set to True. "
                    "Please install with `pip install tqdm`."
                ) from e
            _iter = tqdm(range(0, len(texts), chunk_len))

        if input_type and input_type not in ["query", "document"]:
            raise ValueError(
                f"input_type {input_type} is invalid. Options: None, 'query', "
                "'document'."
            )

        for start in _iter:
            request_kwargs = self._invocation_params(
                input=texts[start : start + chunk_len], input_type=input_type
            )
            response = embed_with_retry(self, **request_kwargs)
            collected.extend(item["embedding"] for item in response["data"])

        return collected

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed search documents via the Voyage embedding endpoint.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        vectors = self._get_embeddings(
            texts, batch_size=self.batch_size, input_type="document"
        )
        return vectors

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query text via the Voyage embedding endpoint.

        Args:
            text: The text to embed.

        Returns:
            Embedding for the text.
        """
        vectors = self._get_embeddings(
            [text], batch_size=self.batch_size, input_type="query"
        )
        return vectors[0]

    def embed_general_texts(
        self, texts: List[str], *, input_type: Optional[str] = None
    ) -> List[List[float]]:
        """Embed general texts via the Voyage embedding endpoint.

        Args:
            texts: The list of texts to embed.
            input_type: Type of the input text. Default to None, meaning the type
                is unspecified. Other options: query, document.

        Returns:
            Embedding for the text.
        """
        vectors = self._get_embeddings(
            texts, batch_size=self.batch_size, input_type=input_type
        )
        return vectors