@deprecated(
    since="0.2.2",
    removal="1.0",
    alternative_import="langchain_huggingface.HuggingFaceEndpointEmbeddings",
)
class HuggingFaceHubEmbeddings(BaseModel, Embeddings):
    """HuggingFaceHub embedding models.

    To use, you should have the ``huggingface_hub`` python package installed, and
    the environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token,
    or pass it as a named parameter to the constructor.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import HuggingFaceHubEmbeddings
            model = "sentence-transformers/all-mpnet-base-v2"
            hf = HuggingFaceHubEmbeddings(
                model=model,
                task="feature-extraction",
                huggingfacehub_api_token="my-api-key",
            )
    """

    client: Any  #: :meta private:
    async_client: Any  #: :meta private:
    model: Optional[str] = None
    """Model name to use."""
    repo_id: Optional[str] = None
    """Huggingfacehub repository id, for backward compatibility."""
    task: Optional[str] = "feature-extraction"
    """Task to call the model with."""
    model_kwargs: Optional[dict] = None
    """Keyword arguments to pass to the model."""

    huggingfacehub_api_token: Optional[str] = None

    class Config:
        extra = "forbid"

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment.

        Resolves the API token from the input values or the
        ``HUGGINGFACEHUB_API_TOKEN`` environment variable, reconciles the
        ``model``/``repo_id`` aliases, and builds the sync and async
        inference clients.

        Raises:
            ImportError: If ``huggingface_hub`` is not installed.
        """
        huggingfacehub_api_token = get_from_dict_or_env(
            values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN"
        )
        # Keep the try body minimal: only the import can raise ImportError.
        # Previously client construction was inside this try, so unrelated
        # errors were misreported as a missing package.
        try:
            from huggingface_hub import AsyncInferenceClient, InferenceClient
        except ImportError as e:
            raise ImportError(
                "Could not import huggingface_hub python package. "
                "Please install it with `pip install huggingface_hub`."
            ) from e

        # ``model`` and ``repo_id`` are kept in sync: ``model`` wins when both
        # are given; ``repo_id`` is accepted for backward compatibility.
        if values.get("model"):
            values["repo_id"] = values["model"]
        elif values.get("repo_id"):
            values["model"] = values["repo_id"]
        else:
            values["model"] = DEFAULT_MODEL
            values["repo_id"] = DEFAULT_MODEL

        values["client"] = InferenceClient(
            model=values["model"],
            token=huggingfacehub_api_token,
        )
        values["async_client"] = AsyncInferenceClient(
            model=values["model"],
            token=huggingfacehub_api_token,
        )
        return values

    @root_validator(pre=False, skip_on_failure=True)
    def post_init(cls, values: Dict) -> Dict:
        """Post init validation for the class.

        Raises:
            ValueError: If ``task`` is not one of ``VALID_TASKS``.
        """
        if values["task"] not in VALID_TASKS:
            raise ValueError(
                f"Got invalid task {values['task']}, "
                f"currently only {VALID_TASKS} are supported"
            )
        return values
[docs]defembed_documents(self,texts:List[str])->List[List[float]]:"""Call out to HuggingFaceHub's embedding endpoint for embedding search docs. Args: texts: The list of texts to embed. Returns: List of embeddings, one for each text. """# replace newlines, which can negatively affect performance.texts=[text.replace("\n"," ")fortextintexts]_model_kwargs=self.model_kwargsor{}# api doc: https://huggingface.github.io/text-embeddings-inference/#/Text%20Embeddings%20Inference/embedresponses=self.client.post(json={"inputs":texts,**_model_kwargs},task=self.task)returnjson.loads(responses.decode())
[docs]asyncdefaembed_documents(self,texts:List[str])->List[List[float]]:"""Async Call to HuggingFaceHub's embedding endpoint for embedding search docs. Args: texts: The list of texts to embed. Returns: List of embeddings, one for each text. """# replace newlines, which can negatively affect performance.texts=[text.replace("\n"," ")fortextintexts]_model_kwargs=self.model_kwargsor{}responses=awaitself.async_client.post(json={"inputs":texts,"parameters":_model_kwargs},task=self.task)returnjson.loads(responses.decode())
[docs]defembed_query(self,text:str)->List[float]:"""Call out to HuggingFaceHub's embedding endpoint for embedding query text. Args: text: The text to embed. Returns: Embeddings for the text. """response=self.embed_documents([text])[0]returnresponse
[docs]asyncdefaembed_query(self,text:str)->List[float]:"""Async Call to HuggingFaceHub's embedding endpoint for embedding query text. Args: text: The text to embed. Returns: Embeddings for the text. """response=(awaitself.aembed_documents([text]))[0]returnresponse