class HuggingFaceEmbeddings(BaseModel, Embeddings):
    """HuggingFace sentence_transformers embedding models.

    To use, you should have the ``sentence_transformers`` python package installed.

    Example:
        .. code-block:: python

            from langchain_huggingface import HuggingFaceEmbeddings

            model_name = "sentence-transformers/all-mpnet-base-v2"
            model_kwargs = {'device': 'cpu'}
            encode_kwargs = {'normalize_embeddings': False}
            hf = HuggingFaceEmbeddings(
                model_name=model_name,
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs
            )
    """

    model_name: str = Field(default=DEFAULT_MODEL_NAME, alias="model")
    """Model name to use."""
    cache_folder: Optional[str] = None
    """Path to store models.

    Can also be set by the SENTENCE_TRANSFORMERS_HOME environment variable.
    """
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to pass to the Sentence Transformer model, such as `device`,
    `prompts`, `default_prompt_name`, `revision`, `trust_remote_code`, or `token`.

    See also the Sentence Transformer documentation:
    https://sbert.net/docs/package_reference/SentenceTransformer.html#sentence_transformers.SentenceTransformer
    """
    encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to pass when calling the `encode` method for the documents of
    the Sentence Transformer model, such as `prompt_name`, `prompt`, `batch_size`,
    `precision`, `normalize_embeddings`, and more.

    See also the Sentence Transformer documentation:
    https://sbert.net/docs/package_reference/SentenceTransformer.html#sentence_transformers.SentenceTransformer.encode
    """
    query_encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to pass when calling the `encode` method for the query of
    the Sentence Transformer model, such as `prompt_name`, `prompt`, `batch_size`,
    `precision`, `normalize_embeddings`, and more.

    See also the Sentence Transformer documentation:
    https://sbert.net/docs/package_reference/SentenceTransformer.html#sentence_transformers.SentenceTransformer.encode
    """
    multi_process: bool = False
    """Run encode() on multiple GPUs."""
    show_progress: bool = False
    """Whether to show a progress bar."""

    def __init__(self, **kwargs: Any):
        """Initialize the sentence_transformer."""
        super().__init__(**kwargs)
        try:
            import sentence_transformers  # type: ignore[import]
        except ImportError as exc:
            raise ImportError(
                "Could not import sentence_transformers python package. "
                "Please install it with `pip install sentence-transformers`."
            ) from exc

        # A "backend": "ipex" entry in model_kwargs selects the optimum-intel
        # backend (see the sketch after the class).
        if self.model_kwargs.get("backend", "torch") == "ipex":
            if not is_optimum_intel_available() or not is_ipex_available():
                raise ImportError(
                    f'Backend: ipex {IMPORT_ERROR.format("optimum[ipex]")}'
                )
            if is_optimum_intel_version("<", _MIN_OPTIMUM_VERSION):
                raise ImportError(
                    f"Backend: ipex requires optimum-intel>="
                    f"{_MIN_OPTIMUM_VERSION}. You can install it with pip: "
                    "`pip install --upgrade --upgrade-strategy eager "
                    "optimum[ipex]`."
                )
            from optimum.intel import IPEXSentenceTransformer  # type: ignore[import]

            model_cls = IPEXSentenceTransformer
        else:
            model_cls = sentence_transformers.SentenceTransformer

        self._client = model_cls(
            self.model_name, cache_folder=self.cache_folder, **self.model_kwargs
        )

    model_config = ConfigDict(
        extra="forbid",
        protected_namespaces=(),
        populate_by_name=True,
    )

    def _embed(
        self, texts: list[str], encode_kwargs: Dict[str, Any]
    ) -> List[List[float]]:
        """Embed a list of texts using the HuggingFace transformer model.

        Args:
            texts: The list of texts to embed.
            encode_kwargs: Keyword arguments to pass when calling the `encode`
                method of the SentenceTransformer model.

        Returns:
            List of embeddings, one for each text.
        """
        import sentence_transformers  # type: ignore[import]

        texts = list(map(lambda x: x.replace("\n", " "), texts))
        if self.multi_process:
            # multi_process=True spreads encoding across a multi-process pool
            # (see the sketch after the class).
            pool = self._client.start_multi_process_pool()
            embeddings = self._client.encode_multi_process(texts, pool)
            sentence_transformers.SentenceTransformer.stop_multi_process_pool(pool)
        else:
            embeddings = self._client.encode(
                texts,
                show_progress_bar=self.show_progress,
                **encode_kwargs,  # type: ignore
            )
        if isinstance(embeddings, list):
            raise TypeError(
                "Expected embeddings to be a Tensor or a numpy array, "
                "got a list instead."
            )

        return embeddings.tolist()
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a HuggingFace transformer model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        return self._embed(texts, self.encode_kwargs)
    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a HuggingFace transformer model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        embed_kwargs = (
            self.query_encode_kwargs
            if len(self.query_encode_kwargs) > 0
            else self.encode_kwargs
        )
        return self._embed([text], embed_kwargs)[0]
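

# Minimal usage sketch (illustrative, not part of the class above). Assumes
# ``sentence-transformers`` is installed and the checkpoint can be downloaded.
# ``query_encode_kwargs`` applies only to embed_query(); when it is empty,
# embed_query() falls back to ``encode_kwargs``.
if __name__ == "__main__":
    hf = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
        query_encode_kwargs={"normalize_embeddings": True},
    )
    doc_vectors = hf.embed_documents(["LangChain supports many embedding models."])
    query_vector = hf.embed_query("Which embedding models does LangChain support?")
    print(len(doc_vectors), len(doc_vectors[0]), len(query_vector))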
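

# Hedged sketch of the IPEX path in __init__: a ``"backend": "ipex"`` entry in
# ``model_kwargs`` swaps SentenceTransformer for optimum-intel's
# IPEXSentenceTransformer. Assumes ``optimum[ipex]`` (meeting the minimum
# optimum-intel version checked above) is installed; otherwise the constructor
# raises ImportError. The helper name below is illustrative only.
def _example_ipex_embeddings() -> List[List[float]]:
    hf_ipex = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={"backend": "ipex"},
    )
    return hf_ipex.embed_documents(["Encoded with the IPEX backend."])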
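

# Hedged sketch of multi-GPU encoding: with ``multi_process=True`` the _embed()
# helper starts a sentence-transformers multi-process pool and calls
# encode_multi_process() instead of encode(). Assumes multiple devices are
# available for this to pay off. The helper name below is illustrative only.
def _example_multi_process_embeddings() -> List[List[float]]:
    hf_mp = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        multi_process=True,
    )
    return hf_mp.embed_documents(["Large batches can be spread across GPUs."])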