class LlamaCppEmbeddings(BaseModel, Embeddings):
    """llama.cpp embedding models.

    To use, you should have the llama-cpp-python library installed, and provide the
    path to the Llama model as a named parameter to the constructor.
    Check out: https://github.com/abetlen/llama-cpp-python

    Example:
        .. code-block:: python

            from langchain_community.embeddings import LlamaCppEmbeddings

            llama = LlamaCppEmbeddings(model_path="/path/to/model.bin")
    """

    client: Any = None  #: :meta private:

    model_path: str = Field(default="")

    n_ctx: int = Field(512, alias="n_ctx")
    """Token context window."""

    n_parts: int = Field(-1, alias="n_parts")
    """Number of parts to split the model into.
    If -1, the number of parts is automatically determined."""

    seed: int = Field(-1, alias="seed")
    """Seed. If -1, a random seed is used."""

    f16_kv: bool = Field(False, alias="f16_kv")
    """Use half-precision for key/value cache."""

    logits_all: bool = Field(False, alias="logits_all")
    """Return logits for all tokens, not just the last token."""

    vocab_only: bool = Field(False, alias="vocab_only")
    """Only load the vocabulary, no weights."""

    use_mlock: bool = Field(False, alias="use_mlock")
    """Force system to keep model in RAM."""

    n_threads: Optional[int] = Field(None, alias="n_threads")
    """Number of threads to use. If None, the number of threads is
    automatically determined."""

    n_batch: Optional[int] = Field(512, alias="n_batch")
    """Number of tokens to process in parallel.
    Should be a number between 1 and n_ctx."""

    n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers")
    """Number of layers to be loaded into gpu memory. Default None."""

    verbose: bool = Field(True, alias="verbose")
    """Print verbose output to stderr."""

    device: Optional[str] = Field(None, alias="device")
    """Device type to use and pass to the model."""

    model_config = ConfigDict(
        extra="forbid",
        protected_namespaces=(),
    )

    @model_validator(mode="after")
    def validate_environment(self) -> Self:
        """Validate that the llama-cpp-python library is installed."""
        model_path = self.model_path
        model_param_names = [
            "n_ctx",
            "n_parts",
            "seed",
            "f16_kv",
            "logits_all",
            "vocab_only",
            "use_mlock",
            "n_threads",
            "n_batch",
            "verbose",
            "device",
        ]
        model_params = {k: getattr(self, k) for k in model_param_names}
        # For backwards compatibility, only include if non-null.
        if self.n_gpu_layers is not None:
            model_params["n_gpu_layers"] = self.n_gpu_layers

        if not self.client:
            try:
                from llama_cpp import Llama

                self.client = Llama(model_path, embedding=True, **model_params)
            except ImportError:
                raise ImportError(
                    "Could not import llama-cpp-python library. "
                    "Please install the llama-cpp-python library to "
                    "use this embedding model: pip install llama-cpp-python"
                )
            except Exception as e:
                raise ValueError(
                    f"Could not load Llama model from path: {model_path}. "
                    f"Received error {e}"
                )
        return self
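Beyond the minimal docstring example, the constructor accepts the model parameters
listed above and forwards them to ``llama_cpp.Llama``. The sketch below shows a
GPU-offloaded configuration; the model path and parameter values are illustrative
assumptions rather than defaults, and ``n_gpu_layers`` only has an effect when
llama.cpp was built with GPU support.

.. code-block:: python

    from langchain_community.embeddings import LlamaCppEmbeddings

    # Hypothetical path to a locally downloaded model file.
    llama = LlamaCppEmbeddings(
        model_path="/path/to/model.gguf",
        n_ctx=2048,       # token context window
        n_batch=512,      # tokens processed in parallel; should be <= n_ctx
        n_gpu_layers=32,  # layers to offload to the GPU, if available
    )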
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of documents using the Llama model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        embeddings = self.client.create_embedding(texts)
        final_embeddings = []
        for e in embeddings["data"]:
            try:
                if isinstance(e["embedding"][0], list):
                    for data in e["embedding"]:
                        final_embeddings.append(list(map(float, data)))
                else:
                    final_embeddings.append(list(map(float, e["embedding"])))
            except (IndexError, TypeError):
                final_embeddings.append(list(map(float, e["embedding"])))
        return final_embeddings
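As a usage sketch (assuming the ``llama`` instance from the example above),
``embed_documents`` sends all texts to ``create_embedding`` in one call and
normally returns one vector of floats per input text:

.. code-block:: python

    docs = [
        "llama.cpp runs quantized models on local hardware.",
        "Embeddings map text to fixed-length vectors of floats.",
    ]
    vectors = llama.embed_documents(docs)

    print(len(vectors))     # number of returned embeddings
    print(len(vectors[0]))  # dimensionality of the first embedding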
    def embed_query(self, text: str) -> List[float]:
        """Embed a query using the Llama model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        embedding = self.client.embed(text)
        if (
            embedding
            and isinstance(embedding, list)
            and isinstance(embedding[0], list)
        ):
            return list(map(float, embedding[0]))
        else:
            return list(map(float, embedding))
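To round out the example, ``embed_query`` embeds a single string, and a common
follow-up is ranking previously embedded documents by cosine similarity. The
sketch below assumes the ``llama``, ``docs``, and ``vectors`` names from the
earlier examples; the ``cosine`` helper is a plain-Python illustration, not part
of the class or of llama-cpp-python.

.. code-block:: python

    import math


    def cosine(a: list[float], b: list[float]) -> float:
        """Cosine similarity between two equal-length vectors."""
        dot = sum(x * y for x, y in zip(a, b))
        norm_a = math.sqrt(sum(x * x for x in a))
        norm_b = math.sqrt(sum(y * y for y in b))
        return dot / (norm_a * norm_b)


    query_vec = llama.embed_query("How do I run a quantized model locally?")

    # Rank the documents from the previous sketch by similarity to the query.
    ranked = sorted(
        zip(docs, vectors),
        key=lambda pair: cosine(query_vec, pair[1]),
        reverse=True,
    )
    print(ranked[0][0])  # document most similar to the query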