Source code for langchain_community.embeddings.self_hosted_hugging_face
import importlib
import logging
from typing import Any, Callable, List, Optional

from langchain_community.embeddings.self_hosted import SelfHostedEmbeddings

DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
DEFAULT_INSTRUCT_MODEL = "hkunlp/instructor-large"
DEFAULT_EMBED_INSTRUCTION = "Represent the document for retrieval: "
DEFAULT_QUERY_INSTRUCTION = (
    "Represent the question for retrieving supporting documents: "
)

logger = logging.getLogger(__name__)


def _embed_documents(client: Any, *args: Any, **kwargs: Any) -> List[List[float]]:
    """Inference function to send to the remote hardware.

    Accepts a sentence_transformers client (model object) and returns a list
    of embeddings, one for each document in the batch.
    """
    return client.encode(*args, **kwargs)
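
# A minimal local sketch of the inference function above, assuming the
# ``sentence_transformers`` package is installed. In production this function
# is shipped to the remote hardware via runhouse rather than run locally; the
# helper name below is hypothetical and purely illustrative.
def _example_embed_locally() -> None:
    import sentence_transformers

    model = sentence_transformers.SentenceTransformer(DEFAULT_MODEL_NAME)
    # ``encode`` takes a list of texts and returns one vector per text.
    vectors = _embed_documents(model, ["hello world", "goodbye world"])
    assert len(vectors) == 2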
def load_embedding_model(
    model_id: str, instruct: bool = False, device: int = 0
) -> Any:
    """Load the embedding model."""
    if not instruct:
        import sentence_transformers

        client = sentence_transformers.SentenceTransformer(model_id)
    else:
        from InstructorEmbedding import INSTRUCTOR

        client = INSTRUCTOR(model_id)

    if importlib.util.find_spec("torch") is not None:
        import torch

        cuda_device_count = torch.cuda.device_count()
        if device < -1 or (device >= cuda_device_count):
            raise ValueError(
                f"Got device=={device}, "
                f"device is required to be within [-1, {cuda_device_count})"
            )
        if device < 0 and cuda_device_count > 0:
            logger.warning(
                "Device has %d GPUs available. "
                "Provide device={deviceId} to `from_model_id` to use available "
                "GPUs for execution. deviceId is -1 for CPU and "
                "can be a positive integer associated with CUDA device id.",
                cuda_device_count,
            )

        client = client.to(device)
    return client
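
# A hedged usage sketch for ``load_embedding_model``: per the validation
# above, ``device`` must lie in [-1, cuda_device_count), with -1 meaning CPU
# and non-negative integers naming CUDA devices. Assumes the optional model
# packages are installed; the function name is hypothetical.
def _example_load_models() -> None:
    # Plain sentence-transformers model on the default device.
    st_client = load_embedding_model(DEFAULT_MODEL_NAME)
    # Instruction-tuned INSTRUCTOR model on the first CUDA device
    # (requires a GPU; otherwise the range check above raises ValueError).
    instructor_client = load_embedding_model(
        DEFAULT_INSTRUCT_MODEL, instruct=True, device=0
    )
    print(type(st_client).__name__, type(instructor_client).__name__)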
class SelfHostedHuggingFaceEmbeddings(SelfHostedEmbeddings):
    """HuggingFace embedding models on self-hosted remote hardware.

    Supported hardware includes auto-launched instances on AWS, GCP, Azure,
    and Lambda, as well as servers specified by IP address and SSH
    credentials (such as on-prem, or another cloud like Paperspace,
    Coreweave, etc.).

    To use, you should have the ``runhouse`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import SelfHostedHuggingFaceEmbeddings
            import runhouse as rh

            model_id = "sentence-transformers/all-mpnet-base-v2"
            gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
            hf = SelfHostedHuggingFaceEmbeddings(model_id=model_id, hardware=gpu)
    """

    client: Any  #: :meta private:
    model_id: str = DEFAULT_MODEL_NAME
    """Model name to use."""
    model_reqs: List[str] = ["./", "sentence_transformers", "torch"]
    """Requirements to install on hardware to inference the model."""
    hardware: Any
    """Remote hardware to send the inference function to."""
    model_load_fn: Callable = load_embedding_model
    """Function to load the model remotely on the server."""
    load_fn_kwargs: Optional[dict] = None
    """Keyword arguments to pass to the model load function."""
    inference_fn: Callable = _embed_documents
    """Inference function to extract the embeddings."""

    def __init__(self, **kwargs: Any):
        """Initialize the remote inference function."""
        load_fn_kwargs = kwargs.pop("load_fn_kwargs", {})
        load_fn_kwargs["model_id"] = load_fn_kwargs.get("model_id", DEFAULT_MODEL_NAME)
        load_fn_kwargs["instruct"] = load_fn_kwargs.get("instruct", False)
        load_fn_kwargs["device"] = load_fn_kwargs.get("device", 0)
        super().__init__(load_fn_kwargs=load_fn_kwargs, **kwargs)
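
# A hedged end-to-end sketch, assuming a reachable runhouse cluster as in the
# class docstring. ``embed_documents`` and ``embed_query`` come from the
# SelfHostedEmbeddings base class and dispatch ``inference_fn`` to the remote
# hardware; the function name below is hypothetical.
def _example_self_hosted_embeddings() -> None:
    import runhouse as rh

    gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
    hf = SelfHostedHuggingFaceEmbeddings(hardware=gpu)
    doc_vectors = hf.embed_documents(["First document.", "Second document."])
    query_vector = hf.embed_query("What do the documents say?")
    print(len(doc_vectors), len(query_vector))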
class SelfHostedHuggingFaceInstructEmbeddings(SelfHostedHuggingFaceEmbeddings):
    """HuggingFace InstructEmbedding models on self-hosted remote hardware.

    Supported hardware includes auto-launched instances on AWS, GCP, Azure,
    and Lambda, as well as servers specified by IP address and SSH
    credentials (such as on-prem, or another cloud like Paperspace,
    Coreweave, etc.).

    To use, you should have the ``runhouse`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import SelfHostedHuggingFaceInstructEmbeddings
            import runhouse as rh

            model_id = "hkunlp/instructor-large"
            gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
            hf = SelfHostedHuggingFaceInstructEmbeddings(
                model_id=model_id, hardware=gpu)
    """  # noqa: E501

    model_id: str = DEFAULT_INSTRUCT_MODEL
    """Model name to use."""
    embed_instruction: str = DEFAULT_EMBED_INSTRUCTION
    """Instruction to use for embedding documents."""
    query_instruction: str = DEFAULT_QUERY_INSTRUCTION
    """Instruction to use for embedding query."""
    model_reqs: List[str] = ["./", "InstructorEmbedding", "torch"]
    """Requirements to install on hardware to inference the model."""

    def __init__(self, **kwargs: Any):
        """Initialize the remote inference function."""
        load_fn_kwargs = kwargs.pop("load_fn_kwargs", {})
        load_fn_kwargs["model_id"] = load_fn_kwargs.get(
            "model_id", DEFAULT_INSTRUCT_MODEL
        )
        load_fn_kwargs["instruct"] = load_fn_kwargs.get("instruct", True)
        load_fn_kwargs["device"] = load_fn_kwargs.get("device", 0)
        super().__init__(load_fn_kwargs=load_fn_kwargs, **kwargs)
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a HuggingFace instruct model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        instruction_pairs = []
        for text in texts:
            instruction_pairs.append([self.embed_instruction, text])
        embeddings = self.client(self.pipeline_ref, instruction_pairs)
        return embeddings.tolist()
    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a HuggingFace instruct model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        instruction_pair = [self.query_instruction, text]
        embedding = self.client(self.pipeline_ref, [instruction_pair])[0]
        return embedding.tolist()
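
# A hedged end-to-end sketch of the instruct variant, assuming a reachable
# runhouse cluster as in the class docstring. Per the two methods above, each
# document is sent as an [embed_instruction, text] pair and each query as a
# [query_instruction, text] pair; the function name below is hypothetical.
def _example_instruct_embeddings() -> None:
    import runhouse as rh

    gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
    hf = SelfHostedHuggingFaceInstructEmbeddings(hardware=gpu)
    doc_vectors = hf.embed_documents(["LangChain ships many embedding classes."])
    query_vector = hf.embed_query("Which embedding classes does LangChain ship?")
    print(len(doc_vectors), len(query_vector))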