[docs]classAscendEmbeddings(Embeddings,BaseModel):""" Ascend NPU accelerate Embedding model Please ensure that you have installed CANN and torch_npu. Example: from langchain_community.embeddings import AscendEmbeddings model = AscendEmbeddings(model_path=<path_to_model>, device_id=0, query_instruction="Represent this sentence for searching relevant passages: " ) """"""model path"""model_path:str"""Ascend NPU device id."""device_id:int=0"""Unstruntion to used for embedding query."""query_instruction:str="""""Unstruntion to used for embedding document."""document_instruction:str=""use_fp16:bool=Truepooling_method:Optional[str]="cls"batch_size:int=32model:Anytokenizer:Anymodel_config=ConfigDict(protected_namespaces=())def__init__(self,*args:Any,**kwargs:Any)->None:super().__init__(*args,**kwargs)try:fromtransformersimportAutoModel,AutoTokenizerexceptImportErrorase:raiseImportError("Unable to import transformers, please install with ""`pip install -U transformers`.")frometry:self.model=AutoModel.from_pretrained(self.model_path).npu().eval()self.tokenizer=AutoTokenizer.from_pretrained(self.model_path)exceptExceptionase:raiseException(f"Failed to load model [self.model_path], due to following error:{e}")ifself.use_fp16:self.model.half()self.encode([f"warmup {i} times"foriinrange(10)])@model_validator(mode="before")@classmethoddefvalidate_environment(cls,values:Dict)->Any:if"model_path"notinvalues:raiseValueError("model_path is required")ifnotos.access(values["model_path"],os.F_OK):raiseFileNotFoundError(f"Unable to find valid model path in [{values['model_path']}]")try:importtorch_npuexceptImportError:raiseModuleNotFoundError("torch_npu not found, please install torch_npu")exceptExceptionase:raiseetry:torch_npu.npu.set_device(values["device_id"])exceptExceptionase:raiseException(f"set device failed due to {e}")returnvalues
[docs]defencode(self,sentences:Any)->Any:inputs=self.tokenizer(sentences,padding=True,truncation=True,return_tensors="pt",max_length=512,)try:importtorchexceptImportErrorase:raiseImportError("Unable to import torch, please install with `pip install -U torch`.")fromelast_hidden_state=self.model(inputs.input_ids.npu(),inputs.attention_mask.npu(),return_dict=True).last_hidden_statetmp=self.pooling(last_hidden_state,inputs["attention_mask"].npu())embeddings=torch.nn.functional.normalize(tmp,dim=-1)returnembeddings.cpu().detach().numpy()
[docs]defpooling(self,last_hidden_state:Any,attention_mask:Any=None)->Any:try:importtorchexceptImportErrorase:raiseImportError("Unable to import torch, please install with `pip install -U torch`.")fromeifself.pooling_method=="cls":returnlast_hidden_state[:,0]elifself.pooling_method=="mean":s=torch.sum(last_hidden_state*attention_mask.unsqueeze(-1).float(),dim=-1)d=attention_mask.sum(dim=1,keepdim=True).float()returns/delse:raiseNotImplementedError(f"Pooling method [{self.pooling_method}] not implemented")
[docs]defembed_documents(self,texts:List[str])->List[List[float]]:try:importnumpyasnpexceptImportErrorase:raiseImportError("Unable to import numpy, please install with `pip install -U numpy`.")fromeembedding_list=[]foriinrange(0,len(texts),self.batch_size):texts_=texts[i:i+self.batch_size]emb=self.encode([self.document_instruction+textfortextintexts_])embedding_list.append(emb)returnnp.concatenate(embedding_list)