class Dingo(VectorStore):
    """`Dingo` vector store.

    To use, you should have the ``dingodb`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import Dingo
            from langchain_community.embeddings.openai import OpenAIEmbeddings

            embeddings = OpenAIEmbeddings()
            dingo = Dingo(embeddings, "text")
    """
def __init__(
    self,
    embedding: Embeddings,
    text_key: str,
    *,
    client: Any = None,
    index_name: Optional[str] = None,
    dimension: int = 1024,
    host: Optional[List[str]] = None,
    user: str = "root",
    password: str = "123123",
    self_id: bool = False,
):
    """Initialize with Dingo client.

    Args:
        embedding: Embedding function used for documents and queries.
        text_key: Metadata key under which the raw text is stored.
        client: Optional pre-built ``dingodb.DingoDB`` client; when given,
            ``host``/``user``/``password`` are ignored.
        index_name: Index to use; created when absent.
        dimension: Vector dimension used when creating a new index.
        host: List of ``"host:port"`` endpoints; defaults to
            ``["172.20.31.10:13000"]``.
        user: Dingo user name.
        password: Dingo password.
        self_id: When True, create the index with ``auto_id=False`` so
            callers supply their own ids.

    Raises:
        ImportError: If the ``dingodb`` package is not installed.
        ValueError: If connecting to Dingo fails.
    """
    try:
        import dingodb
    except ImportError:
        raise ImportError(
            "Could not import dingo python package. "
            "Please install it with `pip install dingodb`."
        )

    host = host if host is not None else ["172.20.31.10:13000"]

    # collection
    if client is not None:
        dingo_client = client
    else:
        try:
            # connect to dingo db
            dingo_client = dingodb.DingoDB(user, password, host)
        except ValueError as e:
            raise ValueError(f"Dingo failed to connect: {e}")

    self._text_key = text_key
    self._client = dingo_client

    # Create the index only if it is absent under both its given and
    # upper-cased names (Dingo may report index names upper-cased).
    if (
        index_name is not None
        and index_name not in dingo_client.get_index()
        and index_name.upper() not in dingo_client.get_index()
    ):
        if self_id:
            dingo_client.create_index(index_name, dimension=dimension, auto_id=False)
        else:
            dingo_client.create_index(index_name, dimension=dimension)

    self._index_name = index_name
    self._embedding = embedding
[docs]defadd_texts(self,texts:Iterable[str],metadatas:Optional[List[dict]]=None,ids:Optional[List[str]]=None,text_key:str="text",batch_size:int=500,**kwargs:Any,)->List[str]:"""Run more texts through the embeddings and add to the vectorstore. Args: texts: Iterable of strings to add to the vectorstore. metadatas: Optional list of metadatas associated with the texts. ids: Optional list of ids to associate with the texts. Returns: List of ids from adding the texts into the vectorstore. """# Embed and create the documentsids=idsor[str(uuid.uuid4().int)[:13]for_intexts]metadatas_list=[]texts=list(texts)embeds=self._embedding.embed_documents(texts)fori,textinenumerate(texts):metadata=metadatas[i]ifmetadataselse{}metadata[self._text_key]=textmetadatas_list.append(metadata)# upsert to Dingoforiinrange(0,len(list(texts)),batch_size):j=i+batch_sizeadd_res=self._client.vector_add(self._index_name,metadatas_list[i:j],embeds[i:j],ids[i:j])ifnotadd_res:raiseException("vector add fail")returnids
[docs]defsimilarity_search(self,query:str,k:int=4,search_params:Optional[dict]=None,timeout:Optional[int]=None,**kwargs:Any,)->List[Document]:"""Return Dingo documents most similar to query, along with scores. Args: query: Text to look up documents similar to. k: Number of Documents to return. Defaults to 4. search_params: Dictionary of argument(s) to filter on metadata Returns: List of Documents most similar to the query and score for each """docs_and_scores=self.similarity_search_with_score(query,k=k,search_params=search_params,**kwargs)return[docfordoc,_indocs_and_scores]
def similarity_search_with_score(
    self,
    query: str,
    k: int = 4,
    search_params: Optional[dict] = None,
    timeout: Optional[int] = None,
    **kwargs: Any,
) -> List[Tuple[Document, float]]:
    """Return Dingo documents most similar to ``query``, with scores.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        search_params: Dictionary of argument(s) to filter on metadata.
        timeout: Unused.

    Returns:
        List of ``(Document, distance)`` pairs for the nearest hits.
    """
    embedded_query = self._embedding.embed_query(query)
    results = self._client.vector_search(
        self._index_name, xq=embedded_query, top_k=k, search_params=search_params
    )
    if not results:
        return []

    threshold = kwargs.get("score_threshold")
    scored_docs: List[Tuple[Document, float]] = []
    for hit in results[0]["vectorWithDistances"]:
        distance = hit["distance"]
        # Optionally drop hits whose distance exceeds the threshold.
        if threshold is not None and distance > threshold:
            continue
        scalar_data = hit["scalarData"]
        page_text = scalar_data[self._text_key]["fields"][0]["data"]
        metadata = {"id": hit["id"], "text": page_text, "score": distance}
        # Flatten every scalar field into the metadata dict.
        for field_name, field_value in scalar_data.items():
            metadata[field_name] = field_value["fields"][0]["data"]
        scored_docs.append(
            (Document(page_content=page_text, metadata=metadata), distance)
        )
    return scored_docs
def max_marginal_relevance_search_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    search_params: Optional[dict] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs selected using the maximal marginal relevance.

    Maximal marginal relevance optimizes for similarity to query AND
    diversity among selected documents.

    Args:
        embedding: Embedding to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        fetch_k: Number of Documents to fetch to pass to MMR algorithm.
        lambda_mult: Number between 0 and 1 that determines the degree
            of diversity among the results with 0 corresponding
            to maximum diversity and 1 to minimum diversity.
            Defaults to 0.5.
        search_params: Dictionary of argument(s) to filter on metadata.

    Returns:
        List of Documents selected by maximal marginal relevance.
    """
    # Fetch a pool of ``fetch_k`` candidates (the original passed
    # ``top_k=k``, leaving MMR with no extra candidates to diversify
    # over and making ``fetch_k`` dead).
    results = self._client.vector_search(
        self._index_name, [embedding], search_params=search_params, top_k=fetch_k
    )
    candidates = results[0]["vectorWithDistances"]
    mmr_selected = maximal_marginal_relevance(
        np.array([embedding], dtype=np.float32),
        [item["vector"]["floatValues"] for item in candidates],
        k=k,
        lambda_mult=lambda_mult,
    )
    selected = []
    for idx in mmr_selected:
        # Flatten scalarData; loop names chosen to avoid shadowing ``k``.
        meta_data = {
            str(field_name): field_value["fields"][0]["data"]
            for field_name, field_value in candidates[idx]["scalarData"].items()
        }
        selected.append(meta_data)
    return [
        Document(page_content=metadata.pop(self._text_key), metadata=metadata)
        for metadata in selected
    ]
[docs]defmax_marginal_relevance_search(self,query:str,k:int=4,fetch_k:int=20,lambda_mult:float=0.5,search_params:Optional[dict]=None,**kwargs:Any,)->List[Document]:"""Return docs selected using the maximal marginal relevance. Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents. Args: query: Text to look up documents similar to. k: Number of Documents to return. Defaults to 4. fetch_k: Number of Documents to fetch to pass to MMR algorithm. lambda_mult: Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. Returns: List of Documents selected by maximal marginal relevance. """embedding=self._embedding.embed_query(query)returnself.max_marginal_relevance_search_by_vector(embedding,k,fetch_k,lambda_mult,search_params)
@classmethod
def from_texts(
    cls,
    texts: List[str],
    embedding: Embeddings,
    metadatas: Optional[List[dict]] = None,
    ids: Optional[List[str]] = None,
    text_key: str = "text",
    index_name: Optional[str] = None,
    dimension: int = 1024,
    client: Any = None,
    host: Optional[List[str]] = None,
    user: str = "root",
    password: str = "123123",
    batch_size: int = 500,
    **kwargs: Any,
) -> Dingo:
    """Construct Dingo wrapper from raw documents.

    This is a user friendly interface that:
        1. Embeds documents.
        2. Adds the documents to a provided Dingo index

    This is intended to be a quick way to get started.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import Dingo
            from langchain_community.embeddings import OpenAIEmbeddings
            import dingodb

            embeddings = OpenAIEmbeddings()
            dingo = Dingo.from_texts(
                texts,
                embeddings,
                index_name="langchain-demo"
            )

    Raises:
        ImportError: If the ``dingodb`` package is not installed.
        ValueError: If connecting to Dingo fails.
        Exception: If a batch insert is rejected by Dingo.
    """
    try:
        import dingodb
    except ImportError:
        raise ImportError(
            "Could not import dingo python package. "
            "Please install it with `pip install dingodb`."
        )

    # Resolve the default endpoints here instead of using a mutable
    # (shared, per-process) list as the parameter default.
    host = host if host is not None else ["172.20.31.10:13000"]

    if client is not None:
        dingo_client = client
    else:
        try:
            # connect to dingo db
            dingo_client = dingodb.DingoDB(user, password, host)
        except ValueError as e:
            raise ValueError(f"Dingo failed to connect: {e}")

    # Create the index only if it is absent under both its given and
    # upper-cased names. ``self_id=True`` in kwargs requests
    # caller-managed ids (auto_id off). (``kwargs`` is always a dict,
    # so the original ``kwargs is not None`` guard was redundant.)
    if (
        index_name is not None
        and index_name not in dingo_client.get_index()
        and index_name.upper() not in dingo_client.get_index()
    ):
        if kwargs.get("self_id") is True:
            dingo_client.create_index(index_name, dimension=dimension, auto_id=False)
        else:
            dingo_client.create_index(index_name, dimension=dimension)

    # Embed and create the documents; listify first so a generator
    # input is not exhausted by the id comprehension.
    texts = list(texts)
    ids = ids or [str(uuid.uuid4().int)[:13] for _ in texts]
    metadatas_list = []
    embeds = embedding.embed_documents(texts)
    for i, text in enumerate(texts):
        metadata = metadatas[i] if metadatas else {}
        metadata[text_key] = text
        metadatas_list.append(metadata)

    # upsert to Dingo in batches
    for i in range(0, len(texts), batch_size):
        j = i + batch_size
        add_res = dingo_client.vector_add(
            index_name, metadatas_list[i:j], embeds[i:j], ids[i:j]
        )
        if not add_res:
            raise Exception("vector add fail")

    return cls(embedding, text_key, client=dingo_client, index_name=index_name)
[docs]defdelete(self,ids:Optional[List[str]]=None,**kwargs:Any,)->Any:"""Delete by vector IDs or filter. Args: ids: List of ids to delete. """ifidsisNone:raiseValueError("No ids provided to delete.")returnself._client.vector_delete(self._index_name,ids=ids)