class DashVector(VectorStore):
    """`DashVector` vector store.

    To use, you should have the ``dashvector`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import DashVector
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            import dashvector

            client = dashvector.Client(api_key="***")
            client.create("langchain", dimension=1024)
            collection = client.get("langchain")
            embeddings = OpenAIEmbeddings()
            vectorstore = DashVector(collection, embeddings, "text")
    """

    def __init__(
        self,
        collection: Any,
        embedding: Embeddings,
        text_field: str,
    ):
        """Initialize with DashVector collection.

        Args:
            collection: The collection to read from and write to; must be an
                instance of ``dashvector.Collection``.
            embedding: Embeddings object; its ``embed_documents`` and
                ``embed_query`` methods are used by this class.
            text_field: Name of the document field under which the raw text
                is stored alongside the metadata.

        Raises:
            ImportError: If the ``dashvector`` package is not installed.
            ValueError: If ``collection`` is not a ``dashvector.Collection``.
        """
        try:
            import dashvector
        except ImportError:
            raise ImportError(
                "Could not import dashvector python package. "
                "Please install it with `pip install dashvector`."
            )

        if not isinstance(collection, dashvector.Collection):
            raise ValueError(
                f"collection should be an instance of dashvector.Collection, "
                f"but got {type(collection)}"
            )

        self._collection = collection
        self._embedding = embedding
        self._text_field = text_field

    def _create_partition_if_not_exists(self, partition: str) -> None:
        """Create a Partition in current Collection.

        The result of ``create_partition`` is not inspected here.
        NOTE(review): presumably the call is harmless when the partition
        already exists -- confirm against the dashvector SDK.
        """
        self._collection.create_partition(partition)

    def _similarity_search_with_score_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        filter: Optional[str] = None,
        partition: str = "default",
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query vector, along with scores.

        Args:
            embedding: Query vector.
            k: Number of documents to request (passed as ``topk``).
            filter: Filter expression forwarded to the collection query.
            partition: Name of the partition to query.

        Returns:
            List of ``(Document, score)`` tuples in the order returned by the
            collection.

        Raises:
            ValueError: If the query response is falsy.
        """
        # query by vector
        ret = self._collection.query(
            embedding, topk=k, filter=filter, partition=partition
        )
        if not ret:
            # Report the message of the failed query response itself.
            raise ValueError(f"Fail to query docs by vector, error {ret.message}")

        docs = []
        for doc in ret:
            metadata = doc.fields
            # The text lives in the fields under ``text_field``; split it out
            # so the remaining fields become the Document metadata.
            text = metadata.pop(self._text_field)
            score = doc.score
            docs.append((Document(page_content=text, metadata=metadata), score))
        return docs

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        batch_size: int = 25,
        partition: str = "default",
        **kwargs: Any,
    ) -> List[str]:
        """Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
                The dicts passed in are not modified.
            ids: Optional list of ids associated with the texts. When omitted,
                random UUID hex strings are generated.
            batch_size: Optional batch size to upsert docs.
            partition: a partition name in collection. [optional].
            kwargs: vectorstore specific parameters

        Returns:
            List of ids from adding the texts into the vectorstore.

        Raises:
            ValueError: If an upsert response is falsy.
        """
        self._create_partition_if_not_exists(partition)

        # Materialize the texts *before* generating ids: ``texts`` may be a
        # one-shot iterator, and iterating it twice would leave nothing to add.
        text_list = list(texts)
        ids = ids or [uuid.uuid4().hex for _ in text_list]

        for start in range(0, len(text_list), batch_size):
            # batch end
            end = min(start + batch_size, len(text_list))

            batch_texts = text_list[start:end]
            batch_ids = ids[start:end]
            batch_embeddings = self._embedding.embed_documents(batch_texts)

            # batch metadatas (copied so the caller's dicts are left untouched
            # when the text field is injected below)
            if metadatas:
                batch_metadatas = [dict(m) for m in metadatas[start:end]]
            else:
                batch_metadatas = [{} for _ in batch_texts]
            for metadata, text in zip(batch_metadatas, batch_texts):
                metadata[self._text_field] = text

            # batch upsert to collection
            docs = list(zip(batch_ids, batch_embeddings, batch_metadatas))
            ret = self._collection.upsert(docs, partition=partition)
            if not ret:
                raise ValueError(
                    f"Fail to upsert docs to dashvector vector database,"
                    f"Error: {ret.message}"
                )
        return ids

    def delete(
        self, ids: Optional[List[str]] = None, partition: str = "default", **kwargs: Any
    ) -> bool:
        """Delete by vector ID.

        Args:
            ids: List of ids to delete.
            partition: a partition name in collection. [optional].

        Returns:
            True if deletion is successful,
            False otherwise.
        """
        return bool(self._collection.delete(ids, partition=partition))

    def similarity_search(
        self,
        query: str,
        k: int = 4,
        filter: Optional[str] = None,
        partition: str = "default",
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to query.

        Args:
            query: Text to search documents similar to.
            k: Number of documents to return. Default to 4.
            filter: Doc fields filter conditions that meet the SQL where clause
                    specification.
            partition: a partition name in collection. [optional].

        Returns:
            List of Documents most similar to the query text.
        """
        docs_and_scores = self.similarity_search_with_relevance_scores(
            query, k, filter, partition
        )
        return [doc for doc, _ in docs_and_scores]

    def similarity_search_with_relevance_scores(
        self,
        query: str,
        k: int = 4,
        filter: Optional[str] = None,
        partition: str = "default",
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query text, along with relevance scores.

        Less is more similar, more is more dissimilar.

        Args:
            query: input text
            k: Number of Documents to return. Defaults to 4.
            filter: Doc fields filter conditions that meet the SQL where clause
                    specification.
            partition: a partition name in collection. [optional].

        Returns:
            List of Tuples of (doc, similarity_score)
        """
        embedding = self._embedding.embed_query(query)
        return self._similarity_search_with_score_by_vector(
            embedding, k=k, filter=filter, partition=partition
        )

    def similarity_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        filter: Optional[str] = None,
        partition: str = "default",
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to embedding vector.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: Doc fields filter conditions that meet the SQL where clause
                    specification.
            partition: a partition name in collection. [optional].

        Returns:
            List of Documents most similar to the query vector.
        """
        docs_and_scores = self._similarity_search_with_score_by_vector(
            embedding, k, filter, partition
        )
        return [doc for doc, _ in docs_and_scores]

    def max_marginal_relevance_search(
        self,
        query: str,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        filter: Optional[dict] = None,
        partition: str = "default",
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            filter: Doc fields filter conditions that meet the SQL where clause
                    specification.
            partition: a partition name in collection. [optional].

        Returns:
            List of Documents selected by maximal marginal relevance.
        """
        embedding = self._embedding.embed_query(query)
        return self.max_marginal_relevance_search_by_vector(
            embedding, k, fetch_k, lambda_mult, filter, partition
        )

    def max_marginal_relevance_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        filter: Optional[dict] = None,
        partition: str = "default",
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            filter: Doc fields filter conditions that meet the SQL where clause
                    specification.
            partition: a partition name in collection. [optional].

        Returns:
            List of Documents selected by maximal marginal relevance.

        Raises:
            ValueError: If the query response is falsy.
        """
        # query by vector; vectors are requested so MMR can compare candidates
        ret = self._collection.query(
            embedding,
            topk=fetch_k,
            filter=filter,
            partition=partition,
            include_vector=True,
        )
        if not ret:
            # Report the message of the failed query response itself.
            raise ValueError(f"Fail to query docs by vector, error {ret.message}")

        candidate_embeddings = [doc.vector for doc in ret]
        mmr_selected = maximal_marginal_relevance(
            np.array(embedding), candidate_embeddings, lambda_mult, k
        )

        metadatas = [ret.output[i].fields for i in mmr_selected]
        # ``pop`` runs before ``metadata`` is passed, so the text field ends up
        # in page_content only and not in the Document metadata.
        return [
            Document(page_content=metadata.pop(self._text_field), metadata=metadata)
            for metadata in metadatas
        ]

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        dashvector_api_key: Optional[str] = None,
        dashvector_endpoint: Optional[str] = None,
        collection_name: str = "langchain",
        text_field: str = "text",
        batch_size: int = 25,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> DashVector:
        """Return DashVector VectorStore initialized from texts and embeddings.

        This is the quick way to get started with dashvector vector store.

        Args:
            texts: Texts to embed and store. ``texts[0]`` is embedded to
                determine the dimension when the collection must be created.
            embedding: Embeddings object used to embed the texts.
            metadatas: Optional list of metadatas associated with the texts.
            dashvector_api_key: API key; when not given it is looked up via
                ``get_from_env`` under ``DASHVECTOR_API_KEY``.
            dashvector_endpoint: Service endpoint; when not given it is looked
                up via ``get_from_env`` under ``DASHVECTOR_ENDPOINT``, with
                ``dashvector.cn-hangzhou.aliyuncs.com`` as the default.
            collection_name: Name of the collection to use.
            text_field: Name of the field the raw text is stored under.
            batch_size: Batch size used when upserting the texts.
            ids: Optional list of ids associated with the texts.
            kwargs: Accepted for interface compatibility; not used here.

        Returns:
            A ``DashVector`` instance with the texts already added.

        Raises:
            ImportError: If the ``dashvector`` package is not installed.
            ValueError: If the collection cannot be created.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import DashVector
                from langchain_community.embeddings import OpenAIEmbeddings

                embeddings = OpenAIEmbeddings()
                vectorstore = DashVector.from_texts(
                    texts,
                    embeddings,
                    dashvector_api_key="{DASHVECTOR_API_KEY}"
                )
        """
        try:
            import dashvector
        except ImportError:
            raise ImportError(
                "Could not import dashvector python package. "
                "Please install it with `pip install dashvector`."
            )

        dashvector_api_key = dashvector_api_key or get_from_env(
            "dashvector_api_key", "DASHVECTOR_API_KEY"
        )

        dashvector_endpoint = dashvector_endpoint or get_from_env(
            "dashvector_endpoint",
            "DASHVECTOR_ENDPOINT",
            default="dashvector.cn-hangzhou.aliyuncs.com",
        )

        dashvector_client = dashvector.Client(
            api_key=dashvector_api_key, endpoint=dashvector_endpoint
        )
        # NOTE(review): a delete for ``collection_name`` is issued before the
        # lookup below, so existing data under that name is discarded on every
        # call. Kept as-is because callers may rely on the reset -- confirm
        # this destructive behavior is intended.
        dashvector_client.delete(collection_name)
        collection = dashvector_client.get(collection_name)
        if not collection:
            dim = len(embedding.embed_query(texts[0]))
            # create collection if not existed
            resp = dashvector_client.create(collection_name, dimension=dim)
            if resp:
                collection = dashvector_client.get(collection_name)
            else:
                raise ValueError(f"Fail to create collection. Error: {resp.message}.")

        dashvector_vector_db = cls(collection, embedding, text_field)
        dashvector_vector_db.add_texts(texts, metadatas, ids, batch_size)
        return dashvector_vector_db