Source code for langchain_community.vectorstores.tencentvectordb
"""Wrapper around the Tencent vector database."""from__future__importannotationsimportjsonimportloggingimporttimefromenumimportEnumfromtypingimport(Any,Callable,Dict,Iterable,List,Optional,Sequence,Tuple,Union,cast,)importnumpyasnpfromlangchain_core.documentsimportDocumentfromlangchain_core.embeddingsimportEmbeddingsfromlangchain_core.utilsimportguard_importfromlangchain_core.vectorstoresimportVectorStorefrompydanticimportBaseModelfromlangchain_community.vectorstores.utilsimportmaximal_marginal_relevancelogger=logging.getLogger(__name__)META_FIELD_TYPE_UINT64="uint64"META_FIELD_TYPE_STRING="string"META_FIELD_TYPE_ARRAY="array"META_FIELD_TYPE_VECTOR="vector"META_FIELD_TYPES=[META_FIELD_TYPE_UINT64,META_FIELD_TYPE_STRING,META_FIELD_TYPE_ARRAY,META_FIELD_TYPE_VECTOR,]
class ConnectionParams:
    """Tencent vector DB Connection params.

    See the following documentation for details:
    https://cloud.tencent.com/document/product/1709/95820

    Attributes:
        url (str): The access address of the vector database server
            that the client needs to connect to.
        key (str): API key for client to access the vector database server,
            which is used for authentication.
        username (str): Account for client to access the vector database server.
        timeout (int): Request timeout.
    """
class IndexParams:
    """Tencent vector DB Index params.

    See the following documentation for details:
    https://cloud.tencent.com/document/product/1709/95826
    """
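# Usage sketch (not part of the original module): constructing the two parameter
# objects above. The keyword names follow the attributes documented in their
# docstrings and the calls made elsewhere in this module; the URL and key values
# are placeholders.
#
#     conn_params = ConnectionParams(
#         url="http://10.0.0.1",
#         key="your-api-key",
#         username="root",
#         timeout=30,
#     )
#     index_params = IndexParams(dimension=768)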
class TencentVectorDB(VectorStore):
    """Tencent VectorDB as a vector store.

    In order to use this you need to have a database instance.
    See the following documentation for details:
    https://cloud.tencent.com/document/product/1709/104489
    """

    field_id: str = "id"
    field_vector: str = "vector"
    field_text: str = "text"
    field_metadata: str = "metadata"
    def __init__(
        self,
        embedding: Embeddings,
        connection_params: ConnectionParams,
        index_params: IndexParams = IndexParams(768),
        database_name: str = "LangChainDatabase",
        collection_name: str = "LangChainCollection",
        drop_old: Optional[bool] = False,
        collection_description: Optional[str] = "Collection for LangChain",
        meta_fields: Optional[List[MetaField]] = None,
        t_vdb_embedding: Optional[str] = "bge-base-zh",
    ):
        self.document = guard_import("tcvectordb.model.document")
        tcvectordb = guard_import("tcvectordb")
        tcollection = guard_import("tcvectordb.model.collection")
        enum = guard_import("tcvectordb.model.enum")
        # When no client-side Embeddings object is supplied, fall back to a
        # server-side embedding model identified by `t_vdb_embedding`.
        self.embedding_model = None
        if embedding is None and t_vdb_embedding:
            embedding_model = [
                model
                for model in enum.EmbeddingModel
                if t_vdb_embedding == model.model_name
            ]
            if not any(embedding_model):
                raise ValueError(
                    f"embedding model `{t_vdb_embedding}` is invalid. "
                    f"choices: {[member.model_name for member in enum.EmbeddingModel]}"
                )
            self.embedding_model = tcollection.Embedding(
                vector_field="vector", field="text", model=embedding_model[0]
            )
        self.embedding_func = embedding
        self.index_params = index_params
        self.collection_description = collection_description
        self.vdb_client = tcvectordb.VectorDBClient(
            url=connection_params.url,
            username=connection_params.username,
            key=connection_params.key,
            timeout=connection_params.timeout,
        )
        self.meta_fields = meta_fields
        # Reuse the database if it already exists, otherwise create it.
        db_list = self.vdb_client.list_databases()
        db_exist: bool = False
        for db in db_list:
            if database_name == db.database_name:
                db_exist = True
                break
        if db_exist:
            self.database = self.vdb_client.database(database_name)
        else:
            self.database = self.vdb_client.create_database(database_name)
        # Reuse the collection unless `drop_old` is set; create it if missing.
        try:
            self.collection = self.database.describe_collection(collection_name)
            if drop_old:
                self.database.drop_collection(collection_name)
                self._create_collection(collection_name)
        except tcvectordb.exceptions.VectorDBException:
            self._create_collection(collection_name)
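    # Usage sketch (not part of the original module): instantiating the store on the
    # server-side embedding path, i.e. embedding=None plus a t_vdb_embedding model
    # name, which the constructor above resolves against enum.EmbeddingModel. Assumes
    # the `tcvectordb` SDK is installed and `conn_params` was built as shown earlier.
    #
    #     store = TencentVectorDB(
    #         embedding=None,
    #         connection_params=conn_params,
    #         t_vdb_embedding="bge-base-zh",
    #         drop_old=True,
    #     )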
    def delete(
        self,
        ids: Optional[List[str]] = None,
        filter_expr: Optional[str] = None,
        **kwargs: Any,
    ) -> Optional[bool]:
        """Delete documents from the collection."""
        delete_attrs = {}
        if ids:
            delete_attrs["ids"] = ids
        if filter_expr:
            delete_attrs["filter"] = self.document.Filter(filter_expr)
        self.collection.delete(**delete_attrs)
        return True
    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        connection_params: Optional[ConnectionParams] = None,
        index_params: Optional[IndexParams] = None,
        database_name: str = "LangChainDatabase",
        collection_name: str = "LangChainCollection",
        drop_old: Optional[bool] = False,
        collection_description: Optional[str] = "Collection for LangChain",
        meta_fields: Optional[List[MetaField]] = None,
        t_vdb_embedding: Optional[str] = "bge-base-zh",
        **kwargs: Any,
    ) -> TencentVectorDB:
        """Create a collection, index it with HNSW, and insert data."""
        if len(texts) == 0:
            raise ValueError("texts is empty")
        if connection_params is None:
            raise ValueError("connection_params is empty")
        enum = guard_import("tcvectordb.model.enum")
        if embedding is None and t_vdb_embedding is None:
            raise ValueError("embedding and t_vdb_embedding cannot be both None")
        # Determine the vector dimension, either by probing the client-side
        # embedding or by looking up the server-side embedding model.
        if embedding:
            embeddings = embedding.embed_documents(texts[0:1])
            dimension = len(embeddings[0])
        else:
            embedding_model = [
                model
                for model in enum.EmbeddingModel
                if t_vdb_embedding == model.model_name
            ]
            if not any(embedding_model):
                raise ValueError(
                    f"embedding model `{t_vdb_embedding}` is invalid. "
                    f"choices: {[member.model_name for member in enum.EmbeddingModel]}"
                )
            dimension = embedding_model[0]._EmbeddingModel__dimensions
        if index_params is None:
            index_params = IndexParams(dimension=dimension)
        else:
            index_params.dimension = dimension
        vector_db = cls(
            embedding=embedding,
            connection_params=connection_params,
            index_params=index_params,
            database_name=database_name,
            collection_name=collection_name,
            drop_old=drop_old,
            collection_description=collection_description,
            meta_fields=meta_fields,
            t_vdb_embedding=t_vdb_embedding,
        )
        vector_db.add_texts(texts=texts, metadatas=metadatas)
        return vector_db
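    # Usage sketch (not part of the original module): the usual entry point. Passing a
    # client-side Embeddings implementation lets from_texts infer the vector dimension
    # from a probe embedding; with embedding=None it falls back to the t_vdb_embedding
    # model. `my_embeddings` is a stand-in for any LangChain Embeddings instance.
    #
    #     store = TencentVectorDB.from_texts(
    #         texts=["doc one", "doc two"],
    #         embedding=my_embeddings,
    #         metadatas=[{"source": "a"}, {"source": "b"}],
    #         connection_params=conn_params,
    #         drop_old=True,
    #     )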
    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        timeout: Optional[int] = None,
        batch_size: int = 1000,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Insert text data into TencentVectorDB."""
        texts = list(texts)
        if len(texts) == 0:
            logger.debug("Nothing to insert, skipping.")
            return []
        if self.embedding_func:
            embeddings = self.embedding_func.embed_documents(texts)
        else:
            embeddings = []
        pks: list[str] = []
        total_count = len(texts)
        for start in range(0, total_count, batch_size):
            # Grab end index
            docs = []
            end = min(start + batch_size, total_count)
            for id in range(start, end, 1):
                metadata = (
                    self._get_meta(metadatas[id]) if metadatas and metadatas[id] else {}
                )
                doc_id = ids[id] if ids else None
                doc_attrs: Dict[str, Any] = {
                    "id": doc_id
                    or "{}-{}-{}".format(time.time_ns(), hash(texts[id]), id)
                }
                if embeddings:
                    doc_attrs["vector"] = embeddings[id]
                doc_attrs["text"] = texts[id]
                doc_attrs.update(metadata)
                doc = self.document.Document(**doc_attrs)
                docs.append(doc)
                pks.append(doc_attrs["id"])
            self.collection.upsert(docs, timeout)
        return pks
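    # Usage sketch (not part of the original module): incremental inserts. IDs are
    # optional; when omitted, the method above synthesizes "{time_ns}-{hash}-{index}"
    # identifiers and returns them.
    #
    #     pks = store.add_texts(
    #         texts=["another doc"],
    #         metadatas=[{"source": "c"}],
    #         ids=["doc-3"],
    #     )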
    def similarity_search(
        self,
        query: str,
        k: int = 4,
        param: Optional[dict] = None,
        expr: Optional[str] = None,
        timeout: Optional[int] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Perform a similarity search against the query string."""
        res = self.similarity_search_with_score(
            query=query, k=k, param=param, expr=expr, timeout=timeout, **kwargs
        )
        return [doc for doc, _ in res]
    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
        param: Optional[dict] = None,
        expr: Optional[str] = None,
        timeout: Optional[int] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Perform a search on a query string and return results with score."""
        # Embed the query text.
        if self.embedding_func:
            embedding = self.embedding_func.embed_query(query)
            return self.similarity_search_with_score_by_vector(
                embedding=embedding,
                k=k,
                param=param,
                expr=expr,
                timeout=timeout,
                **kwargs,
            )
        return self.similarity_search_with_score_by_vector(
            embedding=[],
            k=k,
            param=param,
            expr=expr,
            timeout=timeout,
            query=query,
            **kwargs,
        )
    def similarity_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        param: Optional[dict] = None,
        expr: Optional[str] = None,
        timeout: Optional[int] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Perform a similarity search against the query vector."""
        docs = self.similarity_search_with_score_by_vector(
            embedding=embedding, k=k, param=param, expr=expr, timeout=timeout, **kwargs
        )
        return [doc for doc, _ in docs]
    def similarity_search_with_score_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        param: Optional[dict] = None,
        expr: Optional[str] = None,
        filter: Optional[str] = None,
        timeout: Optional[int] = None,
        query: Optional[str] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Perform a search on an embedding vector, or on the raw query text
        when `query` is given, and return results with score."""
        if filter and not expr:
            expr = translate_filter(
                filter, [f.name for f in (self.meta_fields or []) if f.index]
            )
        search_args = {
            "filter": self.document.Filter(expr) if expr else None,
            "params": self.document.HNSWSearchParams(ef=(param or {}).get("ef", 10)),
            "retrieve_vector": False,
            "limit": k,
            "timeout": timeout,
        }
        if query:
            search_args["embeddingItems"] = [query]
            res: List[List[Dict]] = self.collection.searchByText(**search_args).get(
                "documents"
            )
        else:
            search_args["vectors"] = [embedding]
            res = self.collection.search(**search_args)
        ret: List[Tuple[Document, float]] = []
        if res is None or len(res) == 0:
            return ret
        for result in res[0]:
            meta = self._get_meta(result)
            doc = Document(page_content=result.get(self.field_text), metadata=meta)  # type: ignore[arg-type]
            pair = (doc, result.get("score", 0.0))
            ret.append(pair)
        return ret
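    # Usage sketch (not part of the original module): scored search with a metadata
    # filter. `expr` is passed through to a tcvectordb Filter; the field name "source"
    # is hypothetical, and the expression syntax is the one documented for Tencent
    # VectorDB filters.
    #
    #     hits = store.similarity_search_with_score(
    #         query="what is a vector database?",
    #         k=4,
    #         expr='source="a"',
    #     )
    #     for doc, score in hits:
    #         print(score, doc.page_content)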
    def max_marginal_relevance_search(
        self,
        query: str,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        param: Optional[dict] = None,
        expr: Optional[str] = None,
        timeout: Optional[int] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Perform a search and return results that are reordered by MMR."""
        if self.embedding_func:
            embedding = self.embedding_func.embed_query(query)
            return self.max_marginal_relevance_search_by_vector(
                embedding=embedding,
                k=k,
                fetch_k=fetch_k,
                lambda_mult=lambda_mult,
                param=param,
                expr=expr,
                timeout=timeout,
                **kwargs,
            )
        # tvdb will do the query embedding
        docs = self.similarity_search_with_score(
            query=query, k=fetch_k, param=param, expr=expr, timeout=timeout, **kwargs
        )
        return [doc for doc, _ in docs]
    def _get_meta(self, result: Dict) -> Dict:
        """Get metadata from the result."""
        if self.meta_fields:
            return {field.name: result.get(field.name) for field in self.meta_fields}
        elif result.get(self.field_metadata):
            raw_meta = result.get(self.field_metadata)
            if raw_meta and isinstance(raw_meta, str):
                return json.loads(raw_meta)
        return {}
    def max_marginal_relevance_search_by_vector(
        self,
        embedding: list[float],
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        param: Optional[dict] = None,
        expr: Optional[str] = None,
        filter: Optional[str] = None,
        timeout: Optional[int] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Perform a search and return results that are reordered by MMR."""
        if filter and not expr:
            expr = translate_filter(
                filter, [f.name for f in (self.meta_fields or []) if f.index]
            )
        res: List[List[Dict]] = self.collection.search(
            vectors=[embedding],
            filter=self.document.Filter(expr) if expr else None,
            params=self.document.HNSWSearchParams(ef=(param or {}).get("ef", 10)),
            retrieve_vector=True,
            limit=fetch_k,
            timeout=timeout,
        )
        # Organize results.
        documents = []
        ordered_result_embeddings = []
        for result in res[0]:
            meta = self._get_meta(result)
            doc = Document(page_content=result.get(self.field_text), metadata=meta)  # type: ignore[arg-type]
            documents.append(doc)
            ordered_result_embeddings.append(result.get(self.field_vector))
        # Get the new order of results.
        new_ordering = maximal_marginal_relevance(
            np.array(embedding), ordered_result_embeddings, k=k, lambda_mult=lambda_mult
        )
        # Reorder the values and return.
        return [documents[x] for x in new_ordering if x != -1]
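    # Usage sketch (not part of the original module): MMR retrieval. fetch_k candidates
    # are pulled along with their vectors (retrieve_vector=True above) and re-ranked by
    # maximal_marginal_relevance down to k results.
    #
    #     diverse_docs = store.max_marginal_relevance_search(
    #         query="vector database",
    #         k=3,
    #         fetch_k=20,
    #         lambda_mult=0.5,
    #     )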
    def _select_relevance_score_fn(self) -> Callable[[float], float]:
        metric_type = self.index_params.metric_type
        if metric_type == "COSINE":
            return self._cosine_relevance_score_fn
        elif metric_type == "L2":
            return self._euclidean_relevance_score_fn
        elif metric_type == "IP":
            return self._max_inner_product_relevance_score_fn
        else:
            raise ValueError(
                "No supported normalization function"
                f" for distance metric of type: {metric_type}."
            )
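# Usage sketch (not part of the original module): relevance-scored search. The
# similarity_search_with_relevance_scores method inherited from the VectorStore base
# class picks a normalization function via _select_relevance_score_fn above, so the
# index's metric_type must be one of COSINE, L2, or IP.
#
#     scored = store.similarity_search_with_relevance_scores("vector database", k=4)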