class VikingDBConfig:
    """Connection settings for a VikingDB service.

    See the following documentation for details:
    https://www.volcengine.com/docs/6459/1167770

    Attributes:
        host (str): The access address of the vector database server that
            the client needs to connect to.
        region (str): "cn-shanghai" or "cn-beijing".
        ak (str): Access Key ID, security credentials for accessing
            Volcano Engine services.
        sk (str): Secret Access Key, security credentials for accessing
            Volcano Engine services.
        scheme (str): http or https, defaulting to http.
    """
class VikingDB(VectorStore):
    """Vector store backed by VikingDB.

    In order to use this you need to have a database instance.
    See the following documentation for details:
    https://www.volcengine.com/docs/6459/1167774
    """

    def __init__(
        self,
        embedding_function: Embeddings,
        collection_name: str = "LangChainCollection",
        connection_args: Optional[VikingDBConfig] = None,
        index_params: Optional[dict] = None,
        drop_old: Optional[bool] = False,
        **kwargs: Any,
    ):
        """Connect to VikingDB and look up the collection and its index.

        Args:
            embedding_function: Embeddings object used for texts and queries.
            collection_name: Name of the collection to read from / write to.
            connection_args: Connection settings; its ``host``, ``region``,
                ``ak``, ``sk`` and ``scheme`` attributes are passed to
                ``VikingDBService``.
            index_params: Optional overrides used when the index is created
                (``cpu_quota``, ``vector_index``, ``partition_by``,
                ``scalar_index``).
            drop_old: When true and the collection already exists, its
                indexes and the collection itself are dropped.
            **kwargs: Accepted for interface compatibility; unused here.

        Raises:
            ImportError: If the ``volcengine`` package is not installed.
            ValueError: If ``connection_args`` is ``None``.
        """
        try:
            from volcengine.viking_db import Collection, VikingDBService
        except ImportError as exc:
            raise ImportError(
                "Could not import volcengine python package. "
                "Please install it with `pip install --upgrade volcengine`."
            ) from exc

        # Fail with a clear message instead of an AttributeError on None.
        if connection_args is None:
            raise ValueError(
                "connection_args (a VikingDBConfig) is required to connect "
                "to VikingDB."
            )

        self.embedding_func = embedding_function
        self.collection_name = collection_name
        self.index_name = "LangChainIndex"
        self.connection_args = connection_args
        self.index_params = index_params
        self.drop_old = drop_old
        self.service = VikingDBService(
            connection_args.host,
            connection_args.region,
            connection_args.ak,
            connection_args.sk,
            connection_args.scheme,
        )

        # Best effort: any failure to fetch the collection is treated as
        # "no collection yet"; add_texts creates it on first insert.
        try:
            col = self.service.get_collection(collection_name)
        except Exception:
            col = None
        self.collection = col
        self.index = None

        if self.collection is None:
            return

        if drop_old and isinstance(self.collection, Collection):
            # The index is not fetched first: it is about to be dropped, and
            # fetching it could only add a failure mode.
            for existing in self.service.list_indexes(collection_name):
                self.service.drop_index(collection_name, existing.index_name)
            self.service.drop_collection(collection_name)
            self.collection = None
        else:
            self.index = self.service.get_index(
                self.collection_name, self.index_name
            )

    @property
    def embeddings(self) -> Embeddings:
        """Return the embeddings object supplied at construction time."""
        return self.embedding_func

    def _create_collection(
        self, embeddings: List, metadatas: Optional[List[dict]] = None
    ) -> None:
        """Create the collection, deriving its schema from the first records.

        The vector dimension is the length of ``embeddings[0]``; scalar
        fields are derived from the keys and value types of ``metadatas[0]``.

        Args:
            embeddings: Embedding vectors; only the first one is inspected.
            metadatas: Optional metadata dicts; only the first one is
                inspected.

        Raises:
            ImportError: If the ``volcengine`` package is not installed.
            ValueError: If a metadata value has an unsupported type.
        """
        try:
            from volcengine.viking_db import Field, FieldType
        except ImportError as exc:
            raise ImportError(
                "Could not import volcengine python package. "
                "Please install it with `pip install --upgrade volcengine`."
            ) from exc

        dim = len(embeddings[0])
        fields = []
        if metadatas:
            for key, value in metadatas[0].items():
                # bool must be tested before int: bool is a subclass of int,
                # so an int-first check would map booleans to Int64.
                if isinstance(value, bool):
                    field_type = FieldType.Bool
                elif isinstance(value, str):
                    field_type = FieldType.String
                elif isinstance(value, int):
                    field_type = FieldType.Int64
                elif isinstance(value, list) and all(
                    isinstance(item, str) for item in value
                ):
                    field_type = FieldType.List_String
                elif isinstance(value, list) and all(
                    isinstance(item, int) for item in value
                ):
                    field_type = FieldType.List_Int64
                elif isinstance(value, bytes):
                    field_type = FieldType.Text
                else:
                    raise ValueError(
                        f"metadatas value for key {key!r} is invalid, "
                        "please change the type of metadatas."
                    )
                fields.append(Field(key, field_type))

        fields.append(Field("text", FieldType.Text))
        fields.append(Field("primary_key", FieldType.String, is_primary_key=True))
        fields.append(Field("vector", FieldType.Vector, dim=dim))

        self.collection = self.service.create_collection(
            self.collection_name, fields
        )

    def _create_index(self) -> None:
        """Create the index, applying any overrides from ``index_params``.

        Defaults are ``cpu_quota=2``, a default ``VectorIndexParams()``,
        ``partition_by=""`` and ``scalar_index=None``. An ``index_params``
        entry replaces its default only when it is not ``None``.

        Raises:
            ImportError: If the ``volcengine`` package is not installed.
        """
        try:
            from volcengine.viking_db import VectorIndexParams
        except ImportError as exc:
            raise ImportError(
                "Could not import volcengine python package. "
                "Please install it with `pip install --upgrade volcengine`."
            ) from exc

        settings = {
            "cpu_quota": 2,
            "vector_index": VectorIndexParams(),
            "partition_by": "",
            "scalar_index": None,
        }
        overrides = self.index_params if self.index_params is not None else {}
        for key in settings:
            override = overrides.get(key)
            if override is not None:
                settings[key] = override

        self.index = self.service.create_index(
            self.collection_name,
            self.index_name,
            **settings,
        )

    def add_texts(  # type: ignore[override]
        self,
        texts: List[str],
        metadatas: Optional[List[dict]] = None,
        batch_size: int = 1000,
        **kwargs: Any,
    ) -> List[str]:
        """Insert text data into VikingDB.

        The collection and its index are created first if no collection
        exists yet.

        Args:
            texts: Texts to embed and store.
            metadatas: Optional metadata dicts, matched to texts by position.
                Texts beyond the end of this list get no metadata.
            batch_size: Maximum number of records per upsert call.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The generated primary keys (UUID4 strings), one per stored text.
            Empty when there was nothing to insert.

        Raises:
            ImportError: If the ``volcengine`` package is not installed.
            ValueError: If ``batch_size`` is not positive.
        """
        try:
            from volcengine.viking_db import Data
        except ImportError as exc:
            raise ImportError(
                "Could not import volcengine python package. "
                "Please install it with `pip install --upgrade volcengine`."
            ) from exc

        # A negative step would make the batching loop a silent no-op while
        # primary keys were still returned to the caller.
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer.")

        texts = list(texts)
        try:
            embeddings = self.embedding_func.embed_documents(texts)
        except NotImplementedError:
            embeddings = [self.embedding_func.embed_query(x) for x in texts]
        if len(embeddings) == 0:
            logger.debug("Nothing to insert, skipping.")
            return []

        if self.collection is None:
            self._create_collection(embeddings, metadatas)
            self._create_index()

        records = []
        pks: List[str] = []
        for position, vector in enumerate(embeddings):
            primary_key = str(uuid.uuid4())
            pks.append(primary_key)
            field = {
                "text": texts[position],
                "primary_key": primary_key,
                "vector": vector,
            }
            if metadatas is not None and position < len(metadatas):
                field.update(metadatas[position])
            records.append(Data(field))

        for start in range(0, len(records), batch_size):
            batch = records[start : start + batch_size]
            self.collection.upsert_data(batch)  # type: ignore[union-attr]
        return pks

    def _search_by_vector(
        self, embedding: List[float], params: Optional[dict] = None
    ) -> Any:
        """Run an index search, filling unset search options with defaults.

        Recognised ``params`` keys are ``filter`` (default ``None``),
        ``limit`` (default ``10``), ``output_fields`` (default ``None``) and
        ``partition`` (default ``"default"``). A key whose value is ``None``
        is treated as unset.
        """
        options = {
            "filter": None,
            "limit": 10,
            "output_fields": None,
            "partition": "default",
        }
        if params is not None:
            for key in options:
                supplied = params.get(key)
                if supplied is not None:
                    options[key] = supplied
        return self.index.search_by_vector(  # type: ignore[union-attr]
            embedding,
            **options,
        )

    def similarity_search(  # type: ignore[override]
        self,
        query: str,
        params: Optional[dict] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Perform a similarity search against the query string.

        Args:
            query: Text to embed and search for.
            params: Optional search options (``filter``, ``limit``,
                ``output_fields``, ``partition``).
            **kwargs: Forwarded to ``similarity_search_with_score``.

        Returns:
            The matching documents, without their scores.
        """
        res = self.similarity_search_with_score(query=query, params=params, **kwargs)
        return [doc for doc, _ in res]

    def similarity_search_with_score(
        self,
        query: str,
        params: Optional[dict] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Perform a search on a query string and return results with score.

        Args:
            query: Text to embed and search for.
            params: Optional search options (``filter``, ``limit``,
                ``output_fields``, ``partition``).
            **kwargs: Forwarded to ``similarity_search_with_score_by_vector``.

        Returns:
            ``(document, score)`` pairs in the order returned by the index.
        """
        embedding = self.embedding_func.embed_query(query)
        return self.similarity_search_with_score_by_vector(
            embedding=embedding, params=params, **kwargs
        )

    def similarity_search_by_vector(  # type: ignore[override]
        self,
        embedding: List[float],
        params: Optional[dict] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Perform a similarity search against an embedding vector.

        Args:
            embedding: Query vector.
            params: Optional search options (``filter``, ``limit``,
                ``output_fields``, ``partition``).
            **kwargs: Forwarded to ``similarity_search_with_score_by_vector``.

        Returns:
            The matching documents, without their scores.
        """
        res = self.similarity_search_with_score_by_vector(
            embedding=embedding, params=params, **kwargs
        )
        return [doc for doc, _ in res]

    def similarity_search_with_score_by_vector(
        self,
        embedding: List[float],
        params: Optional[dict] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Perform a search on an embedding vector and return results with score.

        Args:
            embedding: Query vector.
            params: Optional search options (``filter``, ``limit``,
                ``output_fields``, ``partition``).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``(document, score)`` pairs. The ``text`` field becomes the page
            content (empty string when absent); ``primary_key`` and ``vector``
            are removed and the remaining fields become the metadata. Empty
            when no collection exists.
        """
        if self.collection is None:
            logger.debug("No existing collection to search.")
            return []

        ret = []
        for item in self._search_by_vector(embedding, params):
            item.fields.pop("primary_key", None)
            item.fields.pop("vector", None)
            page_content = item.fields.pop("text", "")
            doc = Document(page_content=page_content, metadata=item.fields)
            ret.append((doc, item.score))
        return ret

    def max_marginal_relevance_search(  # type: ignore[override]
        self,
        query: str,
        k: int = 4,
        lambda_mult: float = 0.5,
        params: Optional[dict] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Perform a search and return results that are reordered by MMR.

        Args:
            query: Text to embed and search for.
            k: Passed to ``maximal_marginal_relevance`` as ``k``.
            lambda_mult: Passed to ``maximal_marginal_relevance``.
            params: Optional search options (``filter``, ``limit``,
                ``output_fields``, ``partition``).
            **kwargs: Forwarded to ``max_marginal_relevance_search_by_vector``.

        Returns:
            Documents in MMR order.
        """
        embedding = self.embedding_func.embed_query(query)
        return self.max_marginal_relevance_search_by_vector(
            embedding=embedding,
            k=k,
            lambda_mult=lambda_mult,
            params=params,
            **kwargs,
        )

    def max_marginal_relevance_search_by_vector(  # type: ignore[override]
        self,
        embedding: List[float],
        k: int = 4,
        lambda_mult: float = 0.5,
        params: Optional[dict] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Perform a search and return results that are reordered by MMR.

        Args:
            embedding: Query vector.
            k: Passed to ``maximal_marginal_relevance`` as ``k``.
            lambda_mult: Passed to ``maximal_marginal_relevance``.
            params: Optional search options (``filter``, ``limit``,
                ``output_fields``, ``partition``).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            Documents in MMR order. Search hits lacking any of the ``vector``,
            ``primary_key`` or ``text`` fields are skipped. Empty when no
            collection exists.
        """
        if self.collection is None:
            logger.debug("No existing collection to search.")
            return []

        documents = []
        ordered_result_embeddings = []
        for item in self._search_by_vector(embedding, params):
            if (
                "vector" not in item.fields
                or "primary_key" not in item.fields
                or "text" not in item.fields
            ):
                continue
            ordered_result_embeddings.append(item.fields.pop("vector"))
            item.fields.pop("primary_key")
            page_content = item.fields.pop("text")
            documents.append(
                Document(page_content=page_content, metadata=item.fields)
            )

        new_ordering = maximal_marginal_relevance(
            np.array(embedding),
            ordered_result_embeddings,
            k=k,
            lambda_mult=lambda_mult,
        )

        # Reorder the values and return.
        ret = []
        for x in new_ordering:
            # Function can return -1 index
            if x == -1:
                break
            ret.append(documents[x])
        return ret

    def delete(
        self,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> None:
        """Delete records from the collection by primary key.

        Args:
            ids: Primary keys passed to the collection's ``delete_data``.
            **kwargs: Accepted for interface compatibility; unused here.

        Does nothing (apart from a debug log) when no collection exists.
        """
        if self.collection is None:
            logger.debug("No existing collection to delete from.")
            # Without this return the call below would raise AttributeError.
            return
        self.collection.delete_data(ids)  # type: ignore[union-attr]

    @classmethod
    def from_texts(  # type: ignore[no-untyped-def, override]
        cls,
        texts: List[str],
        embedding: Embeddings,
        connection_args: Optional[VikingDBConfig] = None,
        metadatas: Optional[List[dict]] = None,
        collection_name: str = "LangChainCollection",
        index_params: Optional[dict] = None,
        drop_old: bool = False,
        **kwargs: Any,
    ):
        """Create a collection, indexes it and insert data.

        Args:
            texts: Texts to embed and store.
            embedding: Embeddings object used for texts and queries.
            connection_args: Connection settings; required.
            metadatas: Optional metadata dicts, matched to texts by position.
            collection_name: Name of the collection to use.
            index_params: Optional overrides used when the index is created.
            drop_old: Whether to drop an existing collection of that name.
            **kwargs: Forwarded to the class constructor.

        Returns:
            The vector store instance with ``texts`` inserted.

        Raises:
            ValueError: If ``connection_args`` is ``None``.
        """
        if connection_args is None:
            raise ValueError("VikingDBConfig does not exists")
        vector_db = cls(
            embedding_function=embedding,
            collection_name=collection_name,
            connection_args=connection_args,
            index_params=index_params,
            drop_old=drop_old,
            **kwargs,
        )
        vector_db.add_texts(texts=texts, metadatas=metadatas)
        return vector_db