[docs]classTablestoreVectorStore(VectorStore):"""`Tablestore` vector store. To use, you should have the ``tablestore`` python package installed. Example: .. code-block:: python import os from langchain_openai import OpenAIEmbeddings from langchain_community.vectorstores import TablestoreVectorStore import tablestore embeddings = OpenAIEmbeddings() store = TablestoreVectorStore( embeddings, endpoint=os.getenv("end_point"), instance_name=os.getenv("instance_name"), access_key_id=os.getenv("access_key_id"), access_key_secret=os.getenv("access_key_secret"), vector_dimension=512, # metadata mapping is used to filter non-vector fields. metadata_mappings=[ tablestore.FieldSchema( "type", tablestore.FieldType.KEYWORD, index=True, enable_sort_and_agg=True ), tablestore.FieldSchema( "time", tablestore.FieldType.LONG, index=True, enable_sort_and_agg=True ), ] ) """
[docs]def__init__(self,embedding:Embeddings,*,endpoint:Optional[str]=None,instance_name:Optional[str]=None,access_key_id:Optional[str]=None,access_key_secret:Optional[str]=None,table_name:Optional[str]="langchain_vector_store_ots_v1",index_name:Optional[str]="langchain_vector_store_ots_index_v1",text_field:Optional[str]="content",vector_field:Optional[str]="embedding",vector_dimension:int=512,vector_metric_type:Optional[str]="cosine",metadata_mappings:Optional[List[Any]]=None,):try:importtablestoreexceptImportError:raiseImportError("Could not import tablestore python package. ""Please install it with `pip install tablestore`.")self.__embedding=embeddingself.__tablestore_client=tablestore.OTSClient(endpoint,access_key_id,access_key_secret,instance_name,retry_policy=tablestore.WriteRetryPolicy(),)self.__table_name=table_nameself.__index_name=index_nameself.__vector_dimension=vector_dimensionself.__vector_field=vector_fieldself.__text_field=text_fieldifvector_metric_type=="cosine":self.__vector_metric_type=tablestore.VectorMetricType.VM_COSINEelifvector_metric_type=="euclidean":self.__vector_metric_type=tablestore.VectorMetricType.VM_EUCLIDEANelifvector_metric_type=="dot_product":self.__vector_metric_type=tablestore.VectorMetricType.VM_DOT_PRODUCTelse:raiseValueError(f"Unsupported vector_metric_type operator: {vector_metric_type}")self.__metadata_mappings=[tablestore.FieldSchema(self.__text_field,tablestore.FieldType.TEXT,index=True,enable_sort_and_agg=False,store=False,analyzer=tablestore.AnalyzerType.MAXWORD,),tablestore.FieldSchema(self.__vector_field,tablestore.FieldType.VECTOR,vector_options=tablestore.VectorOptions(data_type=tablestore.VectorDataType.VD_FLOAT_32,dimension=self.__vector_dimension,metric_type=self.__vector_metric_type,),),]ifmetadata_mappings:formappinginmetadata_mappings:ifnotisinstance(mapping,tablestore.FieldSchema):raiseValueError(f"meta_data mapping should be an "f"instance of tablestore.FieldSchema, "f"bug got {type(mapping)}")if(mapping.field_name==text_fieldormapping.field_name==vector_field):continueself.__metadata_mappings.append(mapping)
[docs]defcreate_table_if_not_exist(self)->None:"""Create table if not exist."""try:importtablestoreexceptImportError:raiseImportError("Could not import tablestore python package. ""Please install it with `pip install tablestore`.")table_list=self.__tablestore_client.list_table()ifself.__table_nameintable_list:logger.info("Tablestore system table[%s] already exists",self.__table_name)returnNonelogger.info("Tablestore system table[%s] does not exist, try to create the table.",self.__table_name,)schema_of_primary_key=[("id","STRING")]table_meta=tablestore.TableMeta(self.__table_name,schema_of_primary_key)table_options=tablestore.TableOptions()reserved_throughput=tablestore.ReservedThroughput(tablestore.CapacityUnit(0,0))try:self.__tablestore_client.create_table(table_meta,table_options,reserved_throughput)logger.info("Tablestore create table[%s] successfully.",self.__table_name)excepttablestore.OTSClientErrorase:logger.exception("Tablestore create system table[%s] failed with client error, ""http_status:%d, error_message:%s",self.__table_name,e.get_http_status(),e.get_error_message(),)excepttablestore.OTSServiceErrorase:logger.exception("Tablestore create system table[%s] failed with client error, ""http_status:%d, error_code:%s, error_message:%s, request_id:%s",self.__table_name,e.get_http_status(),e.get_error_code(),e.get_error_message(),e.get_request_id(),)
[docs]defcreate_search_index_if_not_exist(self)->None:"""Create search index if not exist."""try:importtablestoreexceptImportError:raiseImportError("Could not import tablestore python package. ""Please install it with `pip install tablestore`.")search_index_list=self.__tablestore_client.list_search_index(table_name=self.__table_name)ifself.__index_namein[t[1]fortinsearch_index_list]:logger.info("Tablestore system index[%s] already exists",self.__index_name)returnNoneindex_meta=tablestore.SearchIndexMeta(self.__metadata_mappings)self.__tablestore_client.create_search_index(self.__table_name,self.__index_name,index_meta)logger.info("Tablestore create system index[%s] successfully.",self.__index_name)
[docs]defdelete_table_if_exists(self)->None:"""Delete table if exists."""search_index_list=self.__tablestore_client.list_search_index(table_name=self.__table_name)forresp_tupleinsearch_index_list:self.__tablestore_client.delete_search_index(resp_tuple[0],resp_tuple[1])self.__tablestore_client.delete_table(self.__table_name)
def__write_row(self,row_id:str,content:str,embedding_vector:List[float],meta_data:dict)->None:try:importtablestoreexceptImportError:raiseImportError("Could not import tablestore python package. ""Please install it with `pip install tablestore`.")primary_key=[("id",row_id)]attribute_columns=[(self.__text_field,content),(self.__vector_field,json.dumps(embedding_vector)),]fork,vinmeta_data.items():item=(k,v)attribute_columns.append(item)row=tablestore.Row(primary_key,attribute_columns)try:self.__tablestore_client.put_row(self.__table_name,row)logger.debug("Tablestore put row successfully. id:%s, content:%s, meta_data:%s",row_id,content,meta_data,)excepttablestore.OTSClientErrorase:logger.exception("Tablestore put row failed with client error:%s, ""id:%s, content:%s, meta_data:%s",e,row_id,content,meta_data,)excepttablestore.OTSServiceErrorase:logger.exception("Tablestore put row failed with client error:%s, id:%s, content:%s, ""meta_data:%s, http_status:%d, ""error_code:%s, error_message:%s, request_id:%s",e,row_id,content,meta_data,e.get_http_status(),e.get_error_code(),e.get_error_message(),e.get_request_id(),)def__delete_row(self,row_id:str)->None:try:importtablestoreexceptImportError:raiseImportError("Could not import tablestore python package. ""Please install it with `pip install tablestore`.")primary_key=[("id",row_id)]try:self.__tablestore_client.delete_row(self.__table_name,primary_key,None)logger.info("Tablestore delete row successfully. id:%s",row_id)excepttablestore.OTSClientErrorase:logger.exception("Tablestore delete row failed with client error:%s, id:%s",e,row_id)excepttablestore.OTSServiceErrorase:logger.exception("Tablestore delete row failed with client error:%s, ""id:%s, http_status:%d, error_code:%s, error_message:%s, request_id:%s",e,row_id,e.get_http_status(),e.get_error_code(),e.get_error_message(),e.get_request_id(),)def__get_row(self,row_id:str)->Document:try:importtablestoreexceptImportError:raiseImportError("Could not import tablestore python package. ""Please install it with `pip install tablestore`.")primary_key=[("id",row_id)]try:_,row,_=self.__tablestore_client.get_row(self.__table_name,primary_key,None,None,1)logger.debug("Tablestore get row successfully. id:%s",row_id)ifrowisNone:raiseValueError("Can't not find row_id:%s in tablestore."%row_id)document_id=row.primary_key[0][1]meta_data={}text=""forcolinrow.attribute_columns:key=col[0]val=col[1]ifkey==self.__text_field:text=valcontinuemeta_data[key]=valreturnDocument(id=document_id,page_content=text,metadata=meta_data,)excepttablestore.OTSClientErrorase:logger.exception("Tablestore get row failed with client error:%s, id:%s",e,row_id)raiseeexcepttablestore.OTSServiceErrorase:logger.exception("Tablestore get row failed with client error:%s, ""id:%s, http_status:%d, error_code:%s, error_message:%s, request_id:%s",e,row_id,e.get_http_status(),e.get_error_code(),e.get_error_message(),e.get_request_id(),)raiseedef_tablestore_search(self,query_embedding:List[float],k:int=5,tablestore_filter_query:Optional[Any]=None,**kwargs:Any,)->List[Tuple[Document,float]]:try:importtablestoreexceptImportError:raiseImportError("Could not import tablestore python package. ""Please install it with `pip install tablestore`.")iftablestore_filter_query:ifnotisinstance(tablestore_filter_query,tablestore.Query):raiseValueError(f"table_store_filter_query should be "f"an instance of tablestore.Query, "f"bug got {type(tablestore_filter_query)}")if"knn_top_k"inkwargs:knn_top_k=kwargs["knn_top_k"]else:knn_top_k=kots_query=tablestore.KnnVectorQuery(field_name=self.__vector_field,top_k=knn_top_k,float32_query_vector=query_embedding,filter=tablestore_filter_query,)sort=tablestore.Sort(sorters=[tablestore.ScoreSort(sort_order=tablestore.SortOrder.DESC)])search_query=tablestore.SearchQuery(ots_query,limit=k,get_total_count=False,sort=sort)try:search_response=self.__tablestore_client.search(table_name=self.__table_name,index_name=self.__index_name,search_query=search_query,columns_to_get=tablestore.ColumnsToGet(return_type=tablestore.ColumnReturnType.ALL),)logger.info("Tablestore search successfully. request_id:%s",search_response.request_id,)tuple_list=[]forhitinsearch_response.search_hits:row=hit.rowscore=hit.scoredocument_id=row[0][0][1]meta_data={}text=""forcolinrow[1]:key=col[0]val=col[1]ifkey==self.__text_field:text=valcontinueifkey==self.__vector_field:val=json.loads(val)meta_data[key]=valdoc=Document(id=document_id,page_content=text,metadata=meta_data,)tuple_list.append((doc,score))returntuple_listexcepttablestore.OTSClientErrorase:logger.exception("Tablestore search failed with client error:%s",e)raiseeexcepttablestore.OTSServiceErrorase:logger.exception("Tablestore search failed with client error:%s, ""http_status:%d, error_code:%s, error_message:%s, request_id:%s",e,e.get_http_status(),e.get_error_code(),e.get_error_message(),e.get_request_id(),)raisee
[docs]defadd_texts(self,texts:Iterable[str],metadatas:Optional[List[dict]]=None,ids:Optional[List[str]]=None,**kwargs:Any,)->List[str]:ids=idsor[str(uuid.uuid4().hex)for_intexts]text_list=list(texts)embeddings=self.__embedding.embed_documents(text_list)foriinrange(len(ids)):row_id=ids[i]text=text_list[i]embedding_vector=embeddings[i]iflen(embedding_vector)!=self.__vector_dimension:raiseRuntimeError("embedding vector size:%d is not the same as vector store dim:%d"%(len(embedding_vector),self.__vector_dimension))metadata=dict()ifmetadatasandmetadatas[i]:metadata=metadatas[i]self.__write_row(row_id=row_id,content=text,embedding_vector=embedding_vector,meta_data=metadata,)returnids