class Typesense(VectorStore):
    """`Typesense` vector store.

    To use, you should have the ``typesense`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.embeddings.openai import OpenAIEmbeddings
            from langchain_community.vectorstores import Typesense
            import typesense

            node = {
                "host": "localhost",  # For Typesense Cloud use xxx.a1.typesense.net
                "port": "8108",       # For Typesense Cloud use 443
                "protocol": "http"    # For Typesense Cloud use https
            }
            typesense_client = typesense.Client(
                {
                  "nodes": [node],
                  "api_key": "<API_KEY>",
                  "connection_timeout_seconds": 2
                }
            )
            typesense_collection_name = "langchain-memory"

            embedding = OpenAIEmbeddings()
            vectorstore = Typesense(
                typesense_client=typesense_client,
                embedding=embedding,
                typesense_collection_name=typesense_collection_name,
                text_key="text",
            )
    """

    def __init__(
        self,
        typesense_client: Client,
        embedding: Embeddings,
        *,
        typesense_collection_name: Optional[str] = None,
        text_key: str = "text",
    ):
        """Initialize with Typesense client.

        Args:
            typesense_client: An already-configured ``typesense.Client``.
            embedding: Embedding model used for documents and queries.
            typesense_collection_name: Name of the collection to use. When
                omitted, a name of the form ``langchain-<uuid4>`` is generated.
            text_key: Document field under which the raw text is stored.

        Raises:
            ImportError: If the ``typesense`` package is not installed.
            ValueError: If ``typesense_client`` is not a ``typesense.Client``.
        """
        try:
            from typesense import Client
        except ImportError as exc:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "Could not import typesense python package. "
                "Please install it with `pip install typesense`."
            ) from exc
        if not isinstance(typesense_client, Client):
            raise ValueError(
                f"typesense_client should be an instance of typesense.Client, "
                f"got {type(typesense_client)}"
            )
        self._typesense_client = typesense_client
        self._embedding = embedding
        self._typesense_collection_name = (
            typesense_collection_name or f"langchain-{uuid.uuid4()}"
        )
        self._text_key = text_key

    @property
    def _collection(self) -> Collection:
        """Handle for this store's collection on the Typesense client."""
        return self._typesense_client.collections[self._typesense_collection_name]

    @property
    def embeddings(self) -> Embeddings:
        """The embedding model this store was constructed with."""
        return self._embedding

    def _prep_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]],
        ids: Optional[List[str]],
    ) -> List[dict]:
        """Embed the texts and build the documents to import.

        Args:
            texts: Texts to embed. Any iterable is accepted, including
                one-shot generators.
            metadatas: Optional metadata dicts, one per text. A falsy value
                means an empty dict is used for every text.
            ids: Optional ids, one per text. A falsy value means a fresh
                uuid4 string is generated for every text.

        Returns:
            One dict per text with the keys ``id``, ``vec``, the configured
            text key, and ``metadata``.
        """
        # Materialize once: `texts` is needed both for embedding and for
        # pairing with ids/metadata, and a generator can only be consumed once.
        text_list = list(texts)
        if not text_list:
            # Nothing to embed; avoid calling the embedding model with no input.
            return []
        doc_ids = ids or [str(uuid.uuid4()) for _ in text_list]
        doc_metadatas = metadatas or [{} for _ in text_list]
        vectors = self._embedding.embed_documents(text_list)
        # NOTE: zip stops at the shortest input, so an `ids` or `metadatas`
        # list shorter than `texts` truncates the result.
        return [
            {"id": doc_id, "vec": vec, self._text_key: text, "metadata": metadata}
            for doc_id, vec, text, metadata in zip(
                doc_ids, vectors, text_list, doc_metadatas
            )
        ]

    def _create_collection(self, num_dim: int) -> None:
        """Create the backing collection with a ``num_dim``-sized vector field.

        The schema declares the ``vec`` float array, the text field as a
        string, and a ``.*`` catch-all field of type ``auto``.
        """
        fields = [
            {"name": "vec", "type": "float[]", "num_dim": num_dim},
            {"name": self._text_key, "type": "string"},
            {"name": ".*", "type": "auto"},
        ]
        self._typesense_client.collections.create(
            {"name": self._typesense_collection_name, "fields": fields}
        )

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Run more texts through the embedding and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of ids to associate with the texts.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            List of ids from adding the texts into the vectorstore. Empty when
            no texts were supplied.
        """
        from typesense.exceptions import ObjectNotFound

        docs = self._prep_texts(texts, metadatas, ids)
        if not docs:
            # Nothing to import, and `docs[0]` below would fail on empty input.
            return []
        try:
            self._collection.documents.import_(docs, {"action": "upsert"})
        except ObjectNotFound:
            # Create the collection if it doesn't already exist, sizing the
            # vector field from the first embedding, then retry the import.
            self._create_collection(len(docs[0]["vec"]))
            self._collection.documents.import_(docs, {"action": "upsert"})
        return [doc["id"] for doc in docs]

    def similarity_search_with_score(
        self,
        query: str,
        k: int = 10,
        filter: Optional[str] = "",
    ) -> List[Tuple[Document, float]]:
        """Return typesense documents most similar to query, along with scores.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 10.
                Minimum 10 results would be returned.
            filter: typesense filter_by expression to filter documents on.
                ``None`` is treated the same as the empty-string default.

        Returns:
            List of ``(Document, score)`` pairs, where the score is the
            ``vector_distance`` value reported for each hit.
        """
        query_vector = ",".join(str(x) for x in self._embedding.embed_query(query))
        search_request = {
            "q": "*",
            "vector_query": f"vec:([{query_vector}], k:{k})",
            # The signature allows None; send the documented default instead.
            "filter_by": filter or "",
            "collection": self._typesense_collection_name,
        }
        response = self._typesense_client.multi_search.perform(
            {"searches": [search_request]}, {}
        )
        return [
            (
                Document(
                    page_content=hit["document"][self._text_key],
                    metadata=hit["document"]["metadata"],
                ),
                hit["vector_distance"],
            )
            for hit in response["results"][0]["hits"]
        ]

    def similarity_search(
        self,
        query: str,
        k: int = 10,
        filter: Optional[str] = "",
        **kwargs: Any,
    ) -> List[Document]:
        """Return typesense documents most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 10.
                Minimum 10 results would be returned.
            filter: typesense filter_by expression to filter documents on
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            List of Documents most similar to the query (scores are dropped).
        """
        docs_and_score = self.similarity_search_with_score(query, k=k, filter=filter)
        return [doc for doc, _ in docs_and_score]

    @classmethod
    def from_client_params(
        cls,
        embedding: Embeddings,
        *,
        host: str = "localhost",
        port: Union[str, int] = "8108",
        protocol: str = "http",
        typesense_api_key: Optional[str] = None,
        connection_timeout_seconds: int = 2,
        **kwargs: Any,
    ) -> Typesense:
        """Initialize Typesense directly from client parameters.

        Args:
            embedding: Embedding model used for documents and queries.
            host: Host of the Typesense node.
            port: Port of the Typesense node; converted to a string.
            protocol: Protocol used to reach the node.
            typesense_api_key: API key. When omitted it is read via
                ``get_from_env`` from the ``TYPESENSE_API_KEY`` variable.
            connection_timeout_seconds: Client connection timeout.
            **kwargs: Forwarded to the constructor (for example
                ``typesense_collection_name`` and ``text_key``).

        Raises:
            ImportError: If the ``typesense`` package is not installed.

        Example:
            .. code-block:: python

                from langchain_community.embeddings.openai import OpenAIEmbeddings
                from langchain_community.vectorstores import Typesense

                # Pass in typesense_api_key as kwarg or set env var "TYPESENSE_API_KEY".
                vectorstore = Typesense.from_client_params(
                    OpenAIEmbeddings(),
                    host="localhost",
                    port="8108",
                    protocol="http",
                    typesense_collection_name="langchain-memory",
                )
        """
        try:
            from typesense import Client
        except ImportError as exc:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "Could not import typesense python package. "
                "Please install it with `pip install typesense`."
            ) from exc

        node = {
            "host": host,
            "port": str(port),
            "protocol": protocol,
        }
        typesense_api_key = typesense_api_key or get_from_env(
            "typesense_api_key", "TYPESENSE_API_KEY"
        )
        client_config = {
            "nodes": [node],
            "api_key": typesense_api_key,
            "connection_timeout_seconds": connection_timeout_seconds,
        }
        return cls(Client(client_config), embedding, **kwargs)

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        typesense_client: Optional[Client] = None,
        typesense_client_params: Optional[dict] = None,
        typesense_collection_name: Optional[str] = None,
        text_key: str = "text",
        **kwargs: Any,
    ) -> Typesense:
        """Construct Typesense wrapper from raw text.

        Args:
            texts: Texts to embed and store.
            embedding: Embedding model used for documents and queries.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of ids to associate with the texts.
            typesense_client: Existing client to use. Takes priority over
                ``typesense_client_params``.
            typesense_client_params: Keyword arguments for
                ``from_client_params``, used when no client is given.
            typesense_collection_name: Name of the collection to use.
            text_key: Document field under which the raw text is stored.
            **kwargs: Extra keyword arguments forwarded to the constructor.

        Returns:
            The vector store, with ``texts`` already added.

        Raises:
            ValueError: If neither ``typesense_client`` nor
                ``typesense_client_params`` is provided.
        """
        # These two settings were previously accepted but never passed on, so
        # the store always fell back to a generated collection name and "text".
        store_kwargs = {
            "typesense_collection_name": typesense_collection_name,
            "text_key": text_key,
            **kwargs,
        }
        if typesense_client is not None:
            vectorstore = cls(typesense_client, embedding, **store_kwargs)
        elif typesense_client_params:
            # Values inside typesense_client_params win, which keeps existing
            # callers that put the collection name there working unchanged.
            vectorstore = cls.from_client_params(
                embedding, **{**store_kwargs, **typesense_client_params}
            )
        else:
            raise ValueError(
                "Must specify one of typesense_client or typesense_client_params."
            )
        vectorstore.add_texts(texts, metadatas=metadatas, ids=ids)
        return vectorstore