class ZepCloudVectorStore(VectorStore):
    """`Zep` vector store.

    It provides methods for adding texts or documents to the store,
    searching for similar documents, and deleting documents.

    Search scores are calculated using cosine similarity normalized to [0, 1].

    Args:
        collection_name (str): The name of the collection in the Zep store.
        api_key (str): The API key for the Zep API.
    """

    def __init__(
        self,
        collection_name: str,
        api_key: str,
    ) -> None:
        """Create the sync and async Zep clients and load the collection.

        Args:
            collection_name: Name of the collection in the Zep store.
            api_key: API key passed to both Zep clients.

        Raises:
            ValueError: If ``collection_name`` is empty.
            ImportError: If the ``zep_cloud`` module cannot be imported.
        """
        super().__init__()
        if not collection_name:
            raise ValueError(
                "collection_name must be specified when using ZepVectorStore."
            )
        try:
            from zep_cloud.client import AsyncZep, Zep
        except ImportError as err:
            # The module imported above is ``zep_cloud``, so point the user at
            # the matching distribution and keep the original error as cause.
            raise ImportError(
                "Could not import zep-cloud python package. "
                "Please install it with `pip install zep-cloud`."
            ) from err
        self._client = Zep(api_key=api_key)
        self._client_async = AsyncZep(api_key=api_key)
        self.collection_name = collection_name
        # Called for its side effect: creates the collection when it is missing.
        self._load_collection()

    @property
    def embeddings(self) -> Optional[Embeddings]:
        """Unavailable for ZepCloud; always ``None``."""
        return None

    def _load_collection(self) -> DocumentCollectionResponse:
        """Load the collection from the Zep backend.

        If the backend raises ``NotFoundError`` the collection is created
        via ``_create_collection`` and that new collection is returned.
        """
        from zep_cloud import NotFoundError

        try:
            collection = self._client.document.get_collection(self.collection_name)
        except NotFoundError:
            logger.info(
                "Collection %s not found. Creating new collection.",
                self.collection_name,
            )
            collection = self._create_collection()
        return collection

    def _create_collection(self) -> DocumentCollectionResponse:
        """Create a new collection in the Zep backend and return it."""
        self._client.document.add_collection(self.collection_name)
        return self._client.document.get_collection(self.collection_name)

    def _generate_documents_to_add(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[Dict[Any, Any]]] = None,
        document_ids: Optional[List[str]] = None,
    ) -> List[CreateDocumentRequest]:
        """Build one Zep document request per text.

        Args:
            texts: Contents of the documents to create.
            metadatas: Optional metadata, matched to ``texts`` by position.
            document_ids: Optional document ids, matched to ``texts`` by
                position.

        Returns:
            The list of document requests, in the order of ``texts``.
        """
        from zep_cloud import CreateDocumentRequest as ZepDocument

        documents: List[ZepDocument] = [
            ZepDocument(
                content=text,
                metadata=metadatas[i] if metadatas else None,
                document_id=document_ids[i] if document_ids else None,
            )
            for i, text in enumerate(texts)
        ]
        return documents

    @staticmethod
    def _to_scored_documents(results: Any) -> List[Tuple[Document, float]]:
        """Convert a Zep search response into ``(Document, score)`` tuples.

        A missing ``results`` list yields an empty list, and a falsy score is
        reported as ``0.0``.
        """
        return [
            (
                Document(
                    page_content=str(doc.content),
                    metadata=doc.metadata,
                ),
                doc.score or 0.0,
            )
            for doc in results.results or []
        ]

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[Dict[str, Any]]] = None,
        document_ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Add texts to the collection.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            document_ids: Optional list of document ids associated with the
                texts.
            kwargs: Accepted for interface compatibility; not used here.

        Returns:
            List of ids from adding the texts into the vectorstore.
        """
        documents = self._generate_documents_to_add(texts, metadatas, document_ids)
        uuids = self._client.document.add_documents(
            self.collection_name, request=documents
        )
        return uuids

    async def aadd_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[Dict[str, Any]]] = None,
        document_ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Add texts to the collection using the async client.

        Takes the same arguments and returns the same value as ``add_texts``.
        """
        documents = self._generate_documents_to_add(texts, metadatas, document_ids)
        uuids = await self._client_async.document.add_documents(
            self.collection_name, request=documents
        )
        return uuids

    def search(
        self,
        query: str,
        search_type: SearchType,
        metadata: Optional[Dict[str, Any]] = None,
        k: int = 3,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to query using specified search type.

        Args:
            query: Text to search for.
            search_type: Either ``"similarity"`` or ``"mmr"``.
            metadata: Optional metadata filter.
            k: Number of documents to return. Defaults to 3.
            kwargs: Forwarded to the selected search method.

        Raises:
            ValueError: If ``search_type`` is neither of the allowed values.
        """
        if search_type == "similarity":
            return self.similarity_search(query, k=k, metadata=metadata, **kwargs)
        elif search_type == "mmr":
            return self.max_marginal_relevance_search(
                query, k=k, metadata=metadata, **kwargs
            )
        else:
            raise ValueError(
                f"search_type of {search_type} not allowed. Expected "
                "search_type to be 'similarity' or 'mmr'."
            )

    async def asearch(
        self,
        query: str,
        search_type: str,
        metadata: Optional[Dict[str, Any]] = None,
        k: int = 3,
        **kwargs: Any,
    ) -> List[Document]:
        """Async variant of ``search``.

        Raises:
            ValueError: If ``search_type`` is neither ``"similarity"`` nor
                ``"mmr"``.
        """
        if search_type == "similarity":
            return await self.asimilarity_search(
                query, k=k, metadata=metadata, **kwargs
            )
        elif search_type == "mmr":
            return await self.amax_marginal_relevance_search(
                query, k=k, metadata=metadata, **kwargs
            )
        else:
            raise ValueError(
                f"search_type of {search_type} not allowed. Expected "
                "search_type to be 'similarity' or 'mmr'."
            )

    def similarity_search(
        self,
        query: str,
        k: int = 4,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to query, without their scores."""
        results = self._similarity_search_with_relevance_scores(
            query, k=k, metadata=metadata, **kwargs
        )
        return [doc for doc, _ in results]

    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Run similarity search and return each doc with its score.

        The scores are those produced by
        ``_similarity_search_with_relevance_scores``.
        """
        return self._similarity_search_with_relevance_scores(
            query, k=k, metadata=metadata, **kwargs
        )

    def _similarity_search_with_relevance_scores(
        self,
        query: str,
        k: int = 4,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Default similarity search with relevance scores.

        Modify if necessary in subclass.
        Return docs and relevance scores in the range [0, 1].
        0 is dissimilar, 1 is most similar.

        Args:
            query: input text
            k: Number of Documents to return. Defaults to 4.
            metadata: Optional, metadata filter
            **kwargs: Forwarded unchanged to the Zep document search call.

        Returns:
            List of Tuples of (doc, similarity_score)
        """
        results = self._client.document.search(
            collection_name=self.collection_name,
            text=query,
            limit=k,
            metadata=metadata,
            **kwargs,
        )
        return self._to_scored_documents(results)

    async def asimilarity_search_with_relevance_scores(
        self,
        query: str,
        k: int = 4,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query, with scores (async client)."""
        results = await self._client_async.document.search(
            collection_name=self.collection_name,
            text=query,
            limit=k,
            metadata=metadata,
            **kwargs,
        )
        return self._to_scored_documents(results)

    async def asimilarity_search(
        self,
        query: str,
        k: int = 4,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to query, without scores (async)."""
        results = await self.asimilarity_search_with_relevance_scores(
            query, k, metadata=metadata, **kwargs
        )
        return [doc for doc, _ in results]

    def similarity_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Unsupported in Zep Cloud: warns and returns an empty list."""
        warnings.warn("similarity_search_by_vector is not supported in Zep Cloud")
        return []

    async def asimilarity_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Unsupported in Zep Cloud: warns and returns an empty list."""
        warnings.warn("asimilarity_search_by_vector is not supported in Zep Cloud")
        return []

    def max_marginal_relevance_search(
        self,
        query: str,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND
        diversity among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
                Zep determines this automatically and this parameter is
                ignored.
            lambda_mult: Number between 0 and 1 that determines the degree
                of diversity among the results with 0 corresponding
                to maximum diversity and 1 to minimum diversity.
                Defaults to 0.5.
            metadata: Optional, metadata to filter the resulting set of
                retrieved docs
            kwargs: Forwarded unchanged to the Zep document search call.

        Returns:
            List of Documents selected by maximal marginal relevance.
        """
        results = self._client.document.search(
            collection_name=self.collection_name,
            text=query,
            limit=k,
            metadata=metadata,
            search_type="mmr",
            mmr_lambda=lambda_mult,
            **kwargs,
        )
        return [
            Document(page_content=str(d.content), metadata=d.metadata)
            for d in results.results or []
        ]

    async def amax_marginal_relevance_search(
        self,
        query: str,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance (async).

        Takes the same arguments as ``max_marginal_relevance_search``;
        ``fetch_k`` is likewise not forwarded to Zep.
        """
        results = await self._client_async.document.search(
            collection_name=self.collection_name,
            text=query,
            limit=k,
            metadata=metadata,
            search_type="mmr",
            mmr_lambda=lambda_mult,
            **kwargs,
        )
        return [
            Document(page_content=str(d.content), metadata=d.metadata)
            for d in results.results or []
        ]

    def max_marginal_relevance_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Unsupported in Zep Cloud: warns and returns an empty list."""
        warnings.warn(
            "max_marginal_relevance_search_by_vector is not supported in Zep Cloud"
        )
        return []

    async def amax_marginal_relevance_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Unsupported in Zep Cloud: warns and returns an empty list."""
        warnings.warn(
            "amax_marginal_relevance_search_by_vector is not supported in Zep Cloud"
        )
        return []

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        collection_name: str = "",
        api_key: Optional[str] = None,
        **kwargs: Any,
    ) -> ZepCloudVectorStore:
        """Return a ZepCloudVectorStore instance initialized from texts.

        If the collection does not exist, it will be created.

        Args:
            texts (List[str]): The list of texts to add to the vectorstore.
            embedding (Embeddings): Part of the signature but not used by
                this method.
            metadatas (Optional[List[Dict[str, Any]]]): Optional list of
                metadata associated with the texts.
            collection_name (str): The name of the collection in the Zep
                store.
            api_key (str): The API key for the Zep API.
            kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ZepCloudVectorStore: The store, after the texts were added.

        Raises:
            ValueError: If ``api_key`` is missing or empty.
        """
        if not api_key:
            raise ValueError("api_key must be specified when using ZepVectorStore.")
        vecstore = cls(
            collection_name=collection_name,
            api_key=api_key,
        )
        vecstore.add_texts(texts, metadatas)
        return vecstore

    def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
        """Delete by Zep vector UUIDs.

        Args:
            ids: The UUIDs of the vectors to delete.
            kwargs: Accepted for interface compatibility; not used here.

        Raises:
            ValueError: If no UUIDs are provided.
        """
        if not ids:
            raise ValueError("No uuids provided to delete.")
        # One delete call is issued per UUID.
        for u in ids:
            self._client.document.delete_document(self.collection_name, u)