def __init__(self, embedding: Embeddings) -> None:
    """Create an empty in-memory vector store.

    Args:
        embedding: Embedding function kept on the instance as
            ``self.embedding``.
    """
    self.embedding = embedding
    # Maps a document id to its stored record.
    # TODO: would be nice to change to
    # Dict[str, Document] at some point (will be a breaking change)
    self.store: Dict[str, Dict[str, Any]] = {}
def add_documents(
    self,
    documents: List[Document],
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> List[str]:
    """Embed ``documents`` and add them to the store.

    Args:
        documents: Documents to embed and store.
        ids: Optional ids, one per document. When omitted or empty, each
            document's own ``id`` is used instead; a document without an id
            gets a freshly generated UUID4 string.
        kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The ids under which the documents were stored, in input order.

    Raises:
        ValueError: If ``ids`` is non-empty and its length differs from the
            number of documents.
    """
    texts = [doc.page_content for doc in documents]

    # Validate before embedding so an invalid call fails fast and never pays
    # for the embedding call.
    if ids and len(ids) != len(texts):
        raise ValueError(
            f"ids must be the same length as texts. "
            f"Got {len(ids)} ids and {len(texts)} texts."
        )

    vectors = self.embedding.embed_documents(texts)

    id_iterator: Iterator[Optional[str]] = (
        iter(ids) if ids else iter(doc.id for doc in documents)
    )

    ids_: List[str] = []
    for doc, vector in zip(documents, vectors):
        doc_id = next(id_iterator)
        # Fall back to a random id when neither the caller nor the document
        # supplied one.
        doc_id_ = doc_id if doc_id else str(uuid.uuid4())
        ids_.append(doc_id_)
        # Assigning by key overwrites any record already stored under this id.
        self.store[doc_id_] = {
            "id": doc_id_,
            "vector": vector,
            "text": doc.page_content,
            "metadata": doc.metadata,
        }
    return ids_
async def aadd_documents(
    self,
    documents: List[Document],
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> List[str]:
    """Asynchronously embed ``documents`` and add them to the store.

    Args:
        documents: Documents to embed and store.
        ids: Optional ids, one per document. When omitted or empty, each
            document's own ``id`` is used instead; a document without an id
            gets a freshly generated UUID4 string.
        kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The ids under which the documents were stored, in input order.

    Raises:
        ValueError: If ``ids`` is non-empty and its length differs from the
            number of documents.
    """
    texts = [doc.page_content for doc in documents]

    # Validate before embedding so an invalid call fails fast and never
    # awaits the embedding call.
    if ids and len(ids) != len(texts):
        raise ValueError(
            f"ids must be the same length as texts. "
            f"Got {len(ids)} ids and {len(texts)} texts."
        )

    vectors = await self.embedding.aembed_documents(texts)

    id_iterator: Iterator[Optional[str]] = (
        iter(ids) if ids else iter(doc.id for doc in documents)
    )

    ids_: List[str] = []
    for doc, vector in zip(documents, vectors):
        doc_id = next(id_iterator)
        # Fall back to a random id when neither the caller nor the document
        # supplied one.
        doc_id_ = doc_id if doc_id else str(uuid.uuid4())
        ids_.append(doc_id_)
        # Assigning by key overwrites any record already stored under this id.
        self.store[doc_id_] = {
            "id": doc_id_,
            "vector": vector,
            "text": doc.page_content,
            "metadata": doc.metadata,
        }
    return ids_
def get_by_ids(self, ids: Sequence[str], /) -> List[Document]:
    """Look up stored documents by id.

    Ids with no (or an empty) record in the store are skipped, so the result
    may be shorter than ``ids``.

    Args:
        ids: The ids of the documents to get.

    Returns:
        A list of Document objects rebuilt from the stored records, in the
        order the matching ids were given.
    """
    records = (self.store.get(key) for key in ids)
    return [
        Document(
            id=record["id"],
            page_content=record["text"],
            metadata=record["metadata"],
        )
        for record in records
        if record
    ]
@deprecated(
    alternative="VectorStore.add_documents",
    message=(
        "This was a beta API that was added in 0.2.11. "
        "It'll be removed in 0.3.0."
    ),
    since="0.2.29",
    removal="1.0",
)
def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse:
    """Embed ``items`` and write them to the store, keyed by id.

    Args:
        items: Documents to store. An item without an id is stored under a
            freshly generated UUID4 string.
        kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A mapping with ``"succeeded"`` (the ids written, in order) and
        ``"failed"`` (always an empty list here).
    """
    contents = [item.page_content for item in items]
    embedded = self.embedding.embed_documents(contents)

    succeeded = []
    for item, vector in zip(items, embedded):
        key = item.id or str(uuid.uuid4())
        succeeded.append(key)
        record = {
            "id": key,
            "vector": vector,
            "text": item.page_content,
            "metadata": item.metadata,
        }
        # Assigning by key replaces any record already stored under this id.
        self.store[key] = record

    return {"succeeded": succeeded, "failed": []}
@deprecated(
    alternative="VectorStore.aadd_documents",
    message=(
        "This was a beta API that was added in 0.2.11. "
        "It'll be removed in 0.3.0."
    ),
    since="0.2.29",
    removal="1.0",
)
async def aupsert(
    self, items: Sequence[Document], /, **kwargs: Any
) -> UpsertResponse:
    """Asynchronously embed ``items`` and write them to the store, keyed by id.

    Args:
        items: Documents to store. An item without an id is stored under a
            freshly generated UUID4 string.
        kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A mapping with ``"succeeded"`` (the ids written, in order) and
        ``"failed"`` (always an empty list here).
    """
    contents = [item.page_content for item in items]
    embedded = await self.embedding.aembed_documents(contents)

    succeeded = []
    for item, vector in zip(items, embedded):
        key = item.id or str(uuid.uuid4())
        succeeded.append(key)
        record = {
            "id": key,
            "vector": vector,
            "text": item.page_content,
            "metadata": item.metadata,
        }
        # Assigning by key replaces any record already stored under this id.
        self.store[key] = record

    return {"succeeded": succeeded, "failed": []}
async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]:
    """Async counterpart of ``get_by_ids``.

    Args:
        ids: The ids of the documents to get.

    Returns:
        A list of Document objects, exactly as returned by ``get_by_ids``.
    """
    # Nothing is awaited here: the lookup is delegated to the sync method.
    documents = self.get_by_ids(ids)
    return documents
def max_marginal_relevance_search_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    **kwargs: Any,
) -> List[Document]:
    """Select documents for a query vector via maximal marginal relevance.

    Args:
        embedding: Query vector.
        k: Passed to ``maximal_marginal_relevance`` as ``k``.
        fetch_k: Number of candidates requested from the similarity search
            before the MMR selection.
        lambda_mult: Passed to ``maximal_marginal_relevance`` as
            ``lambda_mult``.
        kwargs: Forwarded to the underlying similarity search.

    Returns:
        The first element of each candidate hit chosen by the MMR selection,
        in the order the selection returned them.

    Raises:
        ImportError: If numpy is not installed.
    """
    # Each hit is a 3-tuple; the first item is what gets returned and the
    # last is the stored vector (the middle one is presumably the score,
    # going by the helper's name).
    candidates = self._similarity_search_with_score_by_vector(
        embedding=embedding,
        k=fetch_k,
        **kwargs,
    )

    try:
        import numpy as np
    except ImportError as e:
        raise ImportError(
            "numpy must be installed to use max_marginal_relevance_search "
            "pip install numpy"
        ) from e

    query_vector = np.array(embedding, dtype=np.float32)
    candidate_vectors = [vec for _, _, vec in candidates]
    selected = maximal_marginal_relevance(
        query_vector,
        candidate_vectors,
        k=k,
        lambda_mult=lambda_mult,
    )
    return [candidates[position][0] for position in selected]
@classmethod
def load(
    cls, path: str, embedding: Embeddings, **kwargs: Any
) -> InMemoryVectorStore:
    """Load a vector store from a JSON file.

    Args:
        path: The path to load the vector store from.
        embedding: The embedding to use.
        kwargs: Additional arguments to pass to the constructor.

    Returns:
        A new instance of ``cls`` whose ``store`` is the content read from
        ``path``.
    """
    _path: Path = Path(path)
    # JSON is UTF-8; do not depend on the platform's locale default encoding.
    with _path.open("r", encoding="utf-8") as f:
        # NOTE(review): `load` here is the module-level helper, defined outside
        # this block. If it can instantiate objects described by the file,
        # only load files from trusted sources - confirm.
        store = load(json.load(f))
    vectorstore = cls(embedding=embedding, **kwargs)
    vectorstore.store = store
    return vectorstore
def dump(self, path: str) -> None:
    """Dump the vector store to a JSON file.

    Missing parent directories are created. The store is serialized in full
    before the file is opened, so a serialization error leaves any existing
    file at ``path`` untouched instead of truncated or half-written.

    Args:
        path: The path to dump the vector store to.
    """
    _path: Path = Path(path)
    # Serialize first: if this raises, nothing on disk has been modified yet.
    payload = json.dumps(dumpd(self.store), indent=2)
    _path.parent.mkdir(exist_ok=True, parents=True)
    _path.write_text(payload, encoding="utf-8")