def dependable_usearch_import() -> Any:
    """Import and return the ``usearch.index`` module.

    The import is delegated to ``guard_import``, so an error is raised
    when the ``usearch`` package is not available.

    Returns:
        Whatever ``guard_import`` returns for ``usearch.index``.
    """
    module_name = "usearch.index"
    return guard_import(module_name)
class USearch(VectorStore):
    """`USearch` vector store.

    To use, you should have the ``usearch`` python package installed.
    """

    def __init__(
        self,
        embedding: Embeddings,
        index: Any,
        docstore: Docstore,
        ids: List[str],
    ):
        """Initialize with necessary components.

        Args:
            embedding: Embedding model used for both documents and queries.
            index: Index object; this class calls ``index.add(keys, vectors)``
                and ``index.search(vector, k)`` on it.
            docstore: Document store queried with string ids.
            ids: Ids already present in the index. ``add_texts`` appends to
                this list in place.
        """
        self.embedding = embedding
        self.index = index
        self.docstore = docstore
        self.ids = ids

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[Dict]] = None,
        ids: Optional[Union[np.ndarray, list[str]]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of unique IDs. When omitted, ids continue
                numbering after the last stored id (starting at 0 for an
                empty store).

        Returns:
            List of ids from adding the texts into the vectorstore.

        Raises:
            ValueError: If the underlying docstore does not support adding.
        """
        if not isinstance(self.docstore, AddableMixin):
            raise ValueError(
                "If trying to add texts, the underlying docstore should support "
                f"adding items, which {self.docstore} does not"
            )

        # Materialize once: ``texts`` may be a one-shot iterator, and it is
        # needed both for embedding and for building the documents.
        texts = list(texts)
        if not texts:
            return []

        embeddings = self.embedding.embed_documents(texts)
        documents = [
            Document(page_content=text, metadata=metadatas[i] if metadatas else {})
            for i, text in enumerate(texts)
        ]

        if ids is None:
            # Only derive a numeric starting point when ids must be generated;
            # caller-supplied ids should not require numeric stored ids.
            next_id = int(self.ids[-1]) + 1 if self.ids else 0
            ids = np.array([str(next_id + offset) for offset in range(len(texts))])
        elif isinstance(ids, list):
            ids = np.array(ids)

        self.index.add(np.array(ids), np.array(embeddings))

        # Searches look documents up via ``str(key)``, so the docstore keys
        # (and the ids we keep/return) are normalized to plain strings.
        new_ids = [str(key) for key in ids.tolist()]
        self.docstore.add(dict(zip(new_ids, documents)))
        self.ids.extend(new_ids)
        return new_ids

    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.

        Returns:
            List of documents most similar to the query with distance.

        Raises:
            ValueError: If a key returned by the index has no matching
                document in the docstore.
        """
        query_embedding = self.embedding.embed_query(query)
        matches = self.index.search(np.array(query_embedding), k)

        docs_with_scores: List[Tuple[Document, float]] = []
        for key, score in zip(matches.keys, matches.distances):
            doc = self.docstore.search(str(key))
            if not isinstance(doc, Document):
                raise ValueError(f"Could not find document for id {key}, got {doc}")
            docs_with_scores.append((doc, score))
        return docs_with_scores

    def similarity_search(
        self,
        query: str,
        k: int = 4,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.

        Returns:
            List of Documents most similar to the query.

        Raises:
            ValueError: If a key returned by the index has no matching
                document in the docstore.
        """
        # Same lookup as the scored variant; just drop the distances.
        return [doc for doc, _ in self.similarity_search_with_score(query, k)]

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[Dict]] = None,
        ids: Optional[Union[np.ndarray, list[str]]] = None,
        metric: str = "cos",
        **kwargs: Any,
    ) -> USearch:
        """Construct USearch wrapper from raw documents.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Creates an in memory docstore
            3. Initializes the USearch database

        This is intended to be a quick way to get started.

        Args:
            texts: Texts to embed and index. Must be non-empty, because the
                index dimensionality is taken from the first embedding.
            embedding: Embedding model used for documents and queries.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of unique IDs. Defaults to "0", "1", ...
            metric: Metric name passed to the ``usearch`` index constructor.

        Returns:
            A ``USearch`` store populated with the given texts.

        Example:
            .. code-block:: python

                from langchain_community.vectorstores import USearch
                from langchain_community.embeddings import OpenAIEmbeddings

                embeddings = OpenAIEmbeddings()
                usearch = USearch.from_texts(texts, embeddings)
        """
        embeddings = embedding.embed_documents(texts)

        if ids is None:
            ids = np.array([str(position) for position in range(len(texts))])
        elif isinstance(ids, list):
            ids = np.array(ids)

        documents: List[Document] = [
            Document(page_content=text, metadata=metadatas[i] if metadatas else {})
            for i, text in enumerate(texts)
        ]

        # Searches look documents up via ``str(key)``, so docstore keys and
        # the stored id list are normalized to plain strings.
        str_ids = [str(key) for key in ids.tolist()]
        docstore = InMemoryDocstore(dict(zip(str_ids, documents)))

        usearch = dependable_usearch_import()
        index = usearch.Index(ndim=len(embeddings[0]), metric=metric)
        index.add(np.array(ids), np.array(embeddings))
        return cls(embedding, index, docstore, str_ids)