Source code for langchain_community.vectorstores.usearch

from __future__ import annotations

from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast

import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.utils import guard_import
from langchain_core.vectorstores import VectorStore

from langchain_community.docstore.base import AddableMixin, Docstore
from langchain_community.docstore.in_memory import InMemoryDocstore


[docs] def dependable_usearch_import() -> Any: """ Import usearch if available, otherwise raise error. """ return guard_import("usearch.index")
[docs] class USearch(VectorStore): """`USearch` vector store. To use, you should have the ``usearch`` python package installed. """
[docs] def __init__( self, embedding: Embeddings, index: Any, docstore: Docstore, ids: List[str], ): """Initialize with necessary components.""" self.embedding = embedding self.index = index self.docstore = docstore self.ids = ids
[docs] def add_texts( self, texts: Iterable[str], metadatas: Optional[List[Dict]] = None, ids: Optional[Union[np.ndarray, list[str]]] = None, **kwargs: Any, ) -> List[str]: """Run more texts through the embeddings and add to the vectorstore. Args: texts: Iterable of strings to add to the vectorstore. metadatas: Optional list of metadatas associated with the texts. ids: Optional list of unique IDs. Returns: List of ids from adding the texts into the vectorstore. """ if not isinstance(self.docstore, AddableMixin): raise ValueError( "If trying to add texts, the underlying docstore should support " f"adding items, which {self.docstore} does not" ) embeddings = self.embedding.embed_documents(list(texts)) documents = [] for i, text in enumerate(texts): metadata = metadatas[i] if metadatas else {} documents.append(Document(page_content=text, metadata=metadata)) last_id = int(self.ids[-1]) + 1 if ids is None: ids = np.array([str(last_id + id) for id, _ in enumerate(texts)]) elif isinstance(ids, list): ids = np.array(ids) self.index.add(np.array(ids), np.array(embeddings)) self.docstore.add(dict(zip(ids, documents))) self.ids.extend(ids) return cast(List[str], ids.tolist())
[docs] def similarity_search_with_score( self, query: str, k: int = 4, ) -> List[Tuple[Document, float]]: """Return docs most similar to query. Args: query: Text to look up documents similar to. k: Number of Documents to return. Defaults to 4. Returns: List of documents most similar to the query with distance. """ query_embedding = self.embedding.embed_query(query) matches = self.index.search(np.array(query_embedding), k) docs_with_scores: List[Tuple[Document, float]] = [] for id, score in zip(matches.keys, matches.distances): doc = self.docstore.search(str(id)) if not isinstance(doc, Document): raise ValueError(f"Could not find document for id {id}, got {doc}") docs_with_scores.append((doc, score)) return docs_with_scores
[docs] @classmethod def from_texts( cls, texts: List[str], embedding: Embeddings, metadatas: Optional[List[Dict]] = None, ids: Optional[Union[np.ndarray, list[str]]] = None, metric: str = "cos", **kwargs: Any, ) -> USearch: """Construct USearch wrapper from raw documents. This is a user friendly interface that: 1. Embeds documents. 2. Creates an in memory docstore 3. Initializes the USearch database This is intended to be a quick way to get started. Example: .. code-block:: python from langchain_community.vectorstores import USearch from langchain_community.embeddings import OpenAIEmbeddings embeddings = OpenAIEmbeddings() usearch = USearch.from_texts(texts, embeddings) """ embeddings = embedding.embed_documents(texts) documents: List[Document] = [] if ids is None: ids = np.array([str(id) for id, _ in enumerate(texts)]) elif isinstance(ids, list): ids = np.array(ids) for i, text in enumerate(texts): metadata = metadatas[i] if metadatas else {} documents.append(Document(page_content=text, metadata=metadata)) docstore = InMemoryDocstore(dict(zip(ids, documents))) usearch = guard_import("usearch.index") index = usearch.Index(ndim=len(embeddings[0]), metric=metric) index.add(np.array(ids), np.array(embeddings)) return cls(embedding, index, docstore, cast(List[str], ids.tolist()))