Source code for langchain_core.indexing.in_memory

import uuid
from collections.abc import Sequence
from typing import Any, Optional, cast

from pydantic import Field

from langchain_core._api import beta
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.indexing import UpsertResponse
from langchain_core.indexing.base import DeleteResponse, DocumentIndex


[docs] @beta(message="Introduced in version 0.2.29. Underlying abstraction subject to change.") class InMemoryDocumentIndex(DocumentIndex): """In memory document index. This is an in-memory document index that stores documents in a dictionary. It provides a simple search API that returns documents by the number of counts the given query appears in the document. .. versionadded:: 0.2.29 """ store: dict[str, Document] = Field(default_factory=dict) top_k: int = 4
[docs] def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse: """Upsert items into the index.""" ok_ids = [] for item in items: if item.id is None: id_ = str(uuid.uuid4()) item_ = item.model_copy() item_.id = id_ else: item_ = item id_ = item.id self.store[id_] = item_ ok_ids.append(cast(str, item_.id)) return UpsertResponse(succeeded=ok_ids, failed=[])
[docs] def delete(self, ids: Optional[list[str]] = None, **kwargs: Any) -> DeleteResponse: """Delete by ID.""" if ids is None: msg = "IDs must be provided for deletion" raise ValueError(msg) ok_ids = [] for id_ in ids: if id_ in self.store: del self.store[id_] ok_ids.append(id_) return DeleteResponse( succeeded=ok_ids, num_deleted=len(ok_ids), num_failed=0, failed=[] )
[docs] def get(self, ids: Sequence[str], /, **kwargs: Any) -> list[Document]: """Get by ids.""" found_documents = [] for id_ in ids: if id_ in self.store: found_documents.append(self.store[id_]) return found_documents
def _get_relevant_documents( self, query: str, *, run_manager: CallbackManagerForRetrieverRun ) -> list[Document]: counts_by_doc = [] for document in self.store.values(): count = document.page_content.count(query) counts_by_doc.append((document, count)) counts_by_doc.sort(key=lambda x: x[1], reverse=True) return [doc.model_copy() for doc, count in counts_by_doc[: self.top_k]]