Source code for langchain_core.indexing.in_memory
"""In memory document index."""
import operator
import uuid
from collections.abc import Sequence
from typing import Any, Optional, cast
from pydantic import Field
from typing_extensions import override
from langchain_core._api import beta
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.indexing import UpsertResponse
from langchain_core.indexing.base import DeleteResponse, DocumentIndex
[docs]
@beta(message="Introduced in version 0.2.29. Underlying abstraction subject to change.")
class InMemoryDocumentIndex(DocumentIndex):
"""In memory document index.
This is an in-memory document index that stores documents in a dictionary.
It provides a simple search API that returns documents by the number of
counts the given query appears in the document.
.. versionadded:: 0.2.29
"""
store: dict[str, Document] = Field(default_factory=dict)
top_k: int = 4
[docs]
@override
def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse:
"""Upsert documents into the index.
Args:
items: Sequence of documents to add to the index.
**kwargs: Additional keyword arguments.
Returns:
A response object that contains the list of IDs that were
successfully added or updated in the index and the list of IDs that
failed to be added or updated.
"""
ok_ids = []
for item in items:
if item.id is None:
id_ = str(uuid.uuid4())
item_ = item.model_copy()
item_.id = id_
else:
item_ = item
id_ = item.id
self.store[id_] = item_
ok_ids.append(cast("str", item_.id))
return UpsertResponse(succeeded=ok_ids, failed=[])
[docs]
@override
def delete(self, ids: Optional[list[str]] = None, **kwargs: Any) -> DeleteResponse:
"""Delete by IDs.
Args:
ids: List of ids to delete.
Raises:
ValueError: If ids is None.
Returns:
A response object that contains the list of IDs that were successfully
deleted and the list of IDs that failed to be deleted.
"""
if ids is None:
msg = "IDs must be provided for deletion"
raise ValueError(msg)
ok_ids = []
for id_ in ids:
if id_ in self.store:
del self.store[id_]
ok_ids.append(id_)
return DeleteResponse(
succeeded=ok_ids, num_deleted=len(ok_ids), num_failed=0, failed=[]
)
[docs]
@override
def get(self, ids: Sequence[str], /, **kwargs: Any) -> list[Document]:
return [self.store[id_] for id_ in ids if id_ in self.store]
@override
def _get_relevant_documents(
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
) -> list[Document]:
counts_by_doc = []
for document in self.store.values():
count = document.page_content.count(query)
counts_by_doc.append((document, count))
counts_by_doc.sort(key=operator.itemgetter(1), reverse=True)
return [doc.model_copy() for doc, count in counts_by_doc[: self.top_k]]