"""**Retriever** class returns Documents given a text **query**.

It is more general than a vector store. A retriever does not need to be able to
store documents, only to return (or retrieve) them. Vector stores can be used as
the backbone of a retriever, but there are other types of retrievers as well.

**Class hierarchy:**

.. code-block::

    BaseRetriever --> <name>Retriever  # Examples: ArxivRetriever, MergerRetriever

**Main helpers:**

.. code-block::

    RetrieverInput, RetrieverOutput, RetrieverLike, RetrieverOutputLike,
    Document, Serializable, Callbacks,
    CallbackManagerForRetrieverRun, AsyncCallbackManagerForRetrieverRun
"""

from __future__ import annotations

import warnings
from abc import ABC, abstractmethod
from inspect import signature
from typing import TYPE_CHECKING, Any, Optional

from pydantic import ConfigDict
from typing_extensions import Self, TypedDict

from langchain_core._api import deprecated
from langchain_core.documents import Document
from langchain_core.runnables import (
    Runnable,
    RunnableConfig,
    RunnableSerializable,
    ensure_config,
)
from langchain_core.runnables.config import run_in_executor

# These names are only needed for annotations, so they are imported for type
# checkers only (annotations are lazy thanks to the __future__ import above).
if TYPE_CHECKING:
    from langchain_core.callbacks.manager import (
        AsyncCallbackManagerForRetrieverRun,
        CallbackManagerForRetrieverRun,
        Callbacks,
    )

# A retriever consumes a query string ...
RetrieverInput = str
# ... and produces a list of Documents.
RetrieverOutput = list[Document]
# Any Runnable mapping a query string to a list of Documents.
RetrieverLike = Runnable[RetrieverInput, RetrieverOutput]
# Any Runnable producing a list of Documents, whatever its input type.
RetrieverOutputLike = Runnable[Any, RetrieverOutput]
class LangSmithRetrieverParams(TypedDict, total=False):
    """LangSmith parameters for tracing.

    The dict is declared with ``total=False``, so every key is optional and a
    caller may populate only the entries it knows about.
    """

    ls_retriever_name: str
    """Retriever name."""
    ls_vector_store_provider: Optional[str]
    """Vector store provider."""
    ls_embedding_provider: Optional[str]
    """Embedding provider."""
    ls_embedding_model: Optional[str]
    """Embedding model."""
class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
    """Abstract base class for a Document retrieval system.

    A retrieval system is defined as something that can take string queries and
    return the most 'relevant' Documents from some source.

    Usage:

    A retriever follows the standard Runnable interface, and should be used
    via the standard Runnable methods of `invoke`, `ainvoke`, `batch`, `abatch`.

    Implementation:

    When implementing a custom retriever, the class should implement
    the `_get_relevant_documents` method to define the logic for retrieving
    documents.

    Optionally, an async native implementation can be provided by overriding
    the `_aget_relevant_documents` method.

    Example: A retriever that returns the first 5 documents from a list of documents

        .. code-block:: python

            from langchain_core.documents import Document
            from langchain_core.retrievers import BaseRetriever
            from typing import List

            class SimpleRetriever(BaseRetriever):
                docs: List[Document]
                k: int = 5

                def _get_relevant_documents(self, query: str) -> List[Document]:
                    \"\"\"Return the first k documents from the list of documents\"\"\"
                    return self.docs[:self.k]

                async def _aget_relevant_documents(self, query: str) -> List[Document]:
                    \"\"\"(Optional) async native implementation.\"\"\"
                    return self.docs[:self.k]

    Example: A simple retriever based on a scikit-learn vectorizer

        .. code-block:: python

            from sklearn.metrics.pairwise import cosine_similarity

            class TFIDFRetriever(BaseRetriever, BaseModel):
                vectorizer: Any
                docs: List[Document]
                tfidf_array: Any
                k: int = 4

                class Config:
                    arbitrary_types_allowed = True

                def _get_relevant_documents(self, query: str) -> List[Document]:
                    # Ip -- (n_docs,x), Op -- (n_docs,n_Feats)
                    query_vec = self.vectorizer.transform([query])
                    # Op -- (n_docs,1) -- Cosine Sim with each doc
                    results = cosine_similarity(self.tfidf_array, query_vec).reshape((-1,))
                    return [self.docs[i] for i in results.argsort()[-self.k :][::-1]]
    """  # noqa: E501

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
    )

    # Set per subclass in ``__init_subclass__``: whether the subclass's
    # ``_get_relevant_documents`` declares a ``run_manager`` parameter.
    _new_arg_supported: bool = False
    # Set per subclass in ``__init_subclass__``: whether the subclass's
    # ``_get_relevant_documents`` declares parameters beyond
    # ``self``, ``query`` and ``run_manager``.
    _expects_other_args: bool = False

    tags: Optional[list[str]] = None
    """Optional list of tags associated with the retriever. Defaults to None.
    These tags will be associated with each call to this retriever,
    and passed as arguments to the handlers defined in `callbacks`.
    You can use these to e.g. identify a specific instance of a retriever with its
    use case.
    """
    metadata: Optional[dict[str, Any]] = None
    """Optional metadata associated with the retriever. Defaults to None.
    This metadata will be associated with each call to this retriever,
    and passed as arguments to the handlers defined in `callbacks`.
    You can use these to e.g. identify a specific instance of a retriever with its
    use case.
    """

    def __init_subclass__(cls, **kwargs: Any) -> None:
        """Adapt legacy subclasses and record what their implementation accepts.

        Runs once for every subclass definition and does three things:

        1. If the subclass overrode the public ``get_relevant_documents`` /
           ``aget_relevant_documents``, emit a ``DeprecationWarning``, move the
           override onto the private ``_get_relevant_documents`` /
           ``_aget_relevant_documents`` hook, and restore the base public method.
        2. Inspect the signature of ``_get_relevant_documents`` to set
           ``_new_arg_supported`` (accepts ``run_manager``) and
           ``_expects_other_args`` (declares any other extra parameter).
        3. If ``run_manager`` is not accepted and the async hook was not
           overridden, install an async fallback that runs the sync hook in an
           executor without passing ``run_manager``.

        Args:
            kwargs: Forwarded unchanged to ``super().__init_subclass__``.
        """
        super().__init_subclass__(**kwargs)
        # Version upgrade for old retrievers that implemented the public
        # methods directly.
        if cls.get_relevant_documents != BaseRetriever.get_relevant_documents:
            warnings.warn(
                "Retrievers must implement abstract `_get_relevant_documents` method"
                " instead of `get_relevant_documents`",
                DeprecationWarning,
                stacklevel=4,
            )
            swap = cls.get_relevant_documents
            cls.get_relevant_documents = (  # type: ignore[assignment]
                BaseRetriever.get_relevant_documents
            )
            cls._get_relevant_documents = swap  # type: ignore[assignment]
        if (
            hasattr(cls, "aget_relevant_documents")
            and cls.aget_relevant_documents != BaseRetriever.aget_relevant_documents
        ):
            warnings.warn(
                "Retrievers must implement abstract `_aget_relevant_documents` method"
                " instead of `aget_relevant_documents`",
                DeprecationWarning,
                stacklevel=4,
            )
            aswap = cls.aget_relevant_documents
            cls.aget_relevant_documents = (  # type: ignore[assignment]
                BaseRetriever.aget_relevant_documents
            )
            cls._aget_relevant_documents = aswap  # type: ignore[assignment]

        # Must be read after the swaps above so a relocated legacy override is
        # the signature being inspected.
        parameters = signature(cls._get_relevant_documents).parameters
        cls._new_arg_supported = "run_manager" in parameters
        if (
            not cls._new_arg_supported
            and cls._aget_relevant_documents == BaseRetriever._aget_relevant_documents
        ):
            # we need to tolerate no run_manager in _aget_relevant_documents signature
            async def _aget_relevant_documents(
                self: Self, query: str
            ) -> list[Document]:
                return await run_in_executor(None, self._get_relevant_documents, query)  # type: ignore

            cls._aget_relevant_documents = _aget_relevant_documents  # type: ignore[assignment]

        # If a V1 retriever broke the interface and expects additional arguments
        cls._expects_other_args = bool(
            set(parameters) - {"self", "query", "run_manager"}
        )

    def _get_ls_params(self, **kwargs: Any) -> LangSmithRetrieverParams:
        """Get standard params for tracing.

        The retriever name is ``self.get_name()`` with a leading ``"Retriever"``
        removed or, failing that, a trailing ``"Retriever"`` removed, then
        lower-cased.

        Args:
            kwargs: Accepted for signature compatibility; not used by this
                base implementation.

        Returns:
            Params containing only ``ls_retriever_name``.
        """
        name = self.get_name()
        # Only one affix is stripped: the prefix takes precedence over the suffix.
        if name.startswith("Retriever"):
            name = name.removeprefix("Retriever")
        else:
            name = name.removesuffix("Retriever")
        return LangSmithRetrieverParams(ls_retriever_name=name.lower())

    def invoke(
        self, input: str, config: Optional[RunnableConfig] = None, **kwargs: Any
    ) -> list[Document]:
        """Invoke the retriever to get relevant documents.

        Main entry point for synchronous retriever invocations.

        Args:
            input: The query string.
            config: Configuration for the retriever. Defaults to None.
            kwargs: Additional arguments to pass to the retriever. ``verbose``
                is read for callback configuration and ``run_id`` is consumed
                for the run; the remaining kwargs are forwarded to
                `_get_relevant_documents` only if its signature declares
                extra parameters.

        Returns:
            List of relevant documents.

        Raises:
            Exception: Any exception raised by `_get_relevant_documents` is
                reported via ``on_retriever_error`` and then re-raised.

        Examples:

        .. code-block:: python

            retriever.invoke("query")
        """
        from langchain_core.callbacks.manager import CallbackManager

        config = ensure_config(config)
        # Tracing params are merged last, so they win over same-named keys in
        # the caller's config metadata.
        inheritable_metadata = {
            **(config.get("metadata") or {}),
            **self._get_ls_params(**kwargs),
        }
        callback_manager = CallbackManager.configure(
            config.get("callbacks"),
            None,
            verbose=kwargs.get("verbose", False),
            inheritable_tags=config.get("tags"),
            local_tags=self.tags,
            inheritable_metadata=inheritable_metadata,
            local_metadata=self.metadata,
        )
        run_manager = callback_manager.on_retriever_start(
            None,
            input,
            name=config.get("run_name") or self.get_name(),
            # Popped so that ``run_id`` is never forwarded to the implementation.
            run_id=kwargs.pop("run_id", None),
        )
        try:
            _kwargs = kwargs if self._expects_other_args else {}
            if self._new_arg_supported:
                result = self._get_relevant_documents(
                    input, run_manager=run_manager, **_kwargs
                )
            else:
                result = self._get_relevant_documents(input, **_kwargs)
        except Exception as e:
            run_manager.on_retriever_error(e)
            raise
        else:
            run_manager.on_retriever_end(
                result,
            )
            return result

    async def ainvoke(
        self,
        input: str,
        config: Optional[RunnableConfig] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Asynchronously invoke the retriever to get relevant documents.

        Main entry point for asynchronous retriever invocations.

        Args:
            input: The query string.
            config: Configuration for the retriever. Defaults to None.
            kwargs: Additional arguments to pass to the retriever. ``verbose``
                is read for callback configuration and ``run_id`` is consumed
                for the run; the remaining kwargs are forwarded to
                `_aget_relevant_documents` only if the signature of
                `_get_relevant_documents` declares extra parameters.

        Returns:
            List of relevant documents.

        Raises:
            Exception: Any exception raised by `_aget_relevant_documents` is
                reported via ``on_retriever_error`` and then re-raised.

        Examples:

        .. code-block:: python

            await retriever.ainvoke("query")
        """
        from langchain_core.callbacks.manager import AsyncCallbackManager

        config = ensure_config(config)
        # Tracing params are merged last, so they win over same-named keys in
        # the caller's config metadata.
        inheritable_metadata = {
            **(config.get("metadata") or {}),
            **self._get_ls_params(**kwargs),
        }
        callback_manager = AsyncCallbackManager.configure(
            config.get("callbacks"),
            None,
            verbose=kwargs.get("verbose", False),
            inheritable_tags=config.get("tags"),
            local_tags=self.tags,
            inheritable_metadata=inheritable_metadata,
            local_metadata=self.metadata,
        )
        run_manager = await callback_manager.on_retriever_start(
            None,
            input,
            name=config.get("run_name") or self.get_name(),
            # Popped so that ``run_id`` is never forwarded to the implementation.
            run_id=kwargs.pop("run_id", None),
        )
        try:
            _kwargs = kwargs if self._expects_other_args else {}
            if self._new_arg_supported:
                result = await self._aget_relevant_documents(
                    input, run_manager=run_manager, **_kwargs
                )
            else:
                result = await self._aget_relevant_documents(input, **_kwargs)
        except Exception as e:
            await run_manager.on_retriever_error(e)
            raise
        else:
            await run_manager.on_retriever_end(
                result,
            )
            return result

    @abstractmethod
    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> list[Document]:
        """Get documents relevant to a query.

        Args:
            query: String to find relevant documents for.
            run_manager: The callback handler to use.

        Returns:
            List of relevant documents.
        """

    async def _aget_relevant_documents(
        self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
    ) -> list[Document]:
        """Asynchronously get documents relevant to a query.

        The default implementation runs the synchronous
        `_get_relevant_documents` in an executor, handing it the sync
        counterpart of ``run_manager``.

        Args:
            query: String to find relevant documents for.
            run_manager: The callback handler to use.

        Returns:
            List of relevant documents.
        """
        return await run_in_executor(
            None,
            self._get_relevant_documents,
            query,
            run_manager=run_manager.get_sync(),
        )

    @deprecated(since="0.1.46", alternative="invoke", removal="1.0")
    def get_relevant_documents(
        self,
        query: str,
        *,
        callbacks: Callbacks = None,
        tags: Optional[list[str]] = None,
        metadata: Optional[dict[str, Any]] = None,
        run_name: Optional[str] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Retrieve documents relevant to a query.

        Users should favor using `.invoke` or `.batch` rather than
        `get_relevant_documents` directly.

        Args:
            query: string to find relevant documents for.
            callbacks: Callback manager or list of callbacks. Defaults to None.
            tags: Optional list of tags associated with the retriever.
                These tags will be associated with each call to this retriever,
                and passed as arguments to the handlers defined in `callbacks`.
                Defaults to None.
            metadata: Optional metadata associated with the retriever.
                This metadata will be associated with each call to this retriever,
                and passed as arguments to the handlers defined in `callbacks`.
                Defaults to None.
            run_name: Optional name for the run. Defaults to None.
            kwargs: Additional arguments to pass to the retriever.

        Returns:
            List of relevant documents.
        """
        # Only truthy options are copied into the config, so empty values
        # behave the same as omitted ones.
        config: RunnableConfig = {}
        if callbacks:
            config["callbacks"] = callbacks
        if tags:
            config["tags"] = tags
        if metadata:
            config["metadata"] = metadata
        if run_name:
            config["run_name"] = run_name
        return self.invoke(query, config, **kwargs)

    @deprecated(since="0.1.46", alternative="ainvoke", removal="1.0")
    async def aget_relevant_documents(
        self,
        query: str,
        *,
        callbacks: Callbacks = None,
        tags: Optional[list[str]] = None,
        metadata: Optional[dict[str, Any]] = None,
        run_name: Optional[str] = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Asynchronously get documents relevant to a query.

        Users should favor using `.ainvoke` or `.abatch` rather than
        `aget_relevant_documents` directly.

        Args:
            query: string to find relevant documents for.
            callbacks: Callback manager or list of callbacks. Defaults to None.
            tags: Optional list of tags associated with the retriever.
                These tags will be associated with each call to this retriever,
                and passed as arguments to the handlers defined in `callbacks`.
                Defaults to None.
            metadata: Optional metadata associated with the retriever.
                This metadata will be associated with each call to this retriever,
                and passed as arguments to the handlers defined in `callbacks`.
                Defaults to None.
            run_name: Optional name for the run. Defaults to None.
            kwargs: Additional arguments to pass to the retriever.

        Returns:
            List of relevant documents.
        """
        # Only truthy options are copied into the config, so empty values
        # behave the same as omitted ones.
        config: RunnableConfig = {}
        if callbacks:
            config["callbacks"] = callbacks
        if tags:
            config["tags"] = tags
        if metadata:
            config["metadata"] = metadata
        if run_name:
            config["run_name"] = run_name
        return await self.ainvoke(query, config, **kwargs)