Source code for langchain.chains.combine_documents.map_rerank
"""Combining documents by mapping a chain over them first, then reranking results."""from__future__importannotationsfromtypingimportAny,Dict,List,Optional,Sequence,Tuple,Type,Union,castfromlangchain_core._apiimportdeprecatedfromlangchain_core.callbacksimportCallbacksfromlangchain_core.documentsimportDocumentfromlangchain_core.runnables.configimportRunnableConfigfromlangchain_core.runnables.utilsimportcreate_modelfrompydanticimportBaseModel,ConfigDict,model_validatorfromtyping_extensionsimportSelffromlangchain.chains.combine_documents.baseimportBaseCombineDocumentsChainfromlangchain.chains.llmimportLLMChainfromlangchain.output_parsers.regeximportRegexParser
@deprecated(
    since="0.3.1",
    removal="1.0",
    message=(
        "This class is deprecated. Please see the migration guide here for "
        "a recommended replacement: "
        "https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain/"  # noqa: E501
    ),
)
class MapRerankDocumentsChain(BaseCombineDocumentsChain):
    """Combining documents by mapping a chain over them, then reranking results.

    This algorithm calls an LLMChain on each input document. The LLMChain is
    expected to have an OutputParser that parses the result into both an answer
    (`answer_key`) and a score (`rank_key`). The answer with the highest score
    is then returned.

    Example:
        .. code-block:: python

            from langchain.chains import MapRerankDocumentsChain, LLMChain
            from langchain_core.prompts import PromptTemplate
            from langchain_community.llms import OpenAI
            from langchain.output_parsers.regex import RegexParser

            document_variable_name = "context"
            llm = OpenAI()
            # The prompt here should take as an input variable the
            # `document_variable_name`
            # The actual prompt will need to be a lot more complex, this is just
            # an example.
            prompt_template = (
                "Use the following context to tell me the chemical formula "
                "for water. Output both your answer and a score of how confident "
                "you are. Context: {context}"
            )
            output_parser = RegexParser(
                regex=r"(.*?)\nScore: (.*)",
                output_keys=["answer", "score"],
            )
            prompt = PromptTemplate(
                template=prompt_template,
                input_variables=["context"],
                output_parser=output_parser,
            )
            llm_chain = LLMChain(llm=llm, prompt=prompt)
            chain = MapRerankDocumentsChain(
                llm_chain=llm_chain,
                document_variable_name=document_variable_name,
                rank_key="score",
                answer_key="answer",
            )
    """

    llm_chain: LLMChain
    """Chain to apply to each document individually."""
    document_variable_name: str
    """The variable name in the llm_chain to put the documents in.
    If only one variable in the llm_chain, this need not be provided."""
    rank_key: str
    """Key in output of llm_chain to rank on."""
    answer_key: str
    """Key in output of llm_chain to return as answer."""
    metadata_keys: Optional[List[str]] = None
    """Additional metadata from the chosen document to return."""
    return_intermediate_steps: bool = False
    """Return intermediate steps.
    Intermediate steps include the results of calling llm_chain on each document."""

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        extra="forbid",
    )

    def get_output_schema(
        self, config: Optional[RunnableConfig] = None
    ) -> Type[BaseModel]:
        # Build a dynamic pydantic model describing this chain's output:
        # always the answer key, plus intermediate steps / metadata when enabled.
        schema: Dict[str, Any] = {
            self.output_key: (str, None),
        }
        if self.return_intermediate_steps:
            schema["intermediate_steps"] = (List[str], None)
        if self.metadata_keys:
            schema.update({key: (Any, None) for key in self.metadata_keys})

        return create_model("MapRerankOutput", **schema)

    @property
    def output_keys(self) -> List[str]:
        """Expect input key.

        :meta private:
        """
        keys = super().output_keys
        if self.return_intermediate_steps:
            keys = keys + ["intermediate_steps"]
        if self.metadata_keys is not None:
            keys += self.metadata_keys
        return keys

    @model_validator(mode="after")
    def validate_llm_output(self) -> Self:
        """Validate that the combine chain outputs a dictionary."""
        output_parser = self.llm_chain.prompt.output_parser
        if not isinstance(output_parser, RegexParser):
            raise ValueError(
                "Output parser of llm_chain should be a RegexParser,"
                f" got {output_parser}"
            )
        # Both the ranking key and the answer key must be produced by the parser.
        output_keys = output_parser.output_keys
        if self.rank_key not in output_keys:
            raise ValueError(
                f"Got {self.rank_key} as key to rank on, but did not find "
                f"it in the llm_chain output keys ({output_keys})"
            )
        if self.answer_key not in output_keys:
            raise ValueError(
                f"Got {self.answer_key} as key to return, but did not find "
                f"it in the llm_chain output keys ({output_keys})"
            )
        return self

    @model_validator(mode="before")
    @classmethod
    def get_default_document_variable_name(cls, values: Dict) -> Any:
        """Get default document variable name, if not provided."""
        if "llm_chain" not in values:
            raise ValueError("llm_chain must be provided")

        llm_chain_variables = values["llm_chain"].prompt.input_variables
        if "document_variable_name" not in values:
            # Only unambiguous when the prompt has exactly one input variable.
            if len(llm_chain_variables) == 1:
                values["document_variable_name"] = llm_chain_variables[0]
            else:
                raise ValueError(
                    "document_variable_name must be provided if there are "
                    "multiple llm_chain input_variables"
                )
        else:
            if values["document_variable_name"] not in llm_chain_variables:
                raise ValueError(
                    f"document_variable_name {values['document_variable_name']} was "
                    f"not found in llm_chain input_variables: {llm_chain_variables}"
                )
        return values
[docs]defcombine_docs(self,docs:List[Document],callbacks:Callbacks=None,**kwargs:Any)->Tuple[str,dict]:"""Combine documents in a map rerank manner. Combine by mapping first chain over all documents, then reranking the results. Args: docs: List of documents to combine callbacks: Callbacks to be passed through **kwargs: additional parameters to be passed to LLM calls (like other input variables besides the documents) Returns: The first element returned is the single string output. The second element returned is a dictionary of other keys to return. """results=self.llm_chain.apply_and_parse(# FYI - this is parallelized and so it is fast.[{**{self.document_variable_name:d.page_content},**kwargs}fordindocs],callbacks=callbacks,)returnself._process_results(docs,results)
[docs]asyncdefacombine_docs(self,docs:List[Document],callbacks:Callbacks=None,**kwargs:Any)->Tuple[str,dict]:"""Combine documents in a map rerank manner. Combine by mapping first chain over all documents, then reranking the results. Args: docs: List of documents to combine callbacks: Callbacks to be passed through **kwargs: additional parameters to be passed to LLM calls (like other input variables besides the documents) Returns: The first element returned is the single string output. The second element returned is a dictionary of other keys to return. """results=awaitself.llm_chain.aapply_and_parse(# FYI - this is parallelized and so it is fast.[{**{self.document_variable_name:d.page_content},**kwargs}fordindocs],callbacks=callbacks,)returnself._process_results(docs,results)