Source code for langchain.chains.combine_documents.refine
"""Combine documents by doing a first pass and then refining on more documents."""from__future__importannotationsfromtypingimportAny,Dict,List,Tuplefromlangchain_core._apiimportdeprecatedfromlangchain_core.callbacksimportCallbacksfromlangchain_core.documentsimportDocumentfromlangchain_core.promptsimportBasePromptTemplate,format_documentfromlangchain_core.prompts.promptimportPromptTemplatefrompydanticimportConfigDict,Field,model_validatorfromlangchain.chains.combine_documents.baseimport(BaseCombineDocumentsChain,)fromlangchain.chains.llmimportLLMChaindef_get_default_document_prompt()->PromptTemplate:returnPromptTemplate(input_variables=["page_content"],template="{page_content}")


@deprecated(
    since="0.3.1",
    removal="1.0",
    message=(
        "This class is deprecated. Please see the migration guide here for "
        "a recommended replacement: "
        "https://python.langchain.com/docs/versions/migrating_chains/refine_docs_chain/"  # noqa: E501
    ),
)
class RefineDocumentsChain(BaseCombineDocumentsChain):
    """Combine documents by doing a first pass and then refining on more documents.

    This algorithm first calls `initial_llm_chain` on the first document, passing
    that first document in with the variable name `document_variable_name`, and
    produces a new variable with the variable name `initial_response_name`.

    Then, it loops over every remaining document. This is called the "refine" step.
    It calls `refine_llm_chain`, passing in that document with the variable name
    `document_variable_name` as well as the previous response with the variable
    name `initial_response_name`.

    Example:
        .. code-block:: python

            from langchain.chains import RefineDocumentsChain, LLMChain
            from langchain_core.prompts import PromptTemplate
            from langchain_community.llms import OpenAI

            # This controls how each document will be formatted. Specifically,
            # it will be passed to `format_document` - see that function for more
            # details.
            document_prompt = PromptTemplate(
                input_variables=["page_content"],
                template="{page_content}"
            )
            document_variable_name = "context"
            llm = OpenAI()
            # The prompt here should take as an input variable the
            # `document_variable_name`
            prompt = PromptTemplate.from_template(
                "Summarize this content: {context}"
            )
            initial_llm_chain = LLMChain(llm=llm, prompt=prompt)
            initial_response_name = "prev_response"
            # The prompt here should take as an input variable the
            # `document_variable_name` as well as `initial_response_name`
            prompt_refine = PromptTemplate.from_template(
                "Here's your first summary: {prev_response}. "
                "Now add to it based on the following context: {context}"
            )
            refine_llm_chain = LLMChain(llm=llm, prompt=prompt_refine)
            chain = RefineDocumentsChain(
                initial_llm_chain=initial_llm_chain,
                refine_llm_chain=refine_llm_chain,
                document_prompt=document_prompt,
                document_variable_name=document_variable_name,
                initial_response_name=initial_response_name,
            )
    """

    initial_llm_chain: LLMChain
    """LLM chain to use on initial document."""
    refine_llm_chain: LLMChain
    """LLM chain to use when refining."""
    document_variable_name: str
    """The variable name in the initial_llm_chain to put the documents in.
    If only one variable in the initial_llm_chain, this need not be provided."""
    initial_response_name: str
    """The variable name to format the initial response in when refining."""
    document_prompt: BasePromptTemplate = Field(
        default_factory=_get_default_document_prompt
    )
    """Prompt to use to format each document, gets passed to `format_document`."""
    return_intermediate_steps: bool = False
    """Return the results of the refine steps in the output."""

    @property
    def output_keys(self) -> List[str]:
        """Expect input key.
        :meta private:
        """
        _output_keys = super().output_keys
        if self.return_intermediate_steps:
            _output_keys = _output_keys + ["intermediate_steps"]
        return _output_keys

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        extra="forbid",
    )

    @model_validator(mode="before")
    @classmethod
    def get_return_intermediate_steps(cls, values: Dict) -> Any:
        """For backwards compatibility."""
        if "return_refine_steps" in values:
            values["return_intermediate_steps"] = values["return_refine_steps"]
            del values["return_refine_steps"]
        return values

    @model_validator(mode="before")
    @classmethod
    def get_default_document_variable_name(cls, values: Dict) -> Any:
        """Get default document variable name, if not provided."""
        if "initial_llm_chain" not in values:
            raise ValueError("initial_llm_chain must be provided")

        llm_chain_variables = values["initial_llm_chain"].prompt.input_variables
        if "document_variable_name" not in values:
            if len(llm_chain_variables) == 1:
                values["document_variable_name"] = llm_chain_variables[0]
            else:
                raise ValueError(
                    "document_variable_name must be provided if there are "
                    "multiple llm_chain input_variables"
                )
        else:
            if values["document_variable_name"] not in llm_chain_variables:
                raise ValueError(
                    f"document_variable_name {values['document_variable_name']} was "
                    f"not found in llm_chain input_variables: {llm_chain_variables}"
                )
        return values
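
    # Example: with a single-variable initial prompt, `document_variable_name`
    # is inferred by the validator above. A minimal sketch (assumes `llm` and a
    # `refine_chain` are already defined):
    #
    #     prompt = PromptTemplate.from_template("Summarize: {context}")
    #     chain = RefineDocumentsChain(
    #         initial_llm_chain=LLMChain(llm=llm, prompt=prompt),
    #         refine_llm_chain=refine_chain,
    #         initial_response_name="prev_response",
    #     )
    #     chain.document_variable_name  # -> "context" (inferred)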

    def combine_docs(
        self, docs: List[Document], callbacks: Callbacks = None, **kwargs: Any
    ) -> Tuple[str, dict]:
        """Combine by running the initial chain on the first document, then
        refining on each subsequent document.

        Args:
            docs: List of documents to combine
            callbacks: Callbacks to be passed through
            **kwargs: additional parameters to be passed to LLM calls (like other
                input variables besides the documents)

        Returns:
            The first element returned is the single string output. The second
            element returned is a dictionary of other keys to return.
        """
        inputs = self._construct_initial_inputs(docs, **kwargs)
        res = self.initial_llm_chain.predict(callbacks=callbacks, **inputs)
        refine_steps = [res]
        for doc in docs[1:]:
            base_inputs = self._construct_refine_inputs(doc, res)
            inputs = {**base_inputs, **kwargs}
            res = self.refine_llm_chain.predict(callbacks=callbacks, **inputs)
            refine_steps.append(res)
        return self._construct_result(refine_steps, res)
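
    # Example call: a minimal sketch (assumes `chain` built as in the class
    # docstring above and a list of Documents `docs`):
    #
    #     output_text, extra = chain.combine_docs(docs)
    #     # With return_intermediate_steps=True, extra["intermediate_steps"]
    #     # holds the response produced after each pass.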

    async def acombine_docs(
        self, docs: List[Document], callbacks: Callbacks = None, **kwargs: Any
    ) -> Tuple[str, dict]:
        """Async combine by running the initial chain on the first document,
        then refining on each subsequent document.

        Args:
            docs: List of documents to combine
            callbacks: Callbacks to be passed through
            **kwargs: additional parameters to be passed to LLM calls (like other
                input variables besides the documents)

        Returns:
            The first element returned is the single string output. The second
            element returned is a dictionary of other keys to return.
        """
        inputs = self._construct_initial_inputs(docs, **kwargs)
        res = await self.initial_llm_chain.apredict(callbacks=callbacks, **inputs)
        refine_steps = [res]
        for doc in docs[1:]:
            base_inputs = self._construct_refine_inputs(doc, res)
            inputs = {**base_inputs, **kwargs}
            res = await self.refine_llm_chain.apredict(callbacks=callbacks, **inputs)
            refine_steps.append(res)
        return self._construct_result(refine_steps, res)
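

if __name__ == "__main__":
    # A minimal runnable sketch, assuming `langchain_community` is installed.
    # FakeListLLM is a test helper that returns canned responses in order, so
    # no API key is needed; the prompts mirror the class docstring example.
    from langchain_community.llms.fake import FakeListLLM

    llm = FakeListLLM(responses=["first summary", "refined summary"])
    initial_prompt = PromptTemplate.from_template("Summarize this content: {context}")
    refine_prompt = PromptTemplate.from_template(
        "Here's your first summary: {prev_response}. "
        "Now add to it based on the following context: {context}"
    )
    chain = RefineDocumentsChain(
        initial_llm_chain=LLMChain(llm=llm, prompt=initial_prompt),
        refine_llm_chain=LLMChain(llm=llm, prompt=refine_prompt),
        initial_response_name="prev_response",
        return_intermediate_steps=True,
    )
    docs = [
        Document(page_content="Apples are red."),
        Document(page_content="Apples are also sweet."),
    ]
    output, extra = chain.combine_docs(docs)
    print(output)  # -> "refined summary"
    print(extra["intermediate_steps"])  # -> ["first summary", "refined summary"]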