Source code for langchain.chains.combine_documents.stuff
"""Chain that combines documents by stuffing into context."""fromtypingimportAny,Dict,List,Optional,Tuplefromlangchain_core._apiimportdeprecatedfromlangchain_core.callbacksimportCallbacksfromlangchain_core.documentsimportDocumentfromlangchain_core.language_modelsimportLanguageModelLikefromlangchain_core.output_parsersimportBaseOutputParser,StrOutputParserfromlangchain_core.promptsimportBasePromptTemplate,format_documentfromlangchain_core.runnablesimportRunnable,RunnablePassthroughfrompydanticimportConfigDict,Field,model_validatorfromlangchain.chains.combine_documents.baseimport(DEFAULT_DOCUMENT_PROMPT,DEFAULT_DOCUMENT_SEPARATOR,DOCUMENTS_KEY,BaseCombineDocumentsChain,_validate_prompt,)fromlangchain.chains.llmimportLLMChain
def create_stuff_documents_chain(
    llm: LanguageModelLike,
    prompt: BasePromptTemplate,
    *,
    output_parser: Optional[BaseOutputParser] = None,
    document_prompt: Optional[BasePromptTemplate] = None,
    document_separator: str = DEFAULT_DOCUMENT_SEPARATOR,
    document_variable_name: str = DOCUMENTS_KEY,
) -> Runnable[Dict[str, Any], Any]:
    """Create a chain for passing a list of Documents to a model.

    Args:
        llm: Language model.
        prompt: Prompt template. Must contain input variable "context" (override by
            setting document_variable_name), which will be used for passing in the
            formatted documents.
        output_parser: Output parser. Defaults to StrOutputParser.
        document_prompt: Prompt used for formatting each document into a string. Input
            variables can be "page_content" or any metadata keys that are in all
            documents. "page_content" will automatically retrieve the
            `Document.page_content`, and all other input variables will be
            automatically retrieved from the `Document.metadata` dictionary.
            Defaults to a prompt that only contains `Document.page_content`.
        document_separator: String separator to use between formatted document strings.
        document_variable_name: Variable name to use for the formatted documents in
            the prompt. Defaults to "context".

    Returns:
        An LCEL Runnable. The input is a dictionary that must have a "context" key
        that maps to a List[Document], and any other input variables expected in the
        prompt. The Runnable return type depends on the output_parser used.

    Example:
        .. code-block:: python

            # pip install -U langchain langchain-community

            from langchain_community.chat_models import ChatOpenAI
            from langchain_core.documents import Document
            from langchain_core.prompts import ChatPromptTemplate
            from langchain.chains.combine_documents import create_stuff_documents_chain

            prompt = ChatPromptTemplate.from_messages(
                [("system", "What are everyone's favorite colors:\\n\\n{context}")]
            )
            llm = ChatOpenAI(model="gpt-3.5-turbo")
            chain = create_stuff_documents_chain(llm, prompt)

            docs = [
                Document(page_content="Jesse loves red but not yellow"),
                Document(page_content="Jamal loves green but not as much as he loves orange"),
            ]

            chain.invoke({"context": docs})
    """  # noqa: E501
    _validate_prompt(prompt, document_variable_name)
    _document_prompt = document_prompt or DEFAULT_DOCUMENT_PROMPT
    _output_parser = output_parser or StrOutputParser()

    def format_docs(inputs: dict) -> str:
        return document_separator.join(
            format_document(doc, _document_prompt)
            for doc in inputs[document_variable_name]
        )

    return (
        RunnablePassthrough.assign(**{document_variable_name: format_docs}).with_config(
            run_name="format_inputs"
        )
        | prompt
        | llm
        | _output_parser
    ).with_config(run_name="stuff_documents_chain")
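The customization hooks above (`document_prompt`, `document_separator`,
`document_variable_name`) compose as in the following minimal sketch. The
`source` metadata key and the `FakeListLLM` stand-in model are illustrative
assumptions, not part of this module:

.. code-block:: python

    from langchain_core.documents import Document
    from langchain_core.language_models import FakeListLLM
    from langchain_core.prompts import ChatPromptTemplate, PromptTemplate

    from langchain.chains.combine_documents import create_stuff_documents_chain

    # Render each document with an assumed "source" metadata key; every
    # document passed in must then carry that key in its metadata.
    document_prompt = PromptTemplate.from_template("[{source}] {page_content}")
    # The prompt uses "docs" instead of the default "context" variable.
    prompt = ChatPromptTemplate.from_template("Summarize:\n\n{docs}")

    chain = create_stuff_documents_chain(
        FakeListLLM(responses=["stub summary"]),  # stand-in model for the sketch
        prompt,
        document_prompt=document_prompt,
        document_separator="\n---\n",
        document_variable_name="docs",
    )
    chain.invoke(
        {"docs": [Document(page_content="LangChain docs", metadata={"source": "web"})]}
    )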
[docs]@deprecated(since="0.2.13",removal="1.0",message=("This class is deprecated. Use the `create_stuff_documents_chain` constructor ""instead. See migration guide here: ""https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain/"# noqa: E501),)classStuffDocumentsChain(BaseCombineDocumentsChain):"""Chain that combines documents by stuffing into context. This chain takes a list of documents and first combines them into a single string. It does this by formatting each document into a string with the `document_prompt` and then joining them together with `document_separator`. It then adds that new string to the inputs with the variable name set by `document_variable_name`. Those inputs are then passed to the `llm_chain`. Example: .. code-block:: python from langchain.chains import StuffDocumentsChain, LLMChain from langchain_core.prompts import PromptTemplate from langchain_community.llms import OpenAI # This controls how each document will be formatted. Specifically, # it will be passed to `format_document` - see that function for more # details. document_prompt = PromptTemplate( input_variables=["page_content"], template="{page_content}" ) document_variable_name = "context" llm = OpenAI() # The prompt here should take as an input variable the # `document_variable_name` prompt = PromptTemplate.from_template( "Summarize this content: {context}" ) llm_chain = LLMChain(llm=llm, prompt=prompt) chain = StuffDocumentsChain( llm_chain=llm_chain, document_prompt=document_prompt, document_variable_name=document_variable_name ) """llm_chain:LLMChain"""LLM chain which is called with the formatted document string, along with any other inputs."""document_prompt:BasePromptTemplate=Field(default_factory=lambda:DEFAULT_DOCUMENT_PROMPT)"""Prompt to use to format each document, gets passed to `format_document`."""document_variable_name:str"""The variable name in the llm_chain to put the documents in. If only one variable in the llm_chain, this need not be provided."""document_separator:str="\n\n""""The string with which to join the formatted documents"""model_config=ConfigDict(arbitrary_types_allowed=True,extra="forbid",)@model_validator(mode="before")@classmethoddefget_default_document_variable_name(cls,values:Dict)->Any:"""Get default document variable name, if not provided. If only one variable is present in the llm_chain.prompt, we can infer that the formatted documents should be passed in with this variable name. """llm_chain_variables=values["llm_chain"].prompt.input_variablesif"document_variable_name"notinvalues:iflen(llm_chain_variables)==1:values["document_variable_name"]=llm_chain_variables[0]else:raiseValueError("document_variable_name must be provided if there are ""multiple llm_chain_variables")else:ifvalues["document_variable_name"]notinllm_chain_variables:raiseValueError(f"document_variable_name {values['document_variable_name']} was "f"not found in llm_chain input_variables: {llm_chain_variables}")returnvalues@propertydefinput_keys(self)->List[str]:extra_keys=[kforkinself.llm_chain.input_keysifk!=self.document_variable_name]returnsuper().input_keys+extra_keysdef_get_inputs(self,docs:List[Document],**kwargs:Any)->dict:"""Construct inputs from kwargs and docs. Format and then join all the documents together into one input with name `self.document_variable_name`. Also pluck any additional variables from **kwargs. Args: docs: List of documents to format and then join into single input **kwargs: additional inputs to chain, will pluck any other required arguments from here. 
Returns: dictionary of inputs to LLMChain """# Format each document according to the promptdoc_strings=[format_document(doc,self.document_prompt)fordocindocs]# Join the documents together to put them in the prompt.inputs={k:vfork,vinkwargs.items()ifkinself.llm_chain.prompt.input_variables}inputs[self.document_variable_name]=self.document_separator.join(doc_strings)returninputs
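`_get_inputs` is private, but the format-then-join behavior it implements can
be reproduced directly with `format_document`; a minimal sketch (the variable
names are illustrative):

.. code-block:: python

    from langchain_core.documents import Document
    from langchain_core.prompts import PromptTemplate, format_document

    doc_prompt = PromptTemplate.from_template("{page_content}")
    docs = [Document(page_content="a"), Document(page_content="b")]
    joined = "\n\n".join(format_document(d, doc_prompt) for d in docs)
    assert joined == "a\n\nb"
    # StuffDocumentsChain places this joined string under
    # `document_variable_name`, alongside any prompt inputs taken from **kwargs.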
    def prompt_length(self, docs: List[Document], **kwargs: Any) -> Optional[int]:
        """Return the prompt length given the documents passed in.

        This can be used by a caller to determine whether passing in a
        list of documents would exceed a certain prompt length. This is
        useful when trying to ensure that the size of a prompt remains
        below a certain context limit.

        Args:
            docs: List[Document], a list of documents to use to calculate the
                total prompt length.

        Returns:
            None if the method does not depend on the prompt length,
            otherwise the length of the prompt in tokens.
        """
        inputs = self._get_inputs(docs, **kwargs)
        prompt = self.llm_chain.prompt.format(**inputs)
        return self.llm_chain._get_num_tokens(prompt)
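A sketch of the caller-side check this method is designed for; the 4096-token
budget and the `chain`/`docs` objects (from the class-level example above) are
assumptions, not values defined in this module:

.. code-block:: python

    MAX_TOKENS = 4096  # assumed context budget for the illustration

    length = chain.prompt_length(docs)
    if length is not None and length > MAX_TOKENS:
        raise ValueError(f"Prompt needs {length} tokens, over the {MAX_TOKENS} budget")
    output, extra_returns = chain.combine_docs(docs)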
    def combine_docs(
        self, docs: List[Document], callbacks: Callbacks = None, **kwargs: Any
    ) -> Tuple[str, dict]:
        """Stuff all documents into one prompt and pass to LLM.

        Args:
            docs: List of documents to join together into one variable.
            callbacks: Optional callbacks to pass along.
            **kwargs: additional parameters to use to get inputs to LLMChain.

        Returns:
            The first element returned is the single string output. The second
            element returned is a dictionary of other keys to return.
        """
        inputs = self._get_inputs(docs, **kwargs)
        # Call predict on the LLM.
        return self.llm_chain.predict(callbacks=callbacks, **inputs), {}
    async def acombine_docs(
        self, docs: List[Document], callbacks: Callbacks = None, **kwargs: Any
    ) -> Tuple[str, dict]:
        """Async stuff all documents into one prompt and pass to LLM.

        Args:
            docs: List of documents to join together into one variable.
            callbacks: Optional callbacks to pass along.
            **kwargs: additional parameters to use to get inputs to LLMChain.

        Returns:
            The first element returned is the single string output. The second
            element returned is a dictionary of other keys to return.
        """
        inputs = self._get_inputs(docs, **kwargs)
        # Call predict on the LLM.
        return await self.llm_chain.apredict(callbacks=callbacks, **inputs), {}
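For the async path, `acombine_docs` is awaited inside a coroutine; the
`run_combine` helper below is illustrative (not part of this module) and
assumes the `chain` and `docs` from the class-level example:

.. code-block:: python

    import asyncio

    async def run_combine(chain: StuffDocumentsChain, docs: list) -> str:
        # acombine_docs returns (output_string, extra_return_dict)
        output, _ = await chain.acombine_docs(docs)
        return output

    # asyncio.run(run_combine(chain, docs))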