Source code for langchain.chains.openai_functions.citation_fuzzy_match
from typing import Iterator, List, Tuple

from langchain_core._api import deprecated
from langchain_core.language_models import BaseChatModel, BaseLanguageModel
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers.openai_functions import (
    PydanticOutputFunctionsParser,
)
from langchain_core.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import Runnable

from langchain.chains.llm import LLMChain
from langchain.chains.openai_functions.utils import get_llm_kwargs

class FactWithEvidence(BaseModel):
    """Class representing a single statement.

    Each fact has a body and a list of sources.
    If there are multiple facts, make sure to break them apart
    such that each one only uses a set of sources that are relevant to it.
    """

    fact: str = Field(..., description="Body of the sentence, as part of a response")
    substring_quote: List[str] = Field(
        ...,
        description=(
            "Each source should be a direct quote from the context, "
            "as a substring of the original content"
        ),
    )

    def _get_span(
        self, quote: str, context: str, errs: int = 100
    ) -> Iterator[Tuple[int, int]]:
        # Imported lazily so the optional third-party `regex` dependency is
        # only required when spans are actually computed.
        import regex

        minor = quote
        major = context

        # The `regex` module's fuzzy-matching syntax `(pattern){e<=N}` allows
        # up to N edits (insertions, deletions, substitutions). Start with an
        # exact match and progressively relax the error budget until the
        # quote is found or the budget `errs` is exhausted.
        errs_ = 0
        s = regex.search(f"({minor}){{e<={errs_}}}", major)
        while s is None and errs_ <= errs:
            errs_ += 1
            s = regex.search(f"({minor}){{e<={errs_}}}", major)

        if s is not None:
            # `spans()` yields the (start, end) offsets of the match within
            # the context.
            yield from s.spans()
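
    # A minimal sketch of the fuzzy lookup above (assumes the third-party
    # `regex` package is installed; the strings are made-up examples):
    #
    #     import regex
    #     m = regex.search("(Bob has brown eyes){e<=1}", "Bob has browne eyes.")
    #     m.spans()  # -> list of (start, end) offsets for the fuzzy match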

    def get_spans(self, context: str) -> Iterator[Tuple[int, int]]:
        for quote in self.substring_quote:
            yield from self._get_span(quote, context)

class QuestionAnswer(BaseModel):
    """A question and its answer as a list of facts, each of which should have a source.

    Each sentence contains a body and a list of sources.
    """

    question: str = Field(..., description="Question that was asked")
    answer: List[FactWithEvidence] = Field(
        ...,
        description=(
            "Body of the answer, each fact should be "
            "its separate object with a body and a list of sources"
        ),
    )
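
# A minimal sketch of how the two models above fit together (all values are
# made-up examples):
#
#     qa = QuestionAnswer(
#         question="What color are Bob's eyes?",
#         answer=[
#             FactWithEvidence(
#                 fact="Bob's eyes are brown.",
#                 substring_quote=["Bob has brown eyes"],
#             )
#         ],
#     )
#     list(qa.answer[0].get_spans("Alice has blue eyes. Bob has brown eyes."))
#     # -> [(21, 39)], the quote's start/end offsets within the context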

def create_citation_fuzzy_match_runnable(llm: BaseChatModel) -> Runnable:
    """Create a citation fuzzy match Runnable.

    Example usage:

        .. code-block:: python

            from langchain.chains import create_citation_fuzzy_match_runnable
            from langchain_openai import ChatOpenAI

            llm = ChatOpenAI(model="gpt-4o-mini")

            context = "Alice has blue eyes. Bob has brown eyes. Charlie has green eyes."
            question = "What color are Bob's eyes?"

            chain = create_citation_fuzzy_match_runnable(llm)
            chain.invoke({"question": question, "context": context})

    Args:
        llm: Language model to use for the chain. Must implement bind_tools.

    Returns:
        Runnable that can be used to answer questions with citations.
    """
    # Compare the class attribute rather than the bound method (`llm.bind_tools`
    # would create a new bound-method object that is never identical to the
    # plain function), so the check actually detects an un-overridden default.
    if type(llm).bind_tools is BaseChatModel.bind_tools:
        raise ValueError(
            "Language model must implement bind_tools to use this function."
        )
    prompt = ChatPromptTemplate(
        [
            SystemMessage(
                content=(
                    "You are a world class algorithm to answer "
                    "questions with correct and exact citations."
                )
            ),
            HumanMessagePromptTemplate.from_template(
                "Answer question using the following context."
                "\n\n{context}"
                "\n\nQuestion: {question}"
                "\n\nTips: Make sure to cite your sources, "
                "and use the exact words from the context."
            ),
        ]
    )
    # Bind the QuestionAnswer schema so the model returns structured output.
    return prompt | llm.with_structured_output(QuestionAnswer)
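
# A minimal sketch of consuming the runnable's structured output (assumes a
# chat model supporting bind_tools is configured; `result` is a QuestionAnswer):
#
#     result = chain.invoke({"question": question, "context": context})
#     for fact in result.answer:
#         for start, end in fact.get_spans(context):
#             print(fact.fact, "->", context[start:end])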

@deprecated(
    since="0.2.13",
    removal="1.0",
    alternative="create_citation_fuzzy_match_runnable",
)
def create_citation_fuzzy_match_chain(llm: BaseLanguageModel) -> LLMChain:
    """Create a citation fuzzy match chain.

    Args:
        llm: Language model to use for the chain.

    Returns:
        Chain (LLMChain) that can be used to answer questions with citations.
    """
    output_parser = PydanticOutputFunctionsParser(pydantic_schema=QuestionAnswer)
    schema = QuestionAnswer.schema()
    function = {
        "name": schema["title"],
        "description": schema["description"],
        "parameters": schema,
    }
    # Package the schema as OpenAI function-calling kwargs so the model is
    # constrained to emit a QuestionAnswer.
    llm_kwargs = get_llm_kwargs(function)
    messages = [
        SystemMessage(
            content=(
                "You are a world class algorithm to answer "
                "questions with correct and exact citations."
            )
        ),
        HumanMessage(content="Answer question using the following context"),
        HumanMessagePromptTemplate.from_template("{context}"),
        HumanMessagePromptTemplate.from_template("Question: {question}"),
        HumanMessage(
            content=(
                "Tips: Make sure to cite your sources, "
                "and use the exact words from the context."
            )
        ),
    ]
    prompt = ChatPromptTemplate(messages=messages)  # type: ignore[arg-type, call-arg]

    chain = LLMChain(
        llm=llm,
        prompt=prompt,
        llm_kwargs=llm_kwargs,
        output_parser=output_parser,
    )
    return chain
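
# A minimal sketch of the deprecated chain's usage, kept for reference; prefer
# create_citation_fuzzy_match_runnable (the model name here is an assumption):
#
#     from langchain_openai import ChatOpenAI
#
#     chain = create_citation_fuzzy_match_chain(ChatOpenAI(model="gpt-4o-mini"))
#     chain.run(question="What color are Bob's eyes?", context=context)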