Source code for langchain_core.example_selectors.length_based

"""Select examples based on length."""

import re
from typing import Callable

from pydantic import BaseModel, Field, model_validator
from typing_extensions import Self

from langchain_core.example_selectors.base import BaseExampleSelector
from langchain_core.prompts.prompt import PromptTemplate


def _get_length_based(text: str) -> int:
    """Return the number of pieces ``text`` splits into on spaces and newlines.

    Every single space or newline character acts as a separator, so the
    result is always one more than the number of separators. An empty string
    therefore measures 1, and consecutive separators contribute empty pieces
    that are still counted.

    Args:
        text: The text to measure.

    Returns:
        The separator count plus one.
    """
    separators = re.findall("\n| ", text)
    return len(separators) + 1


class LengthBasedExampleSelector(BaseExampleSelector, BaseModel):
    """Select a leading run of examples that fits within a length budget.

    Each example is rendered with ``example_prompt`` and measured with
    ``get_text_length``. At selection time the measured length of the inputs
    is subtracted from ``max_length``, and examples are taken in stored order
    for as long as the remaining budget allows.
    """

    examples: list[dict]
    """A list of the examples that the prompt template expects."""

    example_prompt: PromptTemplate
    """Prompt template used to render each example before it is measured."""

    get_text_length: Callable[[str], int] = _get_length_based
    """Function used to measure text length. Defaults to counting the pieces
    obtained by splitting on spaces and newlines."""

    max_length: int = 2048
    """Length budget shared by the inputs and the selected examples."""

    example_text_lengths: list[int] = Field(default_factory=list)  # :meta private:
    """Measured length of each example; extended by ``add_example``."""

    def add_example(self, example: dict[str, str]) -> None:
        """Append an example and record the length of its rendered form.

        Args:
            example: A dictionary with keys as input variables
                and values as their values.
        """
        self.examples.append(example)
        rendered = self.example_prompt.format(**example)
        measured = self.get_text_length(rendered)
        self.example_text_lengths.append(measured)

    async def aadd_example(self, example: dict[str, str]) -> None:
        """Async counterpart of ``add_example``; delegates to it directly.

        Args:
            example: A dictionary with keys as input variables
                and values as their values.
        """
        self.add_example(example)

    @model_validator(mode="after")
    def post_init(self) -> Self:
        """Fill in ``example_text_lengths`` when none were supplied.

        A non-empty ``example_text_lengths`` is kept untouched. Otherwise
        every example is rendered with ``example_prompt`` and then measured
        with ``get_text_length``.

        Returns:
            The validated instance.
        """
        if not self.example_text_lengths:
            # Render everything first, then measure, in example order.
            rendered = [self.example_prompt.format(**eg) for eg in self.examples]
            self.example_text_lengths = [
                self.get_text_length(text) for text in rendered
            ]
        return self

    def select_examples(self, input_variables: dict[str, str]) -> list[dict]:
        """Choose the examples that fit in the budget left after the inputs.

        The input values are joined with single spaces and measured; that
        length is deducted from ``max_length``. Examples are then taken in
        stored order, stopping once the budget is used up or at the first
        example that would push it below zero.

        Args:
            input_variables: A dictionary with keys as input variables
                and values as their values.

        Returns:
            A list of examples to include in the prompt.
        """
        joined_inputs = " ".join(input_variables.values())
        budget = self.max_length - self.get_text_length(joined_inputs)
        chosen = []
        for position, candidate in enumerate(self.examples):
            if budget <= 0:
                break
            leftover = budget - self.example_text_lengths[position]
            if leftover < 0:
                # This example does not fit; later ones are not considered.
                break
            chosen.append(candidate)
            budget = leftover
        return chosen

    async def aselect_examples(self, input_variables: dict[str, str]) -> list[dict]:
        """Async counterpart of ``select_examples``; delegates to it directly.

        Args:
            input_variables: A dictionary with keys as input variables
                and values as their values.

        Returns:
            A list of examples to include in the prompt.
        """
        return self.select_examples(input_variables)