Source code for langchain_core.output_parsers.list

from __future__ import annotations

import csv
import re
from abc import abstractmethod
from collections import deque
from collections.abc import AsyncIterator, Iterator
from io import StringIO
from typing import Optional as Optional
from typing import TypeVar, Union

from langchain_core.messages import BaseMessage
from langchain_core.output_parsers.transform import BaseTransformOutputParser

T = TypeVar("T")


[docs] def droplastn(iter: Iterator[T], n: int) -> Iterator[T]: """Drop the last n elements of an iterator. Args: iter: The iterator to drop elements from. n: The number of elements to drop. Yields: The elements of the iterator, except the last n elements. """ buffer: deque[T] = deque() for item in iter: buffer.append(item) if len(buffer) > n: yield buffer.popleft()
[docs] class ListOutputParser(BaseTransformOutputParser[list[str]]): """Parse the output of an LLM call to a list.""" @property def _type(self) -> str: return "list"
[docs] @abstractmethod def parse(self, text: str) -> list[str]: """Parse the output of an LLM call. Args: text: The output of an LLM call. Returns: A list of strings. """
[docs] def parse_iter(self, text: str) -> Iterator[re.Match]: """Parse the output of an LLM call. Args: text: The output of an LLM call. Yields: A match object for each part of the output. """ raise NotImplementedError
def _transform( self, input: Iterator[Union[str, BaseMessage]] ) -> Iterator[list[str]]: buffer = "" for chunk in input: if isinstance(chunk, BaseMessage): # extract text chunk_content = chunk.content if not isinstance(chunk_content, str): continue chunk = chunk_content # add current chunk to buffer buffer += chunk # parse buffer into a list of parts try: done_idx = 0 # yield only complete parts for m in droplastn(self.parse_iter(buffer), 1): done_idx = m.end() yield [m.group(1)] buffer = buffer[done_idx:] except NotImplementedError: parts = self.parse(buffer) # yield only complete parts if len(parts) > 1: for part in parts[:-1]: yield [part] buffer = parts[-1] # yield the last part for part in self.parse(buffer): yield [part] async def _atransform( self, input: AsyncIterator[Union[str, BaseMessage]] ) -> AsyncIterator[list[str]]: buffer = "" async for chunk in input: if isinstance(chunk, BaseMessage): # extract text chunk_content = chunk.content if not isinstance(chunk_content, str): continue chunk = chunk_content # add current chunk to buffer buffer += chunk # parse buffer into a list of parts try: done_idx = 0 # yield only complete parts for m in droplastn(self.parse_iter(buffer), 1): done_idx = m.end() yield [m.group(1)] buffer = buffer[done_idx:] except NotImplementedError: parts = self.parse(buffer) # yield only complete parts if len(parts) > 1: for part in parts[:-1]: yield [part] buffer = parts[-1] # yield the last part for part in self.parse(buffer): yield [part]
ListOutputParser.model_rebuild()
[docs] class CommaSeparatedListOutputParser(ListOutputParser): """Parse the output of an LLM call to a comma-separated list.""" @classmethod def is_lc_serializable(cls) -> bool: """Check if the langchain object is serializable. Returns True.""" return True @classmethod def get_lc_namespace(cls) -> list[str]: """Get the namespace of the langchain object. Returns: A list of strings. Default is ["langchain", "output_parsers", "list"]. """ return ["langchain", "output_parsers", "list"]
[docs] def get_format_instructions(self) -> str: """Return the format instructions for the comma-separated list output.""" return ( "Your response should be a list of comma separated values, " "eg: `foo, bar, baz` or `foo,bar,baz`" )
[docs] def parse(self, text: str) -> list[str]: """Parse the output of an LLM call. Args: text: The output of an LLM call. Returns: A list of strings. """ try: reader = csv.reader( StringIO(text), quotechar='"', delimiter=",", skipinitialspace=True ) return [item for sublist in reader for item in sublist] except csv.Error: # keep old logic for backup return [part.strip() for part in text.split(",")]
@property def _type(self) -> str: return "comma-separated-list"
[docs] class NumberedListOutputParser(ListOutputParser): """Parse a numbered list.""" pattern: str = r"\d+\.\s([^\n]+)" """The pattern to match a numbered list item."""
[docs] def get_format_instructions(self) -> str: return ( "Your response should be a numbered list with each item on a new line. " "For example: \n\n1. foo\n\n2. bar\n\n3. baz" )
[docs] def parse(self, text: str) -> list[str]: """Parse the output of an LLM call. Args: text: The output of an LLM call. Returns: A list of strings. """ return re.findall(self.pattern, text)
[docs] def parse_iter(self, text: str) -> Iterator[re.Match]: """Parse the output of an LLM call. Args: text: The output of an LLM call. Yields: A match object for each part of the output. """ return re.finditer(self.pattern, text)
@property def _type(self) -> str: return "numbered-list"
[docs] class MarkdownListOutputParser(ListOutputParser): """Parse a Markdown list.""" pattern: str = r"^\s*[-*]\s([^\n]+)$" """The pattern to match a Markdown list item."""
[docs] def get_format_instructions(self) -> str: """Return the format instructions for the Markdown list output.""" return "Your response should be a markdown list, " "eg: `- foo\n- bar\n- baz`"
[docs] def parse(self, text: str) -> list[str]: """Parse the output of an LLM call. Args: text: The output of an LLM call. Returns: A list of strings. """ return re.findall(self.pattern, text, re.MULTILINE)
[docs] def parse_iter(self, text: str) -> Iterator[re.Match]: """Parse the output of an LLM call. Args: text: The output of an LLM call. Yields: A match object for each part of the output. """ return re.finditer(self.pattern, text, re.MULTILINE)
@property def _type(self) -> str: return "markdown-list"