Source code for langchain.agents.output_parsers.xml

import re
from typing import Literal, Optional, Union

from langchain_core.agents import AgentAction, AgentFinish
from pydantic import Field

from langchain.agents import AgentOutputParser


def _unescape(text: str) -> str:
    """Convert custom tag delimiters back into XML tags."""
    replacements = {
        "[[tool]]": "<tool>",
        "[[/tool]]": "</tool>",
        "[[tool_input]]": "<tool_input>",
        "[[/tool_input]]": "</tool_input>",
        "[[observation]]": "<observation>",
        "[[/observation]]": "</observation>",
    }
    for repl, orig in replacements.items():
        text = text.replace(repl, orig)
    return text


[docs] class XMLAgentOutputParser(AgentOutputParser): """Parses tool invocations and final answers from XML-formatted agent output. This parser extracts structured information from XML tags to determine whether an agent should perform a tool action or provide a final answer. It includes built-in escaping support to safely handle tool names and inputs containing XML special characters. Args: escape_format: The escaping format to use when parsing XML content. Supports 'minimal' which uses custom delimiters like [[tool]] to replace XML tags within content, preventing parsing conflicts. Use 'minimal' if using a corresponding encoding format that uses the _escape function when formatting the output (e.g., with format_xml). Expected formats: Tool invocation (returns AgentAction): <tool>search</tool> <tool_input>what is 2 + 2</tool_input> Final answer (returns AgentFinish): <final_answer>The answer is 4</final_answer> Note: Minimal escaping allows tool names containing XML tags to be safely represented. For example, a tool named "search<tool>nested</tool>" would be escaped as "search[[tool]]nested[[/tool]]" in the XML and automatically unescaped during parsing. Raises: ValueError: If the input doesn't match either expected XML format or contains malformed XML structure. """ escape_format: Optional[Literal["minimal"]] = Field(default="minimal") """The format to use for escaping XML characters. minimal - uses custom delimiters to replace XML tags within content, preventing parsing conflicts. This is the only supported format currently. None - no escaping is applied, which may lead to parsing conflicts. """
[docs] def parse(self, text: str) -> Union[AgentAction, AgentFinish]: # Check for tool invocation first tool_matches = re.findall(r"<tool>(.*?)</tool>", text, re.DOTALL) if tool_matches: if len(tool_matches) != 1: msg = ( f"Malformed tool invocation: expected exactly one <tool> block, " f"but found {len(tool_matches)}." ) raise ValueError(msg) _tool = tool_matches[0] # Match optional tool input input_matches = re.findall( r"<tool_input>(.*?)</tool_input>", text, re.DOTALL ) if len(input_matches) > 1: msg = ( f"Malformed tool invocation: expected at most one <tool_input> " f"block, but found {len(input_matches)}." ) raise ValueError(msg) _tool_input = input_matches[0] if input_matches else "" # Unescape if minimal escape format is used if self.escape_format == "minimal": _tool = _unescape(_tool) _tool_input = _unescape(_tool_input) return AgentAction(tool=_tool, tool_input=_tool_input, log=text) # Check for final answer if "<final_answer>" in text and "</final_answer>" in text: matches = re.findall(r"<final_answer>(.*?)</final_answer>", text, re.DOTALL) if len(matches) != 1: msg = ( "Malformed output: expected exactly one " "<final_answer>...</final_answer> block." ) raise ValueError(msg) answer = matches[0] # Unescape custom delimiters in final answer if self.escape_format == "minimal": answer = _unescape(answer) return AgentFinish(return_values={"output": answer}, log=text) msg = ( "Malformed output: expected either a tool invocation " "or a final answer in XML format." ) raise ValueError(msg)
[docs] def get_format_instructions(self) -> str: raise NotImplementedError
@property def _type(self) -> str: return "xml-agent"