Source code for langchain.agents.output_parsers.xml
import re
from typing import Literal, Optional, Union

from langchain_core.agents import AgentAction, AgentFinish
from pydantic import Field

from langchain.agents import AgentOutputParser


def _unescape(text: str) -> str:
    """Convert custom tag delimiters back into XML tags.

    Every ``[[name]]`` / ``[[/name]]`` delimiter for the ``tool``,
    ``tool_input`` and ``observation`` tags is replaced by the corresponding
    ``<name>`` / ``</name>`` XML tag. Any other text is left untouched.

    Args:
        text: The string that may contain escaped tag delimiters.

    Returns:
        A copy of ``text`` with the delimiters replaced by XML tags.
    """
    replacements = {
        "[[tool]]": "<tool>",
        "[[/tool]]": "</tool>",
        "[[tool_input]]": "<tool_input>",
        "[[/tool_input]]": "</tool_input>",
        "[[observation]]": "<observation>",
        "[[/observation]]": "</observation>",
    }
    # No delimiter is a substring of another, so the replacement order
    # does not affect the result.
    for repl, orig in replacements.items():
        text = text.replace(repl, orig)
    return text
class XMLAgentOutputParser(AgentOutputParser):
    """Parses tool invocations and final answers from XML-formatted agent output.

    This parser extracts structured information from XML tags to determine
    whether an agent should perform a tool action or provide a final answer.
    It includes built-in escaping support to safely handle tool names and
    inputs containing XML special characters.

    Args:
        escape_format: The escaping format to use when parsing XML content.
            Supports 'minimal' which uses custom delimiters like [[tool]] to
            replace XML tags within content, preventing parsing conflicts.
            Use 'minimal' if using a corresponding encoding format that uses
            the _escape function when formatting the output (e.g., with
            format_xml).

    Expected formats:
        Tool invocation (returns AgentAction):
            <tool>search</tool>
            <tool_input>what is 2 + 2</tool_input>

        Final answer (returns AgentFinish):
            <final_answer>The answer is 4</final_answer>

    Note:
        Minimal escaping allows tool names containing XML tags to be safely
        represented. For example, a tool named "search<tool>nested</tool>"
        would be escaped as "search[[tool]]nested[[/tool]]" in the XML and
        automatically unescaped during parsing.

    Raises:
        ValueError: If the input doesn't match either expected XML format or
            contains malformed XML structure.
    """

    escape_format: Optional[Literal["minimal"]] = Field(default="minimal")
    """The format to use for escaping XML characters.

    minimal - uses custom delimiters to replace XML tags within content,
    preventing parsing conflicts. This is the only supported format currently.

    None - no escaping is applied, which may lead to parsing conflicts.
    """

    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
        """Parse raw agent output into a tool action or a final answer.

        Tool invocations take precedence: if the text contains any
        ``<tool>...</tool>`` block, it is handled as a tool call and a
        ``<final_answer>`` block, if present, is not examined.

        Args:
            text: The raw XML-formatted output produced by the agent.

        Returns:
            An ``AgentAction`` when exactly one ``<tool>`` block is found. Its
            ``tool_input`` is the content of the single ``<tool_input>`` block,
            or an empty string when none is present. Otherwise an
            ``AgentFinish`` whose ``"output"`` return value is the content of
            the single ``<final_answer>`` block. In both cases ``log`` is the
            unmodified ``text``.

        Raises:
            ValueError: If more than one ``<tool>`` or ``<tool_input>`` block
                is found, if the final-answer tags are present but do not form
                exactly one block, or if neither format is present.
        """
        # Check for tool invocation first. DOTALL lets the tag content span
        # several lines; the non-greedy group stops at the first closing tag.
        tool_matches = re.findall(r"<tool>(.*?)</tool>", text, re.DOTALL)
        if tool_matches:
            if len(tool_matches) != 1:
                msg = (
                    f"Malformed tool invocation: expected exactly one <tool> block, "
                    f"but found {len(tool_matches)}."
                )
                raise ValueError(msg)
            _tool = tool_matches[0]

            # Match optional tool input
            input_matches = re.findall(
                r"<tool_input>(.*?)</tool_input>", text, re.DOTALL
            )
            if len(input_matches) > 1:
                msg = (
                    f"Malformed tool invocation: expected at most one <tool_input> "
                    f"block, but found {len(input_matches)}."
                )
                raise ValueError(msg)
            _tool_input = input_matches[0] if input_matches else ""

            # Unescape if minimal escape format is used
            if self.escape_format == "minimal":
                _tool = _unescape(_tool)
                _tool_input = _unescape(_tool_input)

            return AgentAction(tool=_tool, tool_input=_tool_input, log=text)

        # Check for final answer
        if "<final_answer>" in text and "</final_answer>" in text:
            matches = re.findall(
                r"<final_answer>(.*?)</final_answer>", text, re.DOTALL
            )
            # Both tags can be present without forming one well-ordered
            # block (e.g. closing tag first, or several blocks).
            if len(matches) != 1:
                msg = (
                    "Malformed output: expected exactly one "
                    "<final_answer>...</final_answer> block."
                )
                raise ValueError(msg)
            answer = matches[0]

            # Unescape custom delimiters in final answer
            if self.escape_format == "minimal":
                answer = _unescape(answer)

            return AgentFinish(return_values={"output": answer}, log=text)

        msg = (
            "Malformed output: expected either a tool invocation "
            "or a final answer in XML format."
        )
        raise ValueError(msg)