from __future__ import annotations

import json
import re
from typing import Any, Callable

from langchain_core.exceptions import OutputParserException


def _replace_new_line(match: re.Match[str]) -> str:
    r"""Escape control characters and bare quotes in a matched value.

    Used as the replacement callback for the ``action_input`` pattern in
    ``_custom_parser``. Group 2 of the match is the raw value; groups 1 and 3
    are the text surrounding it and are returned untouched.

    Args:
        match: A match object with (at least) three groups.

    Returns:
        Group 1, the escaped group 2, and group 3 concatenated.
    """
    prefix, value, suffix = match.group(1, 2, 3)

    # (pattern, replacement) pairs, applied in order. The last pair escapes
    # any double quote that is not already preceded by a backslash.
    substitutions = (
        (r"\n", r"\\n"),
        (r"\r", r"\\r"),
        (r"\t", r"\\t"),
        (r'(?<!\\)"', r"\""),
    )
    for pattern, replacement in substitutions:
        value = re.sub(pattern, replacement, value)

    return prefix + value + suffix


def _custom_parser(multiline_string: str) -> str:
    r"""Escape raw newlines, carriage returns and tabs in ``action_input``.

    An LLM response may place literal newlines or tabs inside the
    ``"action_input"`` string value, which strict JSON does not allow. Each
    ``"action_input": "<value>"`` occurrence is located with a regex (the
    value is the shortest run of characters up to the next double quote) and
    rewritten through ``_replace_new_line``.

    Args:
        multiline_string: The text to process. ``bytes`` / ``bytearray``
            input is decoded with ``.decode()`` first.

    Returns:
        The text with the matched values escaped.
    """
    text = (
        multiline_string.decode()
        if isinstance(multiline_string, (bytes, bytearray))
        else multiline_string
    )
    return re.sub(
        r'("action_input"\:\s*")(.*?)(")',
        _replace_new_line,
        text,
        flags=re.DOTALL,
    )


# Adapted from https://github.com/KillianLucas/open-interpreter/blob/5b6080fae1f8c68938a1e4fa8667e3744084ee21/interpreter/utils/parse_partial_json.py
# MIT License
def parse_partial_json(s: str, *, strict: bool = False) -> Any:
    """Parse a JSON string that may be truncated or missing closing brackets.

    The string is first parsed as-is. If that fails, it is scanned once to
    escape raw newlines inside strings, close an unterminated string, and
    work out which closing brackets are missing. Parsing is then retried,
    dropping one trailing character at a time until it succeeds.

    Args:
        s: The JSON string to parse.
        strict: Passed through to ``json.loads``. Defaults to False.

    Returns:
        The parsed JSON value (not necessarily a dictionary), or ``None`` if
        a closing bracket is found that does not match the innermost open
        bracket.

    Raises:
        json.JSONDecodeError: If no prefix of the repaired string can be
            parsed; the error reported is the one for the original string.
    """
    # Fast path: the input is already valid JSON.
    try:
        return json.loads(s, strict=strict)
    except json.JSONDecodeError:
        pass

    closer_for = {"{": "}", "[": "]"}
    repaired = []  # characters of the repaired string
    pending_closers = []  # closers still owed, innermost last
    in_string = False
    escaped = False

    for ch in s:
        emitted = ch
        if in_string:
            if ch == '"' and not escaped:
                in_string = False
            elif ch == "\n" and not escaped:
                # A raw newline inside a string becomes the escape sequence.
                emitted = "\\n"
            elif ch == "\\":
                escaped = not escaped
            else:
                escaped = False
        elif ch == '"':
            in_string = True
            escaped = False
        elif ch in closer_for:
            pending_closers.append(closer_for[ch])
        elif ch in ("}", "]"):
            if not pending_closers or pending_closers[-1] != ch:
                # Mismatched closing character; the input is malformed.
                return None
            pending_closers.pop()

        repaired.append(emitted)

    # Terminate a string that was still open at the end of the input.
    if in_string:
        if escaped:
            # Remove the unterminated escape character.
            repaired.pop()
        repaired.append('"')

    # Closers are emitted innermost-first, i.e. in reverse opening order.
    closing_suffix = "".join(reversed(pending_closers))

    # Retry with progressively shorter prefixes until one parses.
    while repaired:
        candidate = "".join(repaired) + closing_suffix
        try:
            return json.loads(candidate, strict=strict)
        except json.JSONDecodeError:
            repaired.pop()

    # Nothing parsed: re-parse the original so its decode error propagates.
    return json.loads(s, strict=strict)
# Matches an opening triple-backtick fence with an optional "json" language
# tag; group 2 is everything after it (DOTALL lets it span lines). A trailing
# closing fence is left in group 2 and stripped later by `_parse_json`.
_json_markdown_re = re.compile(r"```(json)?(.*)", re.DOTALL)


def parse_json_markdown(
    json_string: str, *, parser: Callable[[str], Any] = parse_partial_json
) -> dict:
    """Parse a JSON string from a Markdown string.

    The whole input is tried first. If that raises a decode error, the text
    following the first triple-backtick fence (if any) is parsed instead.

    Args:
        json_string: The Markdown string.
        parser: Callable used to turn the cleaned-up string into a Python
            object. Defaults to ``parse_partial_json``.

    Returns:
        Whatever ``parser`` returns for the extracted JSON text (a dictionary
        for a JSON object).

    Raises:
        json.JSONDecodeError: If the second parsing attempt also fails.
    """
    try:
        return _parse_json(json_string, parser=parser)
    except json.JSONDecodeError:
        # Try to find the JSON string within triple backticks.
        match = _json_markdown_re.search(json_string)

        # If no fence was found, retry with the entire string;
        # otherwise use the content after the opening fence.
        json_str = json_string if match is None else match.group(2)
    return _parse_json(json_str, parser=parser)
_json_strip_chars = " \n\r\t`"


def _parse_json(
    json_str: str, *, parser: Callable[[str], Any] = parse_partial_json
) -> dict:
    """Clean up a JSON-bearing string and hand it to ``parser``.

    Args:
        json_str: The raw text, possibly surrounded by whitespace or
            backticks.
        parser: Callable that converts the cleaned text into a Python object.

    Returns:
        The value returned by ``parser``.
    """
    # Drop spaces, newlines, carriage returns, tabs and backticks at both ends.
    trimmed = json_str.strip(_json_strip_chars)

    # Escape newlines and other special characters inside the returned value.
    escaped = _custom_parser(trimmed)

    return parser(escaped)
def parse_and_check_json_markdown(text: str, expected_keys: list[str]) -> dict:
    """Parse JSON from a Markdown string and check it has the expected keys.

    Args:
        text: The Markdown string.
        expected_keys: The keys that must be present in the parsed object.

    Returns:
        The parsed JSON object as a Python dictionary.

    Raises:
        OutputParserException: If the JSON string is invalid, if the parsed
            value is not a JSON object (dictionary), or if any expected key
            is missing.
    """
    try:
        json_obj = parse_json_markdown(text)
    except json.JSONDecodeError as e:
        msg = f"Got invalid JSON object. Error: {e}"
        raise OutputParserException(msg) from e

    # The parser can return None, a list, a string or a number. A membership
    # test on those would raise TypeError or silently do substring/element
    # matching, so reject anything that is not a dict before checking keys.
    if not isinstance(json_obj, dict):
        msg = (
            "Got invalid return object. Expected a JSON object (dict), "
            f"but got {type(json_obj).__name__}: {json_obj}"
        )
        raise OutputParserException(msg)

    for key in expected_keys:
        if key not in json_obj:
            msg = (
                f"Got invalid return object. Expected key `{key}` "
                f"to be present, but got {json_obj}"
            )
            raise OutputParserException(msg)
    return json_obj