[docs]classPandasDataFrameOutputParser(BaseOutputParser[dict[str,Any]]):"""Parse an output using Pandas DataFrame format.""""""The Pandas DataFrame to parse."""dataframe:Any@field_validator("dataframe")@classmethoddefvalidate_dataframe(cls,val:Any)->Any:importpandasaspdifissubclass(type(val),pd.DataFrame):returnvalifpd.DataFrame(val).empty:msg="DataFrame cannot be empty."raiseValueError(msg)msg="Wrong type for 'dataframe', must be a subclass \ of Pandas DataFrame (pd.DataFrame)"raiseTypeError(msg)
[docs]defparse_array(self,array:str,original_request_params:str,)->tuple[list[Union[int,str]],str]:parsed_array:list[Union[int,str]]=[]# Check if the format is [1,3,5]ifre.match(r"\[\d+(,\s*\d+)*\]",array):parsed_array=[int(i)foriinre.findall(r"\d+",array)]# Check if the format is [1..5]elifre.match(r"\[(\d+)\.\.(\d+)\]",array):match=re.match(r"\[(\d+)\.\.(\d+)\]",array)ifmatch:start,end=map(int,match.groups())parsed_array=list(range(start,end+1))else:msg=f"Unable to parse the array provided in {array}. \ Please check the format instructions."raiseOutputParserException(msg)# Check if the format is ["column_name"]elifre.match(r"\[[a-zA-Z0-9_]+(?:,[a-zA-Z0-9_]+)*\]",array):match=re.match(r"\[[a-zA-Z0-9_]+(?:,[a-zA-Z0-9_]+)*\]",array)ifmatch:parsed_array=list(map(str,match.group().strip("[]").split(",")))else:msg=f"Unable to parse the array provided in {array}. \ Please check the format instructions."raiseOutputParserException(msg)# Validate the arrayifnotparsed_array:msg=f"Invalid array format in '{original_request_params}'. \ Please check the format instructions."raiseOutputParserException(msg)if(isinstance(parsed_array[0],int)andparsed_array[-1]>self.dataframe.index.max()):msg=f"The maximum index {parsed_array[-1]} exceeds the maximum index of \ the Pandas DataFrame {self.dataframe.index.max()}."raiseOutputParserException(msg)returnparsed_array,original_request_params.split("[")[0]
[docs]defparse(self,request:str)->dict[str,Any]:stripped_request_params=Nonesplitted_request=request.strip().split(":")iflen(splitted_request)!=2:msg=f"Request '{request}' is not correctly formatted. \ Please refer to the format instructions."raiseOutputParserException(msg)result={}try:request_type,request_params=splitted_requestifrequest_typein{"Invalid column","Invalid operation"}:msg=f"{request}. Please check the format instructions."raiseOutputParserException(msg)array_exists=re.search(r"(\[.*?\])",request_params)ifarray_exists:parsed_array,stripped_request_params=self.parse_array(array_exists.group(1),request_params,)ifrequest_type=="column":filtered_df=self.dataframe[self.dataframe.index.isin(parsed_array)]iflen(parsed_array)==1:result[stripped_request_params]=filtered_df[stripped_request_params].iloc[parsed_array[0]]else:result[stripped_request_params]=filtered_df[stripped_request_params]elifrequest_type=="row":filtered_df=self.dataframe[self.dataframe.columns.intersection(parsed_array)]iflen(parsed_array)==1:result[stripped_request_params]=filtered_df.iloc[int(stripped_request_params)][parsed_array[0]]else:result[stripped_request_params]=filtered_df.iloc[int(stripped_request_params)]else:filtered_df=self.dataframe[self.dataframe.index.isin(parsed_array)]result[request_type]=getattr(filtered_df[stripped_request_params],request_type,)()else:ifrequest_type=="column":result[request_params]=self.dataframe[request_params]elifrequest_type=="row":result[request_params]=self.dataframe.iloc[int(request_params)]else:result[request_type]=getattr(self.dataframe[request_params],request_type,)()except(AttributeError,IndexError,KeyError)ase:ifrequest_typenotin{"column","row"}:msg=f"Unsupported request type '{request_type}'. \ Please check the format instructions."raiseOutputParserException(msg)fromemsg=f"""Requested index {request_paramsifstripped_request_paramsisNoneelsestripped_request_params} is out of bounds."""raiseOutputParserException(msg)fromereturnresult