[docs]classPandasDataFrameOutputParser(BaseOutputParser[Dict[str,Any]]):"""Parse an output using Pandas DataFrame format.""""""The Pandas DataFrame to parse."""dataframe:Any@field_validator("dataframe")@classmethoddefvalidate_dataframe(cls,val:Any)->Any:importpandasaspdifissubclass(type(val),pd.DataFrame):returnvalifpd.DataFrame(val).empty:raiseValueError("DataFrame cannot be empty.")raiseTypeError("Wrong type for 'dataframe', must be a subclass \ of Pandas DataFrame (pd.DataFrame)")
[docs]defparse_array(self,array:str,original_request_params:str)->Tuple[List[Union[int,str]],str]:parsed_array:List[Union[int,str]]=[]# Check if the format is [1,3,5]ifre.match(r"\[\d+(,\s*\d+)*\]",array):parsed_array=[int(i)foriinre.findall(r"\d+",array)]# Check if the format is [1..5]elifre.match(r"\[(\d+)\.\.(\d+)\]",array):match=re.match(r"\[(\d+)\.\.(\d+)\]",array)ifmatch:start,end=map(int,match.groups())parsed_array=list(range(start,end+1))else:raiseOutputParserException(f"Unable to parse the array provided in {array}. \ Please check the format instructions.")# Check if the format is ["column_name"]elifre.match(r"\[[a-zA-Z0-9_]+(?:,[a-zA-Z0-9_]+)*\]",array):match=re.match(r"\[[a-zA-Z0-9_]+(?:,[a-zA-Z0-9_]+)*\]",array)ifmatch:parsed_array=list(map(str,match.group().strip("[]").split(",")))else:raiseOutputParserException(f"Unable to parse the array provided in {array}. \ Please check the format instructions.")# Validate the arrayifnotparsed_array:raiseOutputParserException(f"Invalid array format in '{original_request_params}'. \ Please check the format instructions.")elif(isinstance(parsed_array[0],int)andparsed_array[-1]>self.dataframe.index.max()):raiseOutputParserException(f"The maximum index {parsed_array[-1]} exceeds the maximum index of \ the Pandas DataFrame {self.dataframe.index.max()}.")returnparsed_array,original_request_params.split("[")[0]
[docs]defparse(self,request:str)->Dict[str,Any]:stripped_request_params=Nonesplitted_request=request.strip().split(":")iflen(splitted_request)!=2:raiseOutputParserException(f"Request '{request}' is not correctly formatted. \ Please refer to the format instructions.")result={}try:request_type,request_params=splitted_requestifrequest_typein{"Invalid column","Invalid operation"}:raiseOutputParserException(f"{request}. Please check the format instructions.")array_exists=re.search(r"(\[.*?\])",request_params)ifarray_exists:parsed_array,stripped_request_params=self.parse_array(array_exists.group(1),request_params)ifrequest_type=="column":filtered_df=self.dataframe[self.dataframe.index.isin(parsed_array)]iflen(parsed_array)==1:result[stripped_request_params]=filtered_df[stripped_request_params].iloc[parsed_array[0]]else:result[stripped_request_params]=filtered_df[stripped_request_params]elifrequest_type=="row":filtered_df=self.dataframe[self.dataframe.columns.intersection(parsed_array)]iflen(parsed_array)==1:result[stripped_request_params]=filtered_df.iloc[int(stripped_request_params)][parsed_array[0]]else:result[stripped_request_params]=filtered_df.iloc[int(stripped_request_params)]else:filtered_df=self.dataframe[self.dataframe.index.isin(parsed_array)]result[request_type]=getattr(filtered_df[stripped_request_params],request_type)()else:ifrequest_type=="column":result[request_params]=self.dataframe[request_params]elifrequest_type=="row":result[request_params]=self.dataframe.iloc[int(request_params)]else:result[request_type]=getattr(self.dataframe[request_params],request_type)()except(AttributeError,IndexError,KeyError):ifrequest_typenotin{"column","row"}:raiseOutputParserException(f"Unsupported request type '{request_type}'. \ Please check the format instructions.")raiseOutputParserException(f"""Requested index {request_paramsifstripped_request_paramsisNoneelsestripped_request_params} is out of bounds.""")returnresult