# Source code for langchain_google_vertexai.evaluators._core
"""Interfaces to be implemented by general evaluators.

Remove once these interfaces have been moved to lc-core.
"""

from __future__ import annotations

import logging
from abc import ABC, abstractmethod
from typing import Any, Optional, Union
from warnings import warn

from langchain_core.runnables.config import run_in_executor

logger = logging.getLogger(__name__)


class _EvalArgsMixin:
    """Shared validation of the ``input`` / ``reference`` evaluation arguments.

    Subclasses override ``requires_input`` and ``requires_reference`` to
    declare which optional arguments they actually consume.
    """

    @property
    def requires_reference(self) -> bool:
        """Whether a reference label must be supplied (``False`` by default)."""
        return False

    @property
    def requires_input(self) -> bool:
        """Whether an input string must be supplied (``False`` by default)."""
        return False

    @property
    def _skip_input_warning(self) -> str:
        """Message used when an input is passed but is not expected."""
        message = (
            f"Ignoring input in {self.__class__.__name__}, as it is not expected."
        )
        return message

    @property
    def _skip_reference_warning(self) -> str:
        """Message used when a reference is passed but is not expected."""
        message = (
            f"Ignoring reference in {self.__class__.__name__}, as it is not expected."
        )
        return message

    def _check_evaluation_args(
        self,
        reference: Optional[str] = None,
        input: Optional[str] = None,
    ) -> None:
        """Validate the optional evaluation arguments against the evaluator's needs.

        The input is checked first, then the reference. A value that is
        supplied but not required only triggers a warning; a value that is
        required but missing is an error.

        Args:
            reference: The reference label, if any.
            input: The input string, if any.

        Raises:
            ValueError: If ``requires_input`` is true and ``input`` is ``None``,
                or if ``requires_reference`` is true and ``reference`` is
                ``None``.
        """
        input_required = self.requires_input
        if input is None:
            if input_required:
                raise ValueError(
                    f"{self.__class__.__name__} requires an input string."
                )
        elif not input_required:
            warn(self._skip_input_warning)

        # Looked up only after the input check has passed, so a failure above
        # never touches the reference requirement.
        reference_required = self.requires_reference
        if reference is None:
            if reference_required:
                raise ValueError(
                    f"{self.__class__.__name__} requires a reference string."
                )
        elif not reference_required:
            warn(self._skip_reference_warning)


class StringEvaluator(_EvalArgsMixin, ABC):
    """Grade, tag, or otherwise evaluate a prediction.

    The prediction is judged relative to its input and/or a reference label.
    Concrete evaluators implement ``_evaluate_strings``; callers use
    ``evaluate_strings`` / ``aevaluate_strings``, which validate the arguments
    before delegating.
    """

    @property
    def evaluation_name(self) -> str:
        """Name of the evaluation: the name of the concrete class."""
        return self.__class__.__name__

    @property
    def requires_reference(self) -> bool:
        """Whether a reference label must be supplied (``False`` by default)."""
        return False

    @abstractmethod
    def _evaluate_strings(
        self,
        *,
        prediction: Union[str, Any],
        reference: Optional[Union[str, Any]] = None,
        input: Optional[Union[str, Any]] = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate a chain or LLM output, given an optional input and label.

        Args:
            prediction: The LLM or chain prediction to evaluate.
            reference: The reference label to evaluate against, if any.
            input: The input to consider during evaluation, if any.
            **kwargs: Additional keyword arguments, including callbacks,
                tags, etc.

        Returns:
            The evaluation results containing the score or value. It is
            recommended that the dictionary contain the following keys:

            - score: the score of the evaluation, if applicable.
            - value: the string value of the evaluation, if applicable.
            - reasoning: the reasoning for the evaluation, if applicable.
        """

    async def _aevaluate_strings(
        self,
        *,
        prediction: Union[str, Any],
        reference: Optional[Union[str, Any]] = None,
        input: Optional[Union[str, Any]] = None,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate a chain or LLM output.

        This default implementation hands the synchronous
        ``_evaluate_strings`` to ``run_in_executor``, passing ``None`` as the
        executor/config argument.

        Args:
            prediction: The LLM or chain prediction to evaluate.
            reference: The reference label to evaluate against, if any.
            input: The input to consider during evaluation, if any.
            **kwargs: Additional keyword arguments, including callbacks,
                tags, etc.

        Returns:
            The evaluation results containing the score or value. It is
            recommended that the dictionary contain the following keys:

            - score: the score of the evaluation, if applicable.
            - value: the string value of the evaluation, if applicable.
            - reasoning: the reasoning for the evaluation, if applicable.
        """
        outcome = await run_in_executor(
            None,
            self._evaluate_strings,
            prediction=prediction,
            reference=reference,
            input=input,
            **kwargs,
        )
        return outcome

    def evaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Validate the arguments, then evaluate a chain or LLM output.

        Args:
            prediction: The LLM or chain prediction to evaluate.
            reference: The reference label to evaluate against, if any.
            input: The input to consider during evaluation, if any.
            **kwargs: Additional keyword arguments, including callbacks,
                tags, etc.

        Returns:
            The evaluation results containing the score or value.
        """
        self._check_evaluation_args(reference=reference, input=input)
        outcome = self._evaluate_strings(
            prediction=prediction,
            reference=reference,
            input=input,
            **kwargs,
        )
        return outcome

    async def aevaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Validate the arguments, then asynchronously evaluate an output.

        Args:
            prediction: The LLM or chain prediction to evaluate.
            reference: The reference label to evaluate against, if any.
            input: The input to consider during evaluation, if any.
            **kwargs: Additional keyword arguments, including callbacks,
                tags, etc.

        Returns:
            The evaluation results containing the score or value.
        """
        self._check_evaluation_args(reference=reference, input=input)
        outcome = await self._aevaluate_strings(
            prediction=prediction,
            reference=reference,
            input=input,
            **kwargs,
        )
        return outcome


class PairwiseStringEvaluator(_EvalArgsMixin, ABC):
    """Compare the output of two models (or two outputs of the same model).

    Concrete evaluators implement ``_evaluate_string_pairs``; callers use
    ``evaluate_string_pairs`` / ``aevaluate_string_pairs``, which validate the
    arguments before delegating.
    """

    @abstractmethod
    def _evaluate_string_pairs(
        self,
        *,
        prediction: str,
        prediction_b: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate a pair of output strings.

        Args:
            prediction: The output string from the first model.
            prediction_b: The output string from the second model.
            reference: The expected output / reference string, if any.
            input: The input string, if any.
            **kwargs: Additional keyword arguments, such as callbacks and
                optional reference strings.

        Returns:
            A dictionary containing the preference, scores, and/or other
            information.
        """

    async def _aevaluate_string_pairs(
        self,
        *,
        prediction: str,
        prediction_b: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate a pair of output strings.

        This default implementation hands the synchronous
        ``_evaluate_string_pairs`` to ``run_in_executor``, passing ``None`` as
        the executor/config argument.

        Args:
            prediction: The output string from the first model.
            prediction_b: The output string from the second model.
            reference: The expected output / reference string, if any.
            input: The input string, if any.
            **kwargs: Additional keyword arguments, such as callbacks and
                optional reference strings.

        Returns:
            A dictionary containing the preference, scores, and/or other
            information.
        """
        outcome = await run_in_executor(
            None,
            self._evaluate_string_pairs,
            prediction=prediction,
            prediction_b=prediction_b,
            reference=reference,
            input=input,
            **kwargs,
        )
        return outcome

    def evaluate_string_pairs(
        self,
        *,
        prediction: str,
        prediction_b: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Validate the arguments, then evaluate a pair of output strings.

        Args:
            prediction: The output string from the first model.
            prediction_b: The output string from the second model.
            reference: The expected output / reference string, if any.
            input: The input string, if any.
            **kwargs: Additional keyword arguments, such as callbacks and
                optional reference strings.

        Returns:
            A dictionary containing the preference, scores, and/or other
            information.
        """
        self._check_evaluation_args(reference=reference, input=input)
        outcome = self._evaluate_string_pairs(
            prediction=prediction,
            prediction_b=prediction_b,
            reference=reference,
            input=input,
            **kwargs,
        )
        return outcome

    async def aevaluate_string_pairs(
        self,
        *,
        prediction: str,
        prediction_b: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Validate the arguments, then asynchronously evaluate an output pair.

        Args:
            prediction: The output string from the first model.
            prediction_b: The output string from the second model.
            reference: The expected output / reference string, if any.
            input: The input string, if any.
            **kwargs: Additional keyword arguments, such as callbacks and
                optional reference strings.

        Returns:
            A dictionary containing the preference, scores, and/or other
            information.
        """
        self._check_evaluation_args(reference=reference, input=input)
        outcome = await self._aevaluate_string_pairs(
            prediction=prediction,
            prediction_b=prediction_b,
            reference=reference,
            input=input,
            **kwargs,
        )
        return outcome