Source code for langchain.evaluation.parsing.json_distance

import json
from typing import Any, Callable, Optional, Union

from langchain_core.utils.json import parse_json_markdown

from langchain.evaluation.schema import StringEvaluator


[docs]class JsonEditDistanceEvaluator(StringEvaluator): """ An evaluator that calculates the edit distance between JSON strings. This evaluator computes a normalized Damerau-Levenshtein distance between two JSON strings after parsing them and converting them to a canonical format (i.e., whitespace and key order are normalized). It can be customized with alternative distance and canonicalization functions. Args: string_distance (Optional[Callable[[str, str], float]]): A callable that computes the distance between two strings. If not provided, a Damerau-Levenshtein distance from the `rapidfuzz` package will be used. canonicalize (Optional[Callable[[Any], Any]]): A callable that converts a parsed JSON object into its canonical string form. If not provided, the default behavior is to serialize the JSON with sorted keys and no extra whitespace. **kwargs (Any): Additional keyword arguments. Attributes: _string_distance (Callable[[str, str], float]): The internal distance computation function. _canonicalize (Callable[[Any], Any]): The internal canonicalization function. Examples: >>> evaluator = JsonEditDistanceEvaluator() >>> result = evaluator.evaluate_strings(prediction='{"a": 1, "b": 2}', reference='{"a": 1, "b": 3}') >>> assert result["score"] is not None Raises: ImportError: If `rapidfuzz` is not installed and no alternative `string_distance` function is provided. """ # noqa: E501
[docs] def __init__( self, string_distance: Optional[Callable[[str, str], float]] = None, canonicalize: Optional[Callable[[Any], Any]] = None, **kwargs: Any, ) -> None: super().__init__() if string_distance is not None: self._string_distance = string_distance else: try: from rapidfuzz import distance as rfd except ImportError: raise ImportError( "The default string_distance operator for the " " JsonEditDistanceEvaluator requires installation of " "the rapidfuzz package. " "Please install it with `pip install rapidfuzz`." ) self._string_distance = rfd.DamerauLevenshtein.normalized_distance if canonicalize is not None: self._canonicalize = canonicalize else: self._canonicalize = lambda x: json.dumps( x, separators=(",", ":"), sort_keys=True, # eliminate whitespace )
@property def requires_input(self) -> bool: return False @property def requires_reference(self) -> bool: return True @property def evaluation_name(self) -> str: return "json_edit_distance" def _parse_json(self, node: Any) -> Union[dict, list, None, float, bool, int, str]: if isinstance(node, str): return parse_json_markdown(node) return node def _evaluate_strings( self, prediction: str, input: Optional[str] = None, reference: Optional[str] = None, **kwargs: Any, ) -> dict: parsed = self._canonicalize(self._parse_json(prediction)) label = self._canonicalize(self._parse_json(reference)) distance = self._string_distance(parsed, label) return {"score": distance}