Source code for langchain_core.utils.json_schema

"""Utilities for JSON Schema."""

from __future__ import annotations

from copy import deepcopy
from typing import TYPE_CHECKING, Any, Optional, Union

if TYPE_CHECKING:
    from collections.abc import Sequence


def _retrieve_ref(path: str, schema: dict) -> Union[list, dict]:
    components = path.split("/")
    if components[0] != "#":
        msg = (
            "ref paths are expected to be URI fragments, meaning they should start "
            "with #."
        )
        raise ValueError(msg)
    out: Union[list, dict] = schema
    for component in components[1:]:
        if component in out:
            if isinstance(out, list):
                msg = f"Reference '{path}' not found."
                raise KeyError(msg)
            out = out[component]
        elif component.isdigit():
            index = int(component)
            if (isinstance(out, list) and 0 <= index < len(out)) or (
                isinstance(out, dict) and index in out
            ):
                out = out[index]
            else:
                msg = f"Reference '{path}' not found."
                raise KeyError(msg)
        else:
            msg = f"Reference '{path}' not found."
            raise KeyError(msg)
    return deepcopy(out)


def _process_dict_properties(
    properties: dict[str, Any],
    full_schema: dict[str, Any],
    processed_refs: set[str],
    skip_keys: Sequence[str],
    *,
    shallow_refs: bool,
) -> dict[str, Any]:
    """Process dictionary properties, recursing into nested structures."""
    result: dict[str, Any] = {}
    for key, value in properties.items():
        if key in skip_keys:
            # Skip recursion for specified keys, just copy the value as-is
            result[key] = deepcopy(value)
        elif isinstance(value, (dict, list)):
            # Recursively process nested objects and arrays
            result[key] = _dereference_refs_helper(
                value, full_schema, processed_refs, skip_keys, shallow_refs
            )
        else:
            # Copy primitive values directly
            result[key] = value
    return result


def _dereference_refs_helper(
    obj: Any,
    full_schema: dict[str, Any],
    processed_refs: Optional[set[str]],
    skip_keys: Sequence[str],
    shallow_refs: bool,  # noqa: FBT001
) -> Any:
    """Dereference JSON Schema $ref objects, handling both pure and mixed references.

    This function processes JSON Schema objects containing $ref properties by resolving
    the references and merging any additional properties. It handles:

    - Pure $ref objects: {"$ref": "#/path/to/definition"}
    - Mixed $ref objects: {"$ref": "#/path", "title": "Custom Title", ...}
    - Circular references by breaking cycles and preserving non-ref properties

    Args:
        obj: The object to process (can be dict, list, or primitive)
        full_schema: The complete schema containing all definitions
        processed_refs: Set tracking currently processing refs (for cycle detection)
        skip_keys: Keys under which to skip recursion
        shallow_refs: If True, only break cycles; if False, deep-inline all refs

    Returns:
        The object with $ref properties resolved and merged with other properties.
    """
    if processed_refs is None:
        processed_refs = set()

    # Case 1: Object contains a $ref property (pure or mixed with additional properties)
    if isinstance(obj, dict) and "$ref" in obj:
        ref_path = obj["$ref"]
        additional_properties = {
            key: value for key, value in obj.items() if key != "$ref"
        }

        # Detect circular reference: if we're already processing this $ref,
        # return only the additional properties to break the cycle
        if ref_path in processed_refs:
            return _process_dict_properties(
                additional_properties,
                full_schema,
                processed_refs,
                skip_keys,
                shallow_refs=shallow_refs,
            )

        # Mark this reference as being processed (for cycle detection)
        processed_refs.add(ref_path)

        # Fetch and recursively resolve the referenced object
        referenced_object = deepcopy(_retrieve_ref(ref_path, full_schema))
        resolved_reference = _dereference_refs_helper(
            referenced_object, full_schema, processed_refs, skip_keys, shallow_refs
        )

        # Clean up: remove from processing set before returning
        processed_refs.remove(ref_path)

        # Pure $ref case: no additional properties, return resolved reference directly
        if not additional_properties:
            return resolved_reference

        # Mixed $ref case: merge resolved reference with additional properties
        # Additional properties take precedence over resolved properties
        merged_result = {}
        if isinstance(resolved_reference, dict):
            merged_result.update(resolved_reference)

        # Process additional properties and merge them (they override resolved ones)
        processed_additional = _process_dict_properties(
            additional_properties,
            full_schema,
            processed_refs,
            skip_keys,
            shallow_refs=shallow_refs,
        )
        merged_result.update(processed_additional)

        return merged_result

    # Case 2: Regular dictionary without $ref - process all properties
    if isinstance(obj, dict):
        return _process_dict_properties(
            obj, full_schema, processed_refs, skip_keys, shallow_refs=shallow_refs
        )

    # Case 3: List - recursively process each item
    if isinstance(obj, list):
        return [
            _dereference_refs_helper(
                item, full_schema, processed_refs, skip_keys, shallow_refs
            )
            for item in obj
        ]

    # Case 4: Primitive value (string, number, boolean, null) - return unchanged
    return obj


def dereference_refs(
    schema_obj: dict,
    *,
    full_schema: Optional[dict] = None,
    skip_keys: Optional[Sequence[str]] = None,
) -> dict:
    """Resolve and inline JSON Schema $ref references in a schema object.

    This function processes a JSON Schema and resolves $ref references by
    replacing them with the actual referenced content. It handles both simple
    references and complex cases like circular references and mixed $ref
    objects that contain additional properties alongside the $ref.

    Args:
        schema_obj: The JSON Schema object or fragment to process. This can be
            a complete schema or just a portion of one.
        full_schema: The complete schema containing all definitions that $refs
            might point to. If not provided (or empty), defaults to
            ``schema_obj``, which is useful when the schema is self-contained.
        skip_keys: Keys whose values are copied verbatim instead of being
            searched for references:

            - If None (default): ``["$defs"]`` is used, so the definitions
              section is copied through unchanged while every other key is
              recursed into.
            - If provided (even as ``[]``): exactly the given keys are skipped.
              With ``[]`` every key, including ``$defs``, is recursed into.

    Returns:
        A new dictionary with $ref references resolved and inlined. The
        original ``schema_obj`` is not modified.

    Examples:
        Basic reference resolution:

        >>> schema = {
        ...     "type": "object",
        ...     "properties": {"name": {"$ref": "#/$defs/string_type"}},
        ...     "$defs": {"string_type": {"type": "string"}},
        ... }
        >>> result = dereference_refs(schema)
        >>> result["properties"]["name"]
        {'type': 'string'}

        Mixed $ref with additional properties:

        >>> schema = {
        ...     "properties": {
        ...         "name": {"$ref": "#/$defs/base", "description": "User name"}
        ...     },
        ...     "$defs": {"base": {"type": "string", "minLength": 1}},
        ... }
        >>> result = dereference_refs(schema)
        >>> result["properties"]["name"]
        {'type': 'string', 'minLength': 1, 'description': 'User name'}

        Handling circular references:

        >>> schema = {
        ...     "properties": {"user": {"$ref": "#/$defs/User"}},
        ...     "$defs": {
        ...         "User": {
        ...             "type": "object",
        ...             "properties": {"friend": {"$ref": "#/$defs/User"}},
        ...         }
        ...     },
        ... }
        >>> result = dereference_refs(schema)  # Won't cause infinite recursion
        >>> result["properties"]["user"]
        {'type': 'object', 'properties': {'friend': {}}}

    Note:
        - Circular references are handled gracefully by breaking cycles
        - Mixed $ref objects (with both $ref and other properties) are supported
        - Additional properties in mixed $refs override resolved properties
        - The $defs section is preserved in the output by default
    """
    # A falsy ``full_schema`` (None or an empty dict) falls back to ``schema_obj``.
    full = full_schema or schema_obj
    # An explicit ``skip_keys`` is honored exactly; only ``None`` selects the default.
    keys_to_skip = list(skip_keys) if skip_keys is not None else ["$defs"]
    # Forwarded to the helper as its ``shallow_refs`` argument.
    shallow = skip_keys is None
    return _dereference_refs_helper(schema_obj, full, None, keys_to_skip, shallow)