Source code for langchain_core.utils.pydantic

"""Utilities for pydantic."""

from __future__ import annotations

import inspect
import textwrap
import warnings
from contextlib import nullcontext
from functools import lru_cache, wraps
from types import GenericAlias
from typing import (
    Any,
    Callable,
    Optional,
    TypeVar,
    Union,
    cast,
    overload,
)

import pydantic
from pydantic import (
    BaseModel,
    ConfigDict,
    PydanticDeprecationWarning,
    RootModel,
    root_validator,
)
from pydantic import (
    create_model as _create_model_base,
)
from pydantic.json_schema import (
    DEFAULT_REF_TEMPLATE,
    GenerateJsonSchema,
    JsonSchemaMode,
    JsonSchemaValue,
)
from pydantic_core import core_schema


[docs] def get_pydantic_major_version() -> int: """Get the major version of Pydantic.""" try: import pydantic return int(pydantic.__version__.split(".")[0]) except ImportError: return 0
def _get_pydantic_minor_version() -> int: """Get the minor version of Pydantic.""" try: import pydantic return int(pydantic.__version__.split(".")[1]) except ImportError: return 0 PYDANTIC_MAJOR_VERSION = get_pydantic_major_version() PYDANTIC_MINOR_VERSION = _get_pydantic_minor_version() if PYDANTIC_MAJOR_VERSION == 1: from pydantic.fields import FieldInfo as FieldInfoV1 PydanticBaseModel = pydantic.BaseModel TypeBaseModel = type[BaseModel] elif PYDANTIC_MAJOR_VERSION == 2: from pydantic.v1.fields import FieldInfo as FieldInfoV1 # type: ignore[assignment] # Union type needs to be last assignment to PydanticBaseModel to make mypy happy. PydanticBaseModel = Union[BaseModel, pydantic.BaseModel] # type: ignore TypeBaseModel = Union[type[BaseModel], type[pydantic.BaseModel]] # type: ignore else: msg = f"Unsupported Pydantic version: {PYDANTIC_MAJOR_VERSION}" raise ValueError(msg) TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)
[docs] def is_pydantic_v1_subclass(cls: type) -> bool: """Check if the installed Pydantic version is 1.x-like.""" if PYDANTIC_MAJOR_VERSION == 1: return True elif PYDANTIC_MAJOR_VERSION == 2: from pydantic.v1 import BaseModel as BaseModelV1 if issubclass(cls, BaseModelV1): return True return False
[docs] def is_pydantic_v2_subclass(cls: type) -> bool: """Check if the installed Pydantic version is 1.x-like.""" from pydantic import BaseModel return PYDANTIC_MAJOR_VERSION == 2 and issubclass(cls, BaseModel)
[docs] def is_basemodel_subclass(cls: type) -> bool: """Check if the given class is a subclass of Pydantic BaseModel. Check if the given class is a subclass of any of the following: * pydantic.BaseModel in Pydantic 1.x * pydantic.BaseModel in Pydantic 2.x * pydantic.v1.BaseModel in Pydantic 2.x """ # Before we can use issubclass on the cls we need to check if it is a class if not inspect.isclass(cls) or isinstance(cls, GenericAlias): return False if PYDANTIC_MAJOR_VERSION == 1: from pydantic import BaseModel as BaseModelV1Proper if issubclass(cls, BaseModelV1Proper): return True elif PYDANTIC_MAJOR_VERSION == 2: from pydantic import BaseModel as BaseModelV2 from pydantic.v1 import BaseModel as BaseModelV1 if issubclass(cls, BaseModelV2): return True if issubclass(cls, BaseModelV1): return True else: msg = f"Unsupported Pydantic version: {PYDANTIC_MAJOR_VERSION}" raise ValueError(msg) return False
[docs] def is_basemodel_instance(obj: Any) -> bool: """Check if the given class is an instance of Pydantic BaseModel. Check if the given class is an instance of any of the following: * pydantic.BaseModel in Pydantic 1.x * pydantic.BaseModel in Pydantic 2.x * pydantic.v1.BaseModel in Pydantic 2.x """ if PYDANTIC_MAJOR_VERSION == 1: from pydantic import BaseModel as BaseModelV1Proper if isinstance(obj, BaseModelV1Proper): return True elif PYDANTIC_MAJOR_VERSION == 2: from pydantic import BaseModel as BaseModelV2 from pydantic.v1 import BaseModel as BaseModelV1 if isinstance(obj, BaseModelV2): return True if isinstance(obj, BaseModelV1): return True else: msg = f"Unsupported Pydantic version: {PYDANTIC_MAJOR_VERSION}" raise ValueError(msg) return False
# How to type hint this?
[docs] def pre_init(func: Callable) -> Any: """Decorator to run a function before model initialization. Args: func (Callable): The function to run before model initialization. Returns: Any: The decorated function. """ with warnings.catch_warnings(): warnings.filterwarnings(action="ignore", category=PydanticDeprecationWarning) @root_validator(pre=True) @wraps(func) def wrapper(cls: type[BaseModel], values: dict[str, Any]) -> dict[str, Any]: """Decorator to run a function before model initialization. Args: cls (Type[BaseModel]): The model class. values (Dict[str, Any]): The values to initialize the model with. Returns: Dict[str, Any]: The values to initialize the model with. """ # Insert default values fields = cls.model_fields for name, field_info in fields.items(): # Check if allow_population_by_field_name is enabled # If yes, then set the field name to the alias if ( hasattr(cls, "Config") and hasattr(cls.Config, "allow_population_by_field_name") and cls.Config.allow_population_by_field_name and field_info.alias in values ): values[name] = values.pop(field_info.alias) if ( hasattr(cls, "model_config") and cls.model_config.get("populate_by_name") and field_info.alias in values ): values[name] = values.pop(field_info.alias) if ( name not in values or values[name] is None ) and not field_info.is_required(): if field_info.default_factory is not None: values[name] = field_info.default_factory() # type: ignore else: values[name] = field_info.default # Call the decorated function return func(cls, values) return wrapper
class _IgnoreUnserializable(GenerateJsonSchema): """A JSON schema generator that ignores unknown types. https://docs.pydantic.dev/latest/concepts/json_schema/#customizing-the-json-schema-generation-process """ def handle_invalid_for_json_schema( self, schema: core_schema.CoreSchema, error_info: str ) -> JsonSchemaValue: return {} def _create_subset_model_v1( name: str, model: type[BaseModel], field_names: list, *, descriptions: Optional[dict] = None, fn_description: Optional[str] = None, ) -> type[BaseModel]: """Create a pydantic model with only a subset of model's fields.""" if PYDANTIC_MAJOR_VERSION == 1: from pydantic import create_model elif PYDANTIC_MAJOR_VERSION == 2: from pydantic.v1 import create_model # type: ignore else: msg = f"Unsupported pydantic version: {PYDANTIC_MAJOR_VERSION}" raise NotImplementedError(msg) fields = {} for field_name in field_names: # Using pydantic v1 so can access __fields__ as a dict. field = model.__fields__[field_name] # type: ignore t = ( # this isn't perfect but should work for most functions field.outer_type_ if field.required and not field.allow_none else Optional[field.outer_type_] ) if descriptions and field_name in descriptions: field.field_info.description = descriptions[field_name] fields[field_name] = (t, field.field_info) rtn = create_model(name, **fields) # type: ignore rtn.__doc__ = textwrap.dedent(fn_description or model.__doc__ or "") return rtn def _create_subset_model_v2( name: str, model: type[pydantic.BaseModel], field_names: list[str], *, descriptions: Optional[dict] = None, fn_description: Optional[str] = None, ) -> type[pydantic.BaseModel]: """Create a pydantic model with a subset of the model fields.""" from pydantic import create_model from pydantic.fields import FieldInfo descriptions_ = descriptions or {} fields = {} for field_name in field_names: field = model.model_fields[field_name] # type: ignore description = descriptions_.get(field_name, field.description) field_info = FieldInfo(description=description, default=field.default) if field.metadata: field_info.metadata = field.metadata fields[field_name] = (field.annotation, field_info) rtn = create_model( # type: ignore name, **fields, __config__=ConfigDict(arbitrary_types_allowed=True) ) # TODO(0.3): Determine if there is a more "pydantic" way to preserve annotations. # This is done to preserve __annotations__ when working with pydantic 2.x # and using the Annotated type with TypedDict. # Comment out the following line, to trigger the relevant test case. selected_annotations = [ (name, annotation) for name, annotation in model.__annotations__.items() if name in field_names ] rtn.__annotations__ = dict(selected_annotations) rtn.__doc__ = textwrap.dedent(fn_description or model.__doc__ or "") return rtn # Private functionality to create a subset model that's compatible across # different versions of pydantic. # Handles pydantic versions 1.x and 2.x. including v1 of pydantic in 2.x. # However, can't find a way to type hint this. def _create_subset_model( name: str, model: TypeBaseModel, field_names: list[str], *, descriptions: Optional[dict] = None, fn_description: Optional[str] = None, ) -> type[BaseModel]: """Create subset model using the same pydantic version as the input model.""" if PYDANTIC_MAJOR_VERSION == 1: return _create_subset_model_v1( name, model, field_names, descriptions=descriptions, fn_description=fn_description, ) elif PYDANTIC_MAJOR_VERSION == 2: from pydantic.v1 import BaseModel as BaseModelV1 if issubclass(model, BaseModelV1): return _create_subset_model_v1( name, model, field_names, descriptions=descriptions, fn_description=fn_description, ) else: return _create_subset_model_v2( name, model, field_names, descriptions=descriptions, fn_description=fn_description, ) else: msg = f"Unsupported pydantic version: {PYDANTIC_MAJOR_VERSION}" raise NotImplementedError(msg) if PYDANTIC_MAJOR_VERSION == 2: from pydantic import BaseModel as BaseModelV2 from pydantic.fields import FieldInfo as FieldInfoV2 from pydantic.v1 import BaseModel as BaseModelV1 @overload def get_fields(model: type[BaseModelV2]) -> dict[str, FieldInfoV2]: ... @overload def get_fields(model: BaseModelV2) -> dict[str, FieldInfoV2]: ... @overload def get_fields(model: type[BaseModelV1]) -> dict[str, FieldInfoV1]: ... @overload def get_fields(model: BaseModelV1) -> dict[str, FieldInfoV1]: ... def get_fields( model: Union[ BaseModelV2, BaseModelV1, type[BaseModelV2], type[BaseModelV1], ], ) -> Union[dict[str, FieldInfoV2], dict[str, FieldInfoV1]]: """Get the field names of a Pydantic model.""" if hasattr(model, "model_fields"): return model.model_fields # type: ignore elif hasattr(model, "__fields__"): return model.__fields__ # type: ignore else: msg = f"Expected a Pydantic model. Got {type(model)}" raise TypeError(msg) elif PYDANTIC_MAJOR_VERSION == 1: from pydantic import BaseModel as BaseModelV1_
[docs] def get_fields( # type: ignore[no-redef] model: Union[type[BaseModelV1_], BaseModelV1_], ) -> dict[str, FieldInfoV1]: """Get the field names of a Pydantic model.""" return model.__fields__ # type: ignore
else: msg = f"Unsupported Pydantic version: {PYDANTIC_MAJOR_VERSION}" raise ValueError(msg) _SchemaConfig = ConfigDict( arbitrary_types_allowed=True, frozen=True, protected_namespaces=() ) NO_DEFAULT = object() def _create_root_model( name: str, type_: Any, module_name: Optional[str] = None, default_: object = NO_DEFAULT, ) -> type[BaseModel]: """Create a base class.""" def schema( cls: type[BaseModel], by_alias: bool = True, ref_template: str = DEFAULT_REF_TEMPLATE, ) -> dict[str, Any]: # Complains about schema not being defined in superclass schema_ = super(cls, cls).schema( # type: ignore[misc] by_alias=by_alias, ref_template=ref_template ) schema_["title"] = name return schema_ def model_json_schema( cls: type[BaseModel], by_alias: bool = True, ref_template: str = DEFAULT_REF_TEMPLATE, schema_generator: type[GenerateJsonSchema] = GenerateJsonSchema, mode: JsonSchemaMode = "validation", ) -> dict[str, Any]: # Complains about model_json_schema not being defined in superclass schema_ = super(cls, cls).model_json_schema( # type: ignore[misc] by_alias=by_alias, ref_template=ref_template, schema_generator=schema_generator, mode=mode, ) schema_["title"] = name return schema_ base_class_attributes = { "__annotations__": {"root": type_}, "model_config": ConfigDict(arbitrary_types_allowed=True), "schema": classmethod(schema), "model_json_schema": classmethod(model_json_schema), "__module__": module_name or "langchain_core.runnables.utils", } if default_ is not NO_DEFAULT: base_class_attributes["root"] = default_ with warnings.catch_warnings(): try: if ( isinstance(type_, type) and not isinstance(type_, GenericAlias) and issubclass(type_, BaseModelV1) ): warnings.filterwarnings( action="ignore", category=PydanticDeprecationWarning ) except TypeError: pass custom_root_type = type(name, (RootModel,), base_class_attributes) return cast(type[BaseModel], custom_root_type) @lru_cache(maxsize=256) def _create_root_model_cached( model_name: str, type_: Any, *, module_name: Optional[str] = None, default_: object = NO_DEFAULT, ) -> type[BaseModel]: return _create_root_model( model_name, type_, default_=default_, module_name=module_name ) @lru_cache(maxsize=256) def _create_model_cached( __model_name: str, **field_definitions: Any, ) -> type[BaseModel]: return _create_model_base( __model_name, __config__=_SchemaConfig, **_remap_field_definitions(field_definitions), )
[docs] def create_model( __model_name: str, __module_name: Optional[str] = None, **field_definitions: Any, ) -> type[BaseModel]: """Create a pydantic model with the given field definitions. Please use create_model_v2 instead of this function. Args: __model_name: The name of the model. __module_name: The name of the module where the model is defined. This is used by Pydantic to resolve any forward references. **field_definitions: The field definitions for the model. Returns: Type[BaseModel]: The created model. """ kwargs = {} if "__root__" in field_definitions: kwargs["root"] = field_definitions.pop("__root__") return create_model_v2( __model_name, module_name=__module_name, field_definitions=field_definitions, **kwargs, )
# Reserved names should capture all the `public` names / methods that are # used by BaseModel internally. This will keep the reserved names up-to-date. # For reference, the reserved names are: # "construct", "copy", "dict", "from_orm", "json", "parse_file", "parse_obj", # "parse_raw", "schema", "schema_json", "update_forward_refs", "validate", # "model_computed_fields", "model_config", "model_construct", "model_copy", # "model_dump", "model_dump_json", "model_extra", "model_fields", # "model_fields_set", "model_json_schema", "model_parametrized_name", # "model_post_init", "model_rebuild", "model_validate", "model_validate_json", # "model_validate_strings" _RESERVED_NAMES = {key for key in dir(BaseModel) if not key.startswith("_")} def _remap_field_definitions(field_definitions: dict[str, Any]) -> dict[str, Any]: """This remaps fields to avoid colliding with internal pydantic fields.""" from pydantic import Field from pydantic.fields import FieldInfo remapped = {} for key, value in field_definitions.items(): if key.startswith("_") or key in _RESERVED_NAMES: # Let's add a prefix to avoid colliding with internal pydantic fields if isinstance(value, FieldInfo): msg = ( f"Remapping for fields starting with '_' or fields with a name " f"matching a reserved name {_RESERVED_NAMES} is not supported if " f" the field is a pydantic Field instance. Got {key}." ) raise NotImplementedError(msg) type_, default_ = value remapped[f"private_{key}"] = ( type_, Field( default=default_, alias=key, serialization_alias=key, title=key.lstrip("_").replace("_", " ").title(), ), ) else: remapped[key] = value return remapped
[docs] def create_model_v2( model_name: str, *, module_name: Optional[str] = None, field_definitions: Optional[dict[str, Any]] = None, root: Optional[Any] = None, ) -> type[BaseModel]: """Create a pydantic model with the given field definitions. Attention: Please do not use outside of langchain packages. This API is subject to change at any time. Args: model_name: The name of the model. module_name: The name of the module where the model is defined. This is used by Pydantic to resolve any forward references. field_definitions: The field definitions for the model. root: Type for a root model (RootModel) Returns: Type[BaseModel]: The created model. """ field_definitions = cast(dict[str, Any], field_definitions or {}) # type: ignore[no-redef] if root: if field_definitions: msg = ( "When specifying __root__ no other " f"fields should be provided. Got {field_definitions}" ) raise NotImplementedError(msg) if isinstance(root, tuple): kwargs = {"type_": root[0], "default_": root[1]} else: kwargs = {"type_": root} try: named_root_model = _create_root_model_cached( model_name, module_name=module_name, **kwargs ) except TypeError: # something in the arguments into _create_root_model_cached is not hashable named_root_model = _create_root_model( model_name, module_name=module_name, **kwargs, ) return named_root_model # No root, just field definitions names = set(field_definitions.keys()) capture_warnings = False for name in names: # Also if any non-reserved name is used (e.g., model_id or model_name) if name.startswith("model"): capture_warnings = True with warnings.catch_warnings() if capture_warnings else nullcontext(): # type: ignore[attr-defined] if capture_warnings: warnings.filterwarnings(action="ignore") try: return _create_model_cached(model_name, **field_definitions) except TypeError: # something in field definitions is not hashable return _create_model_base( model_name, __config__=_SchemaConfig, **_remap_field_definitions(field_definitions), )