Formatron v0.4.2
Formatron empowers everyone to control the output format of language models with minimal overhead.
Loading...
Searching...
No Matches
dict_inference.py
Go to the documentation of this file.
1"""
2This module contains utilities for inferring schemas from dictionaries.
3"""
4import collections.abc
5import json
6from typing import Any, Type
7
8from pydantic import typing
9
10from formatron import schemas
11
12
14 __slots__ = ("_annotation",)
15
def __init__(self, annotation: typing.Type):
    """
    Create field information wrapping a type annotation.

    Args:
        annotation: The type annotation to record for this field.
    """
    # Kept in the single slot declared by the class; exposed read-only through the `annotation` property.
    self._annotation = annotation
25 @property
26 def annotation(self) -> typing.Type[typing.Any] | None:
27 """
28 Get the type annotation of the field.
29 """
30 return self._annotation
31
@property
def required(self) -> bool:
    """
    Whether the schema requires this field; this implementation always answers True.
    """
    return True
38
39
def _infer_type(value: Any) -> Type[Any]:
    """
    Infer a type annotation from a runtime value.

    Args:
        value: The value whose type should be inferred.

    Returns:
        - For a mapping: the schema class produced by `infer_mapping`.
        - For a sequence other than `str`: `collections.abc.Sequence[T]`, where `T` is
          `Any` for an empty sequence, the single element type when all elements share
          one type, and a `typing.Union` of the element types otherwise. Elements that
          are mappings are converted with `infer_mapping`; other elements contribute
          `type(element)`.
        - For any other value: `type(value)`.
    """
    if isinstance(value, collections.abc.Mapping):
        return infer_mapping(value)
    if not isinstance(value, collections.abc.Sequence) or isinstance(value, str):
        return type(value)
    if not value:
        # The ABC lives in `collections.abc`; the bare `collections.Sequence` alias
        # was removed in Python 3.10 and raises AttributeError there.
        return collections.abc.Sequence[Any]
    # A dict used as an ordered set: duplicate types collapse, and first-seen order is
    # kept so the member order of the resulting union is deterministic.
    element_types = {}
    for element in value:
        # The type of a runtime object is always a plain class, so a direct
        # isinstance check is enough to detect mapping elements.
        if isinstance(element, collections.abc.Mapping):
            element_types[infer_mapping(element)] = None
        else:
            element_types[type(element)] = None
    if len(element_types) == 1:
        return collections.abc.Sequence[next(iter(element_types))]
    union_type = typing.Union[tuple(element_types)]
    return collections.abc.Sequence[union_type]
65
66
def infer_mapping(mapping: collections.abc.Mapping[str, Any]) -> typing.Type[schemas.schema.Schema]:
    """
    Build a schema class describing a mapping, descending into nested values.

    Each key becomes a field name and the type inferred from its value becomes the field's annotation:
    - a collections.abc.Mapping value is turned into a nested schema;
    - a collections.abc.Sequence value becomes a sequence type whose element type is a union when the elements have different types;
    - any other value contributes its own runtime type unchanged.

    Every key must be a string that is a valid Python identifier; both conditions are checked with `assert`.
    The returned class's `from_json` parses the given string with `json.loads` and returns the parsed result as-is.
    """
    fields_by_name = {}
    for name, item in mapping.items():
        assert isinstance(name, str), f"Key must be a string, got {name} of type {type(name)}"
        assert name.isidentifier(), f"Key must be a valid identifier, got {name}"
        fields_by_name[name] = FieldInfo(_infer_type(item))
    # `fields` takes no arguments and hands back the dict collected above.
    namespace = {"fields": lambda: fields_by_name}
    schema_cls = type(f"Mapping_{id(mapping)}", (schemas.schema.Schema,), namespace)
    schema_cls.from_json = classmethod(lambda cls, json_str: json.loads(json_str))
    return schema_cls
__init__(self, typing.Type annotation)
Initialize the field information.
typing.Type[typing.Any]|None annotation(self)
Get the type annotation of the field.
bool required(self)
Check if the field is required for the schema.
An abstract schema that describes some data.
Definition schema.py:48
typing.Type[schemas.schema.Schema] infer_mapping(collections.abc.Mapping[str, Any] mapping)
Recursively infer a schema from a mapping.
Type[Any] _infer_type(Any value)