60 line = [f
"{nonterminal} ::= ",
"object_begin "]
63 for field, _field_info
in current.fields().items():
64 field_name = f
"{nonterminal}_{field}"
65 field_name = escape_identifier(field_name)
66 key = from_str_to_kbnf_str(field)
67 fields.append(f
"{key} colon {field_name}")
68 result.append((_field_info, field_name))
69 line.append(
" comma ".join(fields))
70 line.append(
" object_end;\n")
71 return "".join(line), result
# field_info: grammar generator for a field descriptor.
# Reads `current.annotation` and returns a pair
# (rule_text, [(type_to_expand, nonterminal_to_expand_it_under), ...]).
# NOTE(review): the embedded numbering jumps 74 -> 76 -> 78, so the original
# lines 75 and 77 are absent from this listing. The two returns below cannot
# both be reachable without a condition between them; presumably line 77
# tested whether the field is required -- confirm against the real file.
74 def field_info(current: typing.Type, nonterminal: str):
76 annotation = current.annotation
# First outcome: no rule text is emitted; the annotation is expanded directly
# under the caller's nonterminal.
78 return "", [(annotation, nonterminal)]
# Second outcome: the annotation is expanded under "<nonterminal>_required"
# and the emitted rule references that nonterminal followed by `?`.
79 new_nonterminal = f
"{nonterminal}_required"
80 return f
"{nonterminal} ::= {new_nonterminal}?;\n", [(annotation, new_nonterminal)]
def string_metadata(current: typing.Type, nonterminal: str):
    """
    Build the grammar rule for a string type constrained by metadata.

    Reads ``min_length``, ``max_length``, ``pattern`` and ``substring_of``
    from ``current.metadata`` and emits exactly one rule, checked in this
    order: length bounds, then ``pattern``, then ``substring_of``.

    Args:
        current: Object exposing a ``metadata`` mapping.
        nonterminal: Name of the nonterminal the rule defines.

    Returns:
        ``(rule_text, [])`` for the first constraint that applies, or ``None``
        when none of the four keys yields a rule.

    Raises:
        AssertionError: If a truthy ``pattern`` or ``substring_of`` is combined
            with any other truthy constraint.
    """
    constraints = current.metadata
    min_length = constraints.get("min_length")
    max_length = constraints.get("max_length")
    pattern = constraints.get("pattern")
    substring_of = constraints.get("substring_of")

    if pattern:
        assert not (min_length or max_length or substring_of), \
            "pattern is mutually exclusive with min_length, max_length and substring_of"
    if substring_of:
        assert not (min_length or max_length or pattern), \
            "substring_of is mutually exclusive with min_length, max_length and pattern"

    # Keyed by (min_length present, max_length present); (False, False) is
    # deliberately absent so that "no bounds" falls through to the next checks.
    repetition_map = {
        (True, False): f"{{{min_length},}}",
        (False, True): f"{{0,{max_length}}}",
        (True, True): f"{{{min_length},{max_length}}}",
    }
    repetition = repetition_map.get((min_length is not None, max_length is not None))
    if repetition is not None:
        # Raw f-string: backslashes are emitted literally, so the rule's line
        # break has to be a real newline inside the literal rather than "\n".
        return fr"""{nonterminal} ::= #'"([^\\\\"\u0000-\u001f]|\\\\["\\\\bfnrt/]|\\\\u[0-9A-Fa-f]{{4}}){repetition}"';
""", []
    if pattern is not None:
        # The pattern is embedded in a single-quoted grammar literal.
        pattern = pattern.replace("'", "\\'")
        return f"""{nonterminal} ::= #'"{pattern}"';\n""", []
    if substring_of is not None:
        return f"""{nonterminal} ::= '"' #substrs{repr(substring_of)} '"';\n""", []
    return None
# number_metadata: grammar generator for int/float types carrying bound metadata.
# Reads the "gt", "ge", "lt" and "le" entries of `current.metadata`, then walks
# `prefix_map`; the first entry whose condition is set and equal to its paired
# value selects a regex prefix for the emitted rule.
# NOTE(review): the embedded numbering jumps 111 -> 120, so the definition of
# `prefix_map` is absent from this listing. The loop below shows only its shape:
# keys are (condition, value) pairs and values are regex prefix strings.
# Presumably it is built from gt/ge/lt/le, which are otherwise unused in the
# visible lines -- confirm against the real file.
107 def number_metadata(current: typing.Type, nonterminal: str):
108 gt = current.metadata.get(
"gt")
109 ge = current.metadata.get(
"ge")
110 lt = current.metadata.get(
"lt")
111 le = current.metadata.get(
"le")
120 for (condition, value), prefix
in prefix_map.items():
# A bound participates only when it is present and equals its paired value.
121 if condition
is not None and condition == value:
# Integer types: the prefix followed by a digit string with no leading zero.
122 if issubclass(current.type, int):
123 return f
"""{nonterminal} ::= #'{prefix}[1-9][0-9]*';\n""", []
# Float types: same integer part, then optional fraction and optional exponent.
# NOTE(review): the integer part is [1-9][0-9]*, so unless the prefix supplies
# an alternative, values such as 0.5 are not matched -- confirm this is intended.
124 elif issubclass(current.type, float):
125 return f
"""{nonterminal} ::= #'{prefix}[1-9][0-9]*(\\.[0-9]+)?([eE][+-]?[0-9]+)?';\n""", []
# No entry matched (or the type is neither int nor float): report the
# unsupported metadata to the caller.
127 raise ValueError(f
"{current.type.__name__} metadata {current.metadata} is not supported in json_generators!")
# sequence_metadata: grammar generator for sequence types carrying metadata.
# Reads "min_length", "max_length", "prefix_items" and "additional_items" from
# `current.metadata`, accumulates rule strings in `ebnf_rules`, and returns
# ("\n".join(ebnf_rules) + "\n", [(type, nonterminal), ...]).
# NOTE(review): this listing is missing many original lines (the embedded
# numbering skips 136-138, 145, 147, 155, 158, 165, 167, 175, 177, 185, 189,
# 194, 196-199, 201 and 204). Several of those held branch headers and the
# initialisation of `ebnf_rules`, so the nesting of the statements below
# cannot be read off this text alone -- restore them from the real file.
129 def sequence_metadata(current: typing.Type, nonterminal: str):
130 min_items = current.metadata.get(
"min_length")
131 max_items = current.metadata.get(
"max_length")
132 prefix_items = current.metadata.get(
"prefix_items")
133 additional_items = current.metadata.get(
"additional_items")
# When max_items does not exceed the number of prefix items, the prefix list
# is cut down.
# NOTE(review): the slice keeps max_items+1 entries, one more than max_items;
# with additional items disallowed, max_items is later reset to that longer
# length (line 142). Looks like an off-by-one -- confirm.
134 if max_items
is not None and prefix_items
is not None and max_items <= len(prefix_items):
135 prefix_items = prefix_items[:max_items+1]
# Without additional items the array cannot be longer than the prefix list:
# a larger min_items is rejected and max_items is pinned to the prefix length.
139 if not additional_items:
140 if min_items > len(prefix_items):
141 raise ValueError(f
"min_items {min_items} is greater than the number of prefix_items {len(prefix_items)} and additional_items is not allowed")
142 max_items = len(prefix_items)
# Rules are only generated when at least one bound is known.
143 if min_items
is not None or max_items
is not None:
# Nonterminal used for every non-prefix item.
144 new_nonterminal = f
"{nonterminal}_item"
# The body of this test (line 147) is missing; presumably it defaults
# min_items to 0, since the next test compares it with 0 -- confirm.
146 if min_items
is None:
# Nothing to constrain: emit no rule text and expand current.type under the
# item nonterminal.
148 if min_items == 0
and max_items
is None and prefix_items
is None:
149 return "", [(current.type, new_nonterminal)]
# One nonterminal per prefix item: "<item>_0", "<item>_1", ...
150 prefix_items_nonterminals = [f
"{new_nonterminal}_{i}" for i
in range(len(prefix_items))]
if prefix_items
else []
# Leading slices of the prefix nonterminals, of every length from
# max(min_items, 1) up to len(prefix_items).
151 prefix_items_parts = []
152 if prefix_items
is not None:
153 for i
in range(max(min_items,1), len(prefix_items)+1):
154 prefix_items_parts.append(prefix_items_nonterminals[:i])
# Rule for the empty array (its guard, line 155, is missing from this listing).
156 ebnf_rules.append(f
"{nonterminal} ::= array_begin array_end;")
# ---- No upper bound ----
157 if max_items
is None:
# min_items-1 explicit items followed by "comma item+".
# NOTE(review): for min_items <= 1 the join below is empty, so the rule text
# reads "array_begin  comma <item>+" with a leading comma -- confirm handled.
159 min_items_part =
' comma '.join([new_nonterminal] * (min_items - 1))
160 ebnf_rules.append(f
"{nonterminal} ::= array_begin {min_items_part} comma {new_nonterminal}+ array_end;")
# One rule per prefix slice, each followed by any number of extra items.
161 elif len(prefix_items_parts) >= min_items:
162 for prefix_items_part
in prefix_items_parts:
163 prefix_items_part =
' comma '.join(prefix_items_part)
164 ebnf_rules.append(f
"{nonterminal} ::= array_begin {prefix_items_part} (comma {new_nonterminal})* array_end;")
# Remaining case (its header, line 165, is missing): all prefix items, then
# enough plain items to reach min_items, then "comma item+".
166 min_items_part =
' comma '.join([new_nonterminal] * (min_items - len(prefix_items_nonterminals)-1))
168 min_items_part =
"comma " + min_items_part
169 prefix_items_part =
' comma '.join(prefix_items_nonterminals)
170 ebnf_rules.append(f
"{nonterminal} ::= array_begin {prefix_items_part} {min_items_part} comma {new_nonterminal}+ array_end;")
# ---- Upper bound only, no prefix items: one rule per allowed length ----
171 elif min_items == 0
and not prefix_items:
172 for i
in range(min_items, max_items + 1):
173 items =
' comma '.join([new_nonterminal] * i)
174 ebnf_rules.append(f
"{nonterminal} ::= array_begin {items} array_end;")
# ---- Remaining bounded case (its header, line 175, is missing) ----
176 prefix_items_num = len(prefix_items_nonterminals)
# Arrays consisting only of a prefix slice.
178 for prefix_items_part
in prefix_items_parts:
179 prefix_items_part =
' comma '.join(prefix_items_part)
180 ebnf_rules.append(f
"{nonterminal} ::= array_begin {prefix_items_part} array_end;")
# "<nonterminal>_min" is a helper rule for the mandatory leading part: all
# prefix items plus enough plain items to reach min_items.
181 min_items_part =
' comma '.join([new_nonterminal] * (min_items - prefix_items_num))
182 prefix_items_part =
' comma '.join(prefix_items_nonterminals)
183 if min_items_part
and prefix_items_part:
184 ebnf_rules.append(f
"{nonterminal}_min ::= {prefix_items_part} comma {min_items_part};")
# (The header of this alternative, line 185, is missing; presumably
# `elif min_items_part:` -- confirm.)
186 ebnf_rules.append(f
"{nonterminal}_min ::= {min_items_part};")
187 elif prefix_items_part:
188 ebnf_rules.append(f
"{nonterminal}_min ::= {prefix_items_part};")
# The mandatory part followed by 1 .. (max_items - common) extra items.
190 common = max(min_items, prefix_items_num)
191 for i
in range(1, max_items + 1 - common):
192 items =
' comma '.join([new_nonterminal] * i)
193 ebnf_rules.append(f
"{nonterminal} ::= array_begin {nonterminal}_min comma {items} array_end;")
# Item type: read from the generic arguments of current.type; typing.Any is
# the fallback (the selecting lines 196-199 are missing from this listing).
195 args = typing.get_args(current.type)
200 item_type = typing.Any
# Each prefix item is paired with its own nonterminal; the plain item type is
# always expanded under the item nonterminal.
202 return "\n".join(ebnf_rules) +
"\n", list(zip(prefix_items, prefix_items_nonterminals)) + [(item_type, new_nonterminal)]
203 return "\n".join(ebnf_rules) +
"\n", [(item_type, new_nonterminal)]
def is_sequence_like(current: typing.Type) -> bool:
    """
    Check if the given type is sequence-like.

    A parameterised generic is reduced to its origin first. The result is
    True when that origin (or ``current`` itself when it has no origin) is
    ``typing.Sequence``, ``typing.List`` or ``typing.Tuple``, or is a class
    deriving from ``collections.abc.Sequence``, ``list`` or ``tuple``.

    Args:
        current: The type to check.

    Returns:
        bool: True if the type is sequence-like, False otherwise.
    """
    original = typing.get_origin(current)
    if original is None:
        # Plain classes such as ``list`` have no origin; test them directly.
        original = current
    if original is typing.Sequence or original is typing.List or original is typing.Tuple:
        return True
    # issubclass() only accepts classes, so filter out non-class objects first.
    if not isinstance(original, type):
        return False
    return issubclass(original, (collections.abc.Sequence, list, tuple))
# metadata: dispatcher for types that carry a `metadata` mapping next to a
# `type` attribute. It forwards to string_metadata, number_metadata or
# sequence_metadata depending on `current.type`.
# NOTE(review): the embedded numbering jumps 236 -> 238 -> 240, so the original
# lines 237 and 239 are absent from this listing. Line 237 presumably guarded
# on the kind of `current`, and line 239 presumably made the assignment on
# line 240 conditional on get_origin() returning None -- confirm both.
236 def metadata(current: typing.Type, nonterminal: str):
# Generic origin of the wrapped type, used only for the sequence test below.
238 original = typing.get_origin(current.type)
240 original = current.type
# Empty metadata: emit no rule text and expand the wrapped type itself.
241 if not current.metadata:
242 return "", [(current.type, nonterminal)]
# issubclass() needs a class, hence the isinstance(..., type) pre-checks.
243 if isinstance(current.type, type)
and issubclass(current.type, str):
244 return string_metadata(current, nonterminal)
245 elif isinstance(current.type, type)
and issubclass(current.type, (int, float)):
246 return number_metadata(current, nonterminal)
247 elif is_sequence_like(original):
248 return sequence_metadata(current, nonterminal)
def builtin_sequence(current: typing.Type, nonterminal: str):
    """
    Build the grammar rule for a homogeneous, unbounded JSON array.

    Args:
        current: The type to inspect, e.g. ``list`` or ``typing.List[int]``.
        nonterminal: Name of the nonterminal the rule defines.

    Returns:
        ``(rule_text, [(item_type, item_nonterminal)])`` when ``current`` is
        sequence-like according to ``is_sequence_like``, otherwise ``None``.
        The item type is the first generic argument, or ``typing.Any`` when
        ``current`` is not parameterised.
    """
    original = typing.get_origin(current)
    if original is None:
        original = current
    if not is_sequence_like(original):
        return None
    new_nonterminal = f"{nonterminal}_value"
    type_args = typing.get_args(current)
    annotation = type_args[0] if type_args else typing.Any
    rule = f"{nonterminal} ::= array_begin ({new_nonterminal} (comma {new_nonterminal})*)? array_end;\n"
    return rule, [(annotation, new_nonterminal)]
def builtin_dict(current: typing.Type, nonterminal: str):
    """
    Build the grammar rule for a JSON object with arbitrary string keys.

    Args:
        current: The type to inspect, e.g. ``dict`` or ``typing.Dict[str, int]``.
        nonterminal: Name of the nonterminal the rule defines.

    Returns:
        ``(rule_text, [(value_type, value_nonterminal)])`` when ``current`` is a
        mapping type, otherwise ``None``. The value type is the second generic
        argument, or ``typing.Any`` when ``current`` is not parameterised.

    Raises:
        AssertionError: If the mapping is parameterised with a key type that is
            not a ``str`` subclass.
    """
    original = typing.get_origin(current)
    if original is None:
        original = current
    is_mapping = original is typing.Mapping or (
        isinstance(original, type) and issubclass(original, collections.abc.Mapping)
    )
    if not is_mapping:
        return None
    new_nonterminal = f"{nonterminal}_value"
    args = typing.get_args(current)
    if not args:
        value = typing.Any
    else:
        # issubclass() raises TypeError for non-class arguments (typing.Any,
        # Literal[...], ...); test for a class first so that such key types
        # fail this assertion with its message instead.
        assert isinstance(args[0], type) and issubclass(args[0], str), \
            f"{args[0]} is not string!"
        value = args[1]
    rule = (
        f"{nonterminal} ::="
        f" object_begin (string colon {new_nonterminal} (comma string colon {new_nonterminal})*)?"
        f" object_end;\n"
    )
    return rule, [(value, new_nonterminal)]
def builtin_tuple(current: typing.Type, nonterminal: str):
    """
    Build the grammar rule for a fixed-shape JSON array described by a tuple type.

    Args:
        current: The type to inspect, e.g. ``typing.Tuple[int, str]``.
        nonterminal: Name of the nonterminal the rule defines.

    Returns:
        ``(rule_text, pairs)`` when ``current`` is a tuple type, otherwise
        ``None``. ``pairs`` is a list of ``(element_type, element_nonterminal)``
        with one entry per generic argument, named ``<nonterminal>_<index>``.
    """
    is_tuple = typing.get_origin(current) is tuple or (
        isinstance(current, type) and issubclass(current, tuple)
    )
    if not is_tuple:
        return None
    args = typing.get_args(current)
    new_nonterminals = [f"{nonterminal}_{i}" for i in range(len(args))]
    rule = f"{nonterminal} ::=array_begin {' comma '.join(new_nonterminals)} array_end;\n"
    # A list rather than a bare zip object: the sibling generators return lists,
    # and a one-shot iterator would be silently empty on a second pass.
    return rule, list(zip(args, new_nonterminals))
def builtin_union(current: typing.Type, nonterminal: str):
    """
    Build the grammar rule for a union as an alternation of its members.

    Both ``typing.Union[...]`` / ``typing.Optional[...]`` and PEP 604 unions
    written as ``X | Y`` are recognised.

    Args:
        current: The type to inspect.
        nonterminal: Name of the nonterminal the rule defines.

    Returns:
        ``(rule_text, pairs)`` when ``current`` is a union, otherwise ``None``.
        ``pairs`` is a list of ``(member_type, member_nonterminal)`` with one
        entry per union member, named ``<nonterminal>_<index>``.

    Raises:
        AssertionError: If the union has no members.
    """
    import types

    origin = typing.get_origin(current)
    # ``X | Y`` reports types.UnionType as its origin on interpreters where that
    # class exists and is distinct from typing.Union; it is absent before 3.10.
    pep604_union = getattr(types, "UnionType", None)
    if origin is not typing.Union and (pep604_union is None or origin is not pep604_union):
        return None
    args = typing.get_args(current)
    assert args, f"{current} from {nonterminal} cannot be an empty union!"
    new_nonterminals = [f"{nonterminal}_{i}" for i in range(len(args))]
    # A list rather than a bare zip object, matching the sibling generators.
    return f"{nonterminal} ::= {' | '.join(new_nonterminals)};\n", list(zip(args, new_nonterminals))
def builtin_literal(current: typing.Type, nonterminal: str):
    """
    Build the grammar rule for a ``typing.Literal`` as an alternation of its values.

    Each literal argument contributes one alternative:

    - ``str``: the terminal produced by ``from_str_to_kbnf_str``
    - ``bool``: ``"true"`` / ``"false"``
    - ``int`` / ``float``: the quoted ``str()`` of the number
    - ``None``: ``null``
    - ``tuple``: an array whose elements are each expanded as ``Literal[element]``
    - ``frozendict``: an object whose values are each expanded as ``Literal[value]``
    - anything else: a fresh nonterminal under which the value itself is expanded

    Args:
        current: The type to inspect.
        nonterminal: Name of the nonterminal the rule defines.

    Returns:
        ``(rule_text, pairs)`` when ``current`` is a ``Literal``, otherwise
        ``None``. ``pairs`` lists every ``(type_or_value, nonterminal)`` that
        still has to be expanded.

    Raises:
        AssertionError: If the literal has no arguments.
    """
    if typing.get_origin(current) is not typing.Literal:
        return None
    args = typing.get_args(current)
    assert args, f"{current} from {nonterminal} cannot be an empty literal!"
    new_items = []
    result = []
    for i, arg in enumerate(args):
        if isinstance(arg, str):
            new_items.append(from_str_to_kbnf_str(arg))
        elif isinstance(arg, bool):
            # Must precede the numeric test: bool is a subclass of int.
            new_items.append(f'"{str(arg).lower()}"')
        elif isinstance(arg, (int, float)):
            new_items.append(f'"{str(arg)}"')
        elif arg is None:
            new_items.append("null")
        elif isinstance(arg, tuple):
            # Only this tuple's own nonterminals belong in its array; joining
            # the shared `result` list would also pull in nonterminals that
            # earlier literal arguments registered.
            element_names = [f"{nonterminal}_{i}_{j}" for j in range(len(arg))]
            result.extend(
                (typing.Literal[item], name) for item, name in zip(arg, element_names)
            )
            new_items.append(f"(array_begin {' comma '.join(element_names)} array_end)")
        elif isinstance(arg, frozendict):
            # As above, members are collected locally. Each member is written as
            # `key colon value`, the same shape the schema generator in this
            # file uses for object fields.
            members = []
            for key, value in arg.items():
                value_name = f"{nonterminal}_{i}_{key}"
                result.append((typing.Literal[value], value_name))
                members.append(f"{from_str_to_kbnf_str(key)} colon {value_name}")
            new_items.append(f"object_begin {' comma '.join(members)} object_end")
        else:
            fallback_name = f"{nonterminal}_{i}"
            result.append((arg, fallback_name))
            new_items.append(fallback_name)
    return f"{nonterminal} ::= {' | '.join(new_items)};\n", result
def builtin_simple_types(current: typing.Type, nonterminal: str):
    """
    Build the grammar rule for scalar built-in types, ``typing.Any`` and ``NewType``.

    Args:
        current: The type to inspect.
        nonterminal: Name of the nonterminal the rule defines.

    Returns:
        - ``("<nonterminal> ::= <rule>;\\n", [])`` for ``bool``, ``int``,
          ``float``, ``decimal.Decimal``, ``str``, ``NoneType`` (or subclasses)
          and for ``typing.Any``;
        - ``("", [(supertype, nonterminal)])`` for a ``typing.NewType``, so that
          the supertype is expanded under the same nonterminal;
        - ``None`` for anything else.
    """
    if isinstance(current, type):
        # Checked in order; bool must come before int because bool subclasses int.
        scalar_rules = (
            (bool, "boolean"),
            (int, "integer"),
            (float, "number"),
            (decimal.Decimal, "number"),
            (str, "string"),
            (type(None), "null"),
        )
        for python_type, rule_name in scalar_rules:
            if issubclass(current, python_type):
                return f"{nonterminal} ::= {rule_name};\n", []
    if current is typing.Any:
        return f"{nonterminal} ::= json_value;\n", []
    # NewType objects expose their base as __supertype__. Checking the attribute
    # avoids isinstance(..., typing.NewType), which raises TypeError on
    # interpreters where NewType is a plain function rather than a class.
    if hasattr(current, "__supertype__"):
        return "", [(current.__supertype__, nonterminal)]
    return None
372def _generate_kbnf_grammar(schema: schemas.schema.Schema|collections.abc.Sequence, start_nonterminal: str) -> str:
374 Generate a KBNF grammar string from a schema for JSON format.
377 schema: The schema to generate a grammar for.
378 start_nonterminal: The start nonterminal of the grammar. Default is "start".