Formatron v0.4.2
Formatron empowers everyone to control the output format of language models with minimal overhead.
Loading...
Searching...
No Matches
regex.py
Go to the documentation of this file.
"""
This module contains the RegexExtractor class, which is used to extract data using a regular expression.
"""
4import re
5import typing
6from formatron.extractor import NonterminalExtractor
7
8
class RegexExtractor(NonterminalExtractor):
    """
    An extractor that extracts a string using a regular expression.
    """

    def __init__(self, regex: str, capture_name: typing.Optional[str], nonterminal: str):
        """
        Initialize the regex extractor.

        Args:
            regex: The regular expression for extraction.
            capture_name: The name of the capture, or `None` if the extractor does not capture.
            nonterminal: The nonterminal representing the extractor.

        Raises:
            re.error: If `regex` is not a valid regular expression.
        """
        super().__init__(nonterminal, capture_name)
        # Compile once here so every `extract` call reuses the same pattern object.
        self._regex = re.compile(regex)

    def extract(self, input_str: str) -> typing.Optional[tuple[str, re.Match | None]]:
        """
        Extract the string using the regular expression.
        Specifically, the pattern is matched at the beginning of the input string
        (`re.Pattern.match` semantics); a match that could only occur later in the
        string is not considered.

        Args:
            input_str: The input string.
        Returns:
            The remaining string (everything after the matched prefix) and the
            extracted `re.Match` object, or `None` if the extraction failed.
        """
        matched = self._regex.match(input_str)
        if matched is None:
            return None
        # The match is anchored at index 0, so its end offset is where the remainder begins.
        return input_str[matched.end():], matched

    @property
    def kbnf_definition(self) -> str:
        """
        The grammar rule defining this extractor's nonterminal.

        Returns:
            A rule of the form `<nonterminal> ::= #<pattern literal>;`, where the
            pattern literal is the `repr` of the regex source, so it is emitted
            quoted and escaped.
        """
        return f"{self.nonterminal} ::= #{repr(self._regex.pattern)};"
An extractor that extracts data corresponding to a nonterminal.
Definition extractor.py:98
An extractor that extracts a string using a regular expression.
Definition regex.py:13
__init__(self, str regex, str capture_name, str nonterminal)
Initialize the regex extractor.
Definition regex.py:23
typing.Optional[tuple[str, re.Match|None]] extract(self, str input_str)
Extract the string using the regular expression.
Definition regex.py:36
Extractors for extracting data from generated strings.
Definition extractor.py:1