Formatron
v0.4.9
v0.4.9
v0.4.8
v0.4.7
v0.4.6
v0.4.5
v0.4.4
v0.4.3
v0.4.2
v0.4.1
v0.4.0
v0.3.4
v0.3.3
v0.3.2
v0.3.1
v0.3.0
v0.2.0
v0.1.3
v0.1.2
v0.1.1
Formatron empowers everyone to control the output format of language models with minimal overhead.
Loading...
Searching...
No Matches
regex.py
Go to the documentation of this file.
1
"""
2
This module contains the RegexExtractor class, which is used to extract data using a regular expression.
3
"""
4
import
re
5
import
typing
6
from
formatron.extractor
import
NonterminalExtractor
7
8
9
class RegexExtractor(NonterminalExtractor):
    """
    An extractor that extracts a string using a regular expression.
    """

    def __init__(self, regex: str, capture_name: typing.Optional[str], nonterminal: str):
        """
        Initialize the regex extractor.

        Args:
            regex: The regular expression for extraction.
            capture_name: The name of the capture, or `None` if the extractor does not capture.
            nonterminal: The nonterminal representing the extractor.
        """
        super().__init__(nonterminal, capture_name)
        # Compile once up front so every `extract` call reuses the same pattern object.
        self._regex = re.compile(regex)

    def extract(self, input_str: str) -> typing.Optional[tuple[str, typing.Optional[re.Match]]]:
        """
        Extract the string using the regular expression.

        The pattern is applied with `re.Pattern.match`, so it must match
        starting at the beginning of the input string; a match that only
        occurs later in the string does not count.

        Args:
            input_str: The input string.
        Returns:
            The remaining string (everything after the end of the match) and the
            extracted `re.Match` object, or `None` if the extraction failed.
        """
        matched = self._regex.match(input_str)
        if matched is None:
            return None
        # `matched.end()` is the index just past the matched prefix.
        return input_str[matched.end():], matched

    @property
    def kbnf_definition(self) -> str:
        """
        The KBNF rule for this extractor: the nonterminal defined as `#` followed
        by the `repr` of the regex pattern string.
        """
        return f"{self.nonterminal} ::= #{repr(self._regex.pattern)};"
44
45
46
class RegexComplementExtractor(NonterminalExtractor):
    """
    An extractor that extracts data by matching a regex complement,
    i.e. the text that precedes the first match of the regular expression.
    """

    def __init__(self, regex: str, capture_name: str, nonterminal: str):
        """
        Initialize the regex complement extractor.

        Args:
            regex: The regular expression whose first match ends the extracted text.
            capture_name: The capture name, passed on to the base extractor.
            nonterminal: The nonterminal, passed on to the base extractor.
        """
        super().__init__(nonterminal, capture_name)
        self._regex = re.compile(regex)

    def extract(self, input_str: str) -> typing.Optional[tuple[str, str]]:
        """
        Extract the data by matching a regex complement.

        The pattern is searched for anywhere in the input. Everything before
        the start of the first match is extracted; when the pattern does not
        occur at all, the whole input is extracted.

        Args:
            input_str: The input string.
        Returns:
            A `(remaining, extracted)` pair. `remaining` starts at the first
            match (or is empty if there is no match); `extracted` is the text
            before it (or the entire input if there is no match).
        """
        first_hit = self._regex.search(input_str)
        if first_hit:
            # Split the input at the position where the first match begins.
            cut = first_hit.span()[0]
            return input_str[cut:], input_str[:cut]
        return "", input_str

    @property
    def kbnf_definition(self) -> str:
        """
        The KBNF rule for this extractor: the nonterminal defined as `#ex`
        followed by the `repr` of the regex pattern string.
        """
        return f"{self.nonterminal} ::= #ex{repr(self._regex.pattern)};"
formatron.extractor.NonterminalExtractor
An extractor that extracts data corresponding to a nonterminal.
Definition
extractor.py:98
formatron.formats.regex.RegexComplementExtractor
An extractor that extracts data by matching a regex complement.
Definition
regex.py:59
formatron.formats.regex.RegexComplementExtractor.kbnf_definition
str kbnf_definition(self)
Definition
regex.py:88
formatron.formats.regex.RegexComplementExtractor._regex
_regex
Definition
regex.py:66
formatron.formats.regex.RegexExtractor
An extractor that extracts a string using a regular expression.
Definition
regex.py:13
formatron.formats.regex.RegexExtractor._regex
_regex
Definition
regex.py:25
formatron.formats.regex.RegexExtractor.__init__
__init__(self, str regex, str capture_name, str nonterminal)
Initialize the regex extractor.
Definition
regex.py:23
formatron.formats.regex.RegexExtractor.extract
typing.Optional[tuple[str, re.Match|None]] extract(self, str input_str)
Extract the string using the regular expression.
Definition
regex.py:36
formatron.formats.regex.RegexExtractor.kbnf_definition
str kbnf_definition(self)
Definition
regex.py:50
formatron.extractor
Extractors for extracting data from generated strings.
Definition
extractor.py:1
src
formatron
formats
regex.py
Generated by
1.11.0