Formatron v0.4.9
Formatron empowers everyone to control the output format of language models with minimal overhead.
Loading...
Searching...
No Matches
regex.py
Go to the documentation of this file.
1"""
2This module contains the RegexExtractor class, which is used to extract data using a regular expression.
3"""
4import re
5import typing
6from formatron.extractor import NonterminalExtractor
7
8
10 """
11 An extractor that extracts a string using a regular expression.
12 """
def __init__(self, regex: str, capture_name: typing.Optional[str], nonterminal: str):
    """
    Initialize the regex extractor.

    Args:
        regex: The regular expression for extraction. It is compiled once here and reused.
        capture_name: The name of the capture, or `None` if the extractor does not capture.
        nonterminal: The nonterminal representing the extractor.

    Raises:
        re.error: If `regex` is not a valid regular expression.
    """
    super().__init__(nonterminal, capture_name)
    self._regex = re.compile(regex)
26 def extract(self, input_str: str) -> typing.Optional[tuple[str, re.Match | None]]:
27 """
28 Extract the string using the regular expression.
29 Specifically, the first match(if any) of the regex pattern in the input string is returned.
30
31 Args:
32 input_str: The input string.
33 Returns:
34 The remaining string and the extracted `re.Match` object, or `None` if the extraction failed.
35 """
36 matched = self._regex.match(input_str)
37 if not matched:
38 return None
39 return input_str[matched.span()[1]:], matched
40
@property
def kbnf_definition(self) -> str:
    """
    The grammar rule for this extractor: the nonterminal defined as `#` followed by
    the Python `repr` of the regex pattern, terminated by `;`.
    """
    pattern_literal = repr(self._regex.pattern)
    return f"{self.nonterminal} ::= #{pattern_literal};"
44
45
47 """
48 An extractor that extracts data by matching a regex complement.
49 """
def __init__(self, regex: str, capture_name: typing.Optional[str], nonterminal: str):
    """
    Initialize the regex complement extractor.

    Args:
        regex: The regular expression whose complement is extracted. It is compiled once here.
        capture_name: The capture name forwarded to the base extractor.
            NOTE(review): presumably `None` means "no capture" - confirm against the base class.
        nonterminal: The nonterminal representing the extractor.

    Raises:
        re.error: If `regex` is not a valid regular expression.
    """
    super().__init__(nonterminal, capture_name)
    self._regex = re.compile(regex)
57
def extract(self, input_str: str) -> typing.Optional[tuple[str, str]]:
    """
    Extract the data by matching a regex complement.

    The regex is searched for anywhere in the input. The text before the first match
    is the extracted data, and the text from the start of that match onward is the
    remainder. With no match at all, the whole input is extracted and the remainder
    is empty.

    Args:
        input_str: The input string.
    Returns:
        A `(remaining, extracted)` pair. This method always returns a pair.
    """
    hit = self._regex.search(input_str)
    if hit is None:
        return "", input_str
    cut = hit.start()
    return input_str[cut:], input_str[:cut]
69
@property
def kbnf_definition(self) -> str:
    """
    The grammar rule for this extractor: the nonterminal defined as `#ex` followed by
    the Python `repr` of the regex pattern, terminated by `;`.
    """
    return f"{self.nonterminal} ::= #ex{self._regex.pattern!r};"
An extractor that extracts data corresponding to a nonterminal.
Definition extractor.py:98
An extractor that extracts data by matching a regex complement.
Definition regex.py:59
An extractor that extracts a string using a regular expression.
Definition regex.py:13
__init__(self, str regex, str capture_name, str nonterminal)
Initialize the regex extractor.
Definition regex.py:23
typing.Optional[tuple[str, re.Match|None]] extract(self, str input_str)
Extract the string using the regular expression.
Definition regex.py:36
Extractors for extracting data from generated strings.
Definition extractor.py:1