"""This module integrates the ExLlamaV2 library by providing convenience utilities."""
import typing
from copy import copy, deepcopy
from functools import lru_cache

import kbnf
import torch
from exllamav2 import ExLlamaV2Tokenizer, ExLlamaV2
from exllamav2.generator.base import ExLlamaV2Filter
def create_engine_vocabulary(tokenizer: ExLlamaV2Tokenizer) -> kbnf.Vocabulary:
    """
    Create a vocabulary for the KBNF engine.

    :param tokenizer: the ExLlamaV2 tokenizer whose underlying ``tokenizer_model``
        supplies piece/id mappings via ``id_to_piece`` and ``vocab_size``.
    :return: a ``kbnf.Vocabulary`` built from the tokenizer's pieces.
    """
    # The underlying tokenizer model must expose its vocabulary for enumeration.
    assert hasattr(tokenizer.tokenizer_model, "vocab"), (
        f"tokenizer({tokenizer})"
        f" with tokenizer_model({tokenizer.tokenizer_model})"
        f" does not have vocab attribute!")
    vocab = {tokenizer.tokenizer_model.id_to_piece(i): i
             for i in range(tokenizer.tokenizer_model.vocab_size())}
    # Normalize special placeholder pieces back to the original characters they stand for.
    new_vocab = get_original_characters(vocab)
    return kbnf.Vocabulary({token_id: kbnf.Token(piece) for piece, token_id in new_vocab.items()},
                           {token_id: piece for token_id, piece in enumerate(vocab)})
def create_formatter_filter(model: ExLlamaV2, tokenizer: ExLlamaV2Tokenizer,
                            formatter_builder: FormatterBuilder,
                            engine_config: EngineGenerationConfig = None) -> ExLlamaV2Filter:
    """
    Create a formatter filter for the ExLlamaV2 engine.

    :param model: the ExLlamaV2 model the filter is attached to.
    :param tokenizer: tokenizer used to build the KBNF vocabulary and decode tokens.
    :param formatter_builder: builder describing the output format to enforce.
    :param engine_config: optional engine generation config; passed through to the filter.
    :return: an ``ExLlamaV2Filter`` that masks logits according to the formatter.
    """
    vocab = create_engine_vocabulary(tokenizer)
    # The formatter decodes token-id sequences by delegating back to the tokenizer,
    # which expects a tensor rather than a plain list.
    f = formatter_builder.build(
        vocab, lambda tokens: tokenizer.decode(torch.tensor(tokens)))
    return FormatterFilter(model, tokenizer, f, engine_config)
class FormatterFilter(ExLlamaV2Filter):
    """
    ExLlamaV2Filter that uses a formatter to mask logits.
    """

    def __init__(self, model, tokenizer, formatter: FormatterBase,
                 config: EngineGenerationConfig = None):
        # NOTE(review): body reconstructed from a damaged extraction — confirm against upstream.
        super().__init__(model, tokenizer)
        self._formatter = formatter  # enforces the configured output format
        if config is None:
            config = EngineGenerationConfig()
        self._config = config  # deep-copied by clone(), so treated as owned state
        self._pass_tokens = set()  # reusable buffer of currently-allowed token ids

    def is_completed(self) -> bool:
        """
        Check if the formatter is completed.
        """
        return self._formatter.is_completed()

    def clone(self, c=None) -> "FormatterFilter":
        """
        Clone this filter into *c* (or a fresh bare instance when *c* is None).
        """
        if c is None:
            # __new__ bypasses __init__ so no engine state is re-created.
            c = FormatterFilter.__new__(FormatterFilter)
        c.model = self.model
        c.tokenizer = self.tokenizer
        c.sequence_str = self.sequence_str
        # NOTE(review): formatter presumably does not support deepcopy — shallow copy assumed; confirm.
        c._formatter = copy(self._formatter)
        c._config = deepcopy(self._config)
        c._pass_tokens = self._pass_tokens
        return c

    def begin(self, prefix_str: str) -> None:
        """
        Feed the prompt prefix to the formatter before generation starts.
        """
        # NOTE(review): original may consult self._config before consuming the prompt — confirm.
        prompt = prefix_str.encode("utf-8")
        self._formatter.accept_bytes(prompt)

    def reset(self) -> None:
        # Discard all formatter progress so the filter can be reused.
        self._formatter.reset()

    def feed(self, token: int):
        # Once the format is complete, further tokens are ignored.
        if self._formatter.is_completed():
            return None
        self._formatter.accept_token(token)

    def next_set(self) -> typing.Tuple[typing.Set[int], typing.Set[int]]:
        """
        Return (allowed tokens, tokens that end generation) as sets.
        """
        if self._formatter.is_completed():
            # Only EOS remains legal once the format is fully matched.
            return {self.tokenizer.eos_token_id}, {self.tokenizer.eos_token_id}
        self._formatter.compute_allowed_tokens()
        self._pass_tokens.clear()
        self._pass_tokens.update(self._formatter.get_allowed_tokens_since_last_computation())
        return self._pass_tokens, set()

    def next(self) -> typing.Tuple[typing.Sequence[int], typing.Sequence[int]]:
        # NOTE(review): the "allow_return_type_list" attribute appears to gate whether
        # list-typed returns are required by the hosting ExLlamaV2 version — confirm.
        if not hasattr(self, "allow_return_type_list"):
            if self._formatter.is_completed():
                return [self.tokenizer.eos_token_id], [self.tokenizer.eos_token_id]
            self._formatter.compute_allowed_tokens()
            return self._formatter.get_allowed_tokens_since_last_computation(), []
        # NOTE(review): fallback path reconstructed — delegate to the set-based variant.
        return self.next_set()