TinyLlama.cpp 1.0
A lightweight C++ implementation of the TinyLlama language model
Tokenizer Member List

This is the complete list of members for Tokenizer, including all inherited members. Hedged usage sketches for the public interface follow the table.

Member | Defined in | Qualifiers
add_bigram_to_queue_refactored(const char *text_data_base, const std::vector<llm_symbol> &symbols, llm_symbol::index first_symbol_idx, std::priority_queue<std::pair<int, int>, std::vector<std::pair<int, int>>, std::greater<std::pair<int, int>>> &work_queue) const | Tokenizer | private
added_tokens_ | Tokenizer | private
apply_chat_template(const std::string &user_prompt, const std::string &system_message, const ModelConfig &config) const | Tokenizer
bos_token_ | Tokenizer | private
bos_token_id() const | Tokenizer | inline
bos_token_id_ | Tokenizer | private
bpe_merges_ | Tokenizer | private
bpe_tokenize(const std::string &text) const | Tokenizer | private
bpe_tokenize_from_scores(const std::string &text) const | Tokenizer | private
bpe_tokenize_to_ids(const std::string &text, bool add_bos_token_param, bool add_eos_token_param, bool ignore_merges_param) const | Tokenizer | private
byte_char_to_id_ | Tokenizer | private
capitalize_first_letter(std::string s) const | Tokenizer | private
chat_template_special_tokens | Tokenizer | private
decode(const std::vector<int> &ids, bool skip_special_tokens=true) const | Tokenizer
decode_sentencepiece(const std::vector<int> &ids, bool skip_special_tokens) const | Tokenizer | private
DEFAULT enum value | Tokenizer
detokenize(const std::vector<std::string> &tokens) const | Tokenizer
encode(const std::string &text, bool add_bos=true, bool add_eos=false, PreTokenizeMethod pre_tok_override=PreTokenizeMethod::DEFAULT) const | Tokenizer
eos_token_ | Tokenizer | private
eos_token_id() const | Tokenizer | inline
eos_token_id_ | Tokenizer | private
find_bpe_rank(const std::string &token_left, const std::string &token_right) const | Tokenizer | private
get_gguf_chat_template() const | Tokenizer
gguf_chat_template_ | Tokenizer | private
id_to_added_token_ | Tokenizer | private
id_to_token_ | Tokenizer | private
ids_to_tokens(const std::vector<int> &ids) const | Tokenizer
initialized_from_gguf_ | Tokenizer | private
is_added_token(int id) const | Tokenizer
LLAMA_REGEX enum value | Tokenizer
load_bpe_merges_from_json(const std::string &model_path) | Tokenizer | private
load_sentencepiece_model(const std::string &model_path) | Tokenizer | private
load_vocab_from_json(const std::string &vocab_path, std::unordered_map<std::string, int> &token_to_id, std::vector<std::string> &id_to_token) | Tokenizer | private
pad_token_ | Tokenizer | private
pad_token_id() const | Tokenizer | inline
pad_token_id_ | Tokenizer | private
pre_tok_type_ | Tokenizer | private
PreTokenizeMethod enum name | Tokenizer
sentencepiece_model_loaded_ | Tokenizer | private
tiktoken_merges_list_ | Tokenizer | private
token_scores_ | Tokenizer | private
token_to_id_ | Tokenizer | private
token_types_ | Tokenizer | private
tokenize(const std::string &text) const | Tokenizer
Tokenizer(const std::string &vocab_path, const std::string &model_path, const ModelConfig &config) | Tokenizer
Tokenizer(const GGUFData &gguf_data, const ModelConfig &config) | Tokenizer | explicit
tokenizer_family_ | Tokenizer | private
tokens_to_ids(const std::vector<std::string> &tokens) const | Tokenizer | private
Type enum name | Tokenizer
type_ | Tokenizer | private
unk_token_ | Tokenizer | private
unk_token_id() const | Tokenizer | inline
unk_token_id_ | Tokenizer | private
vocab_size() const | Tokenizer
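
A minimal usage sketch of the encode/decode round trip. This is an illustration under assumptions, not code from the project: the header name "tokenizer.h", the return types of encode(), decode(), and vocab_size(), and a default-constructible ModelConfig are all assumed, since this member list shows signatures without return types.

    // Hedged sketch: header name, return types, and ModelConfig setup are
    // assumptions; only the signatures below appear in the member list.
    #include <iostream>
    #include <string>
    #include <vector>

    #include "tokenizer.h"  // assumed header exposing Tokenizer and ModelConfig

    int main() {
        ModelConfig config;  // assumed default-constructible

        // File-based constructor: vocabulary JSON plus a tokenizer model file.
        Tokenizer tok("vocab.json", "tokenizer.model", config);

        // encode() adds BOS and omits EOS by default, and the pre-tokenizer
        // override defaults to PreTokenizeMethod::DEFAULT (per the table).
        std::vector<int> ids = tok.encode("Hello, world!");

        // decode() skips special tokens (BOS/EOS/PAD) by default.
        std::string text = tok.decode(ids);

        std::cout << "vocab size: " << tok.vocab_size() << "\n"
                  << "bos id:     " << tok.bos_token_id() << "\n"
                  << "round trip: " << text << "\n";
        return 0;
    }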
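The GGUF and chat-template paths, sketched under the same caveats. How GGUFData is populated is not shown on this page, so load_gguf_file() below is a hypothetical stand-in for whatever loader the project provides, and apply_chat_template() and get_gguf_chat_template() are assumed to return std::string.

    // Hedged sketch: load_gguf_file() is hypothetical, and the return types
    // of apply_chat_template() and get_gguf_chat_template() are assumed.
    #include <string>
    #include <vector>

    #include "tokenizer.h"  // assumed header for Tokenizer, ModelConfig, GGUFData

    std::vector<int> build_chat_prompt(const ModelConfig &config) {
        GGUFData gguf = load_gguf_file("model.gguf");  // hypothetical loader
        Tokenizer tok(gguf, config);                   // explicit GGUF constructor

        if (!tok.get_gguf_chat_template().empty()) {
            // A chat template shipped in the GGUF metadata is available here;
            // this sketch does not use it further.
        }

        // apply_chat_template() is assumed to return the fully formatted
        // prompt, which then goes through the normal encode() path.
        std::string prompt = tok.apply_chat_template(
            "What is the capital of France?",  // user prompt
            "You are a helpful assistant.",    // system message
            config);
        return tok.encode(prompt, /*add_bos=*/true, /*add_eos=*/false);
    }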