TinyLlama.cpp 1.0
A lightweight C++ implementation of the TinyLlama language model

This is the complete list of members for Tokenizer, including all inherited members.
| Member | Class | Attributes |
|---|---|---|
| add_bigram_to_queue_refactored(const char *text_data_base, const std::vector< llm_symbol > &symbols, llm_symbol::index first_symbol_idx, std::priority_queue< std::pair< int, int >, std::vector< std::pair< int, int > >, std::greater< std::pair< int, int > > > &work_queue) const | Tokenizer | private |
| added_tokens_ | Tokenizer | private |
| apply_chat_template(const std::string &user_prompt, const std::string &system_message, const ModelConfig &config) const | Tokenizer | |
| bos_token_ | Tokenizer | private |
| bos_token_id() const | Tokenizer | inline |
| bos_token_id_ | Tokenizer | private |
| bpe_merges_ | Tokenizer | private |
| bpe_tokenize(const std::string &text) const | Tokenizer | private |
| bpe_tokenize_from_scores(const std::string &text) const | Tokenizer | private |
| bpe_tokenize_to_ids(const std::string &text, bool add_bos_token_param, bool add_eos_token_param, bool ignore_merges_param) const | Tokenizer | private |
| byte_char_to_id_ | Tokenizer | private |
| capitalize_first_letter(std::string s) const | Tokenizer | private |
| chat_template_special_tokens | Tokenizer | private |
| decode(const std::vector< int > &ids, bool skip_special_tokens=true) const | Tokenizer | |
| decode_sentencepiece(const std::vector< int > &ids, bool skip_special_tokens) const | Tokenizer | private |
| DEFAULT enum value | Tokenizer | |
| detokenize(const std::vector< std::string > &tokens) const | Tokenizer | |
| encode(const std::string &text, bool add_bos=true, bool add_eos=false, PreTokenizeMethod pre_tok_override=PreTokenizeMethod::DEFAULT) const | Tokenizer | |
| eos_token_ | Tokenizer | private |
| eos_token_id() const | Tokenizer | inline |
| eos_token_id_ | Tokenizer | private |
| find_bpe_rank(const std::string &token_left, const std::string &token_right) const | Tokenizer | private |
| get_gguf_chat_template() const | Tokenizer | |
| gguf_chat_template_ | Tokenizer | private |
| id_to_added_token_ | Tokenizer | private |
| id_to_token_ | Tokenizer | private |
| ids_to_tokens(const std::vector< int > &ids) const | Tokenizer | |
| initialized_from_gguf_ | Tokenizer | private |
| is_added_token(int id) const | Tokenizer | |
| LLAMA_REGEX enum value | Tokenizer | |
| load_bpe_merges_from_json(const std::string &model_path) | Tokenizer | private |
| load_sentencepiece_model(const std::string &model_path) | Tokenizer | private |
| load_vocab_from_json(const std::string &vocab_path, std::unordered_map< std::string, int > &token_to_id, std::vector< std::string > &id_to_token) | Tokenizer | private |
| pad_token_ | Tokenizer | private |
| pad_token_id() const | Tokenizer | inline |
| pad_token_id_ | Tokenizer | private |
| pre_tok_type_ | Tokenizer | private |
| PreTokenizeMethod enum name | Tokenizer | |
| sentencepiece_model_loaded_ | Tokenizer | private |
| tiktoken_merges_list_ | Tokenizer | private |
| token_scores_ | Tokenizer | private |
| token_to_id_ | Tokenizer | private |
| token_types_ | Tokenizer | private |
| tokenize(const std::string &text) const | Tokenizer | |
| Tokenizer(const std::string &vocab_path, const std::string &model_path, const ModelConfig &config) | Tokenizer | |
| Tokenizer(const GGUFData &gguf_data, const ModelConfig &config) | Tokenizer | explicit |
| tokenizer_family_ | Tokenizer | private |
| tokens_to_ids(const std::vector< std::string > &tokens) const | Tokenizer | private |
| Type enum name | Tokenizer | |
| type_ | Tokenizer | private |
| unk_token_ | Tokenizer | private |
| unk_token_id() const | Tokenizer | inline |
| unk_token_id_ | Tokenizer | private |
| vocab_size() const | Tokenizer | |
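
For orientation, here is a minimal usage sketch that exercises a few of the public members listed above: the path-based constructor, encode(), decode(), vocab_size(), and apply_chat_template(). Note that this member list does not show return types, so the vector-of-int and string return types used below, the header name `tokenizer.h`, the example file paths, and a default-constructible ModelConfig are all assumptions made for illustration.

```cpp
// Minimal usage sketch for the Tokenizer interface listed above.
// Assumptions (not taken from this member list): the header name,
// the example file paths, a default-constructible ModelConfig, and
// the return types of encode()/decode()/apply_chat_template().
#include <iostream>
#include <string>
#include <vector>

#include "tokenizer.h"  // assumed header providing Tokenizer and ModelConfig

int main() {
    ModelConfig config{};  // assumed default-constructible for this sketch

    // Path-based constructor: Tokenizer(vocab_path, model_path, config).
    Tokenizer tok("vocab.json", "tokenizer.model", config);  // hypothetical paths

    // encode() defaults: add_bos=true, add_eos=false, PreTokenizeMethod::DEFAULT.
    std::vector<int> ids = tok.encode("Hello, world!");
    std::cout << "tokens: " << ids.size()
              << ", vocab size: " << tok.vocab_size() << '\n';

    // decode() reverses encode(); skip_special_tokens drops BOS/EOS and
    // other added tokens from the output text.
    std::string round_trip = tok.decode(ids, /*skip_special_tokens=*/true);
    std::cout << round_trip << '\n';

    // apply_chat_template() wraps a user prompt and a system message in the
    // model's chat format before it is encoded for generation.
    std::string prompt = tok.apply_chat_template(
        "What does BPE stand for?", "You are a helpful assistant.", config);
    std::vector<int> prompt_ids = tok.encode(prompt, /*add_bos=*/true, /*add_eos=*/false);
    std::cout << "prompt tokens: " << prompt_ids.size() << '\n';

    return 0;
}
```

There is also an explicit constructor, Tokenizer(const GGUFData &gguf_data, const ModelConfig &config), presumably for models whose vocabulary, merges, and chat template are embedded in a GGUF file; usage after construction is the same.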