{
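  // Helper: fetch a string-valued metadata entry, returning default_val when
  // the key is absent or its variant does not hold a std::string.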
  auto get_meta_string = [&](const std::string& key,
                             const std::string& default_val) -> std::string {
    auto it = gguf.metadata.find(key);
    if (it != gguf.metadata.end() &&
        std::holds_alternative<std::string>(it->second)) {
      return std::get<std::string>(it->second);
    }
    return default_val;
  };
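
  // Helper: typed metadata lookup. Visits the variant stored under `key` and
  // converts it to the type of `default_value`, rejecting lossy integer
  // conversions and mismatched type categories with a warning.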
  auto get_meta_value = [&](const std::string& key, auto default_value) {
    using TargetType = typename std::decay<decltype(default_value)>::type;
    auto it = gguf.metadata.find(key);
    if (it != gguf.metadata.end()) {
      return std::visit(
          [&](const auto& val) -> TargetType {
            using T = std::decay_t<decltype(val)>;

            if constexpr (std::is_integral_v<TargetType>) {
              if constexpr (std::is_integral_v<T> && !std::is_same_v<T, bool>) {
                // Unsigned source, signed target: reject values above the
                // target's maximum.
                if constexpr (std::is_unsigned_v<T> &&
                              std::is_signed_v<TargetType>) {
                  if (val > static_cast<std::make_unsigned_t<TargetType>>(
                                std::numeric_limits<TargetType>::max())) {
                    Logger::warning(std::to_string(val) +
                                    " overflows TargetType. Using default.");
                    return default_value;
                  }
                }
                // Wider signed source: reject values outside the target's
                // representable range.
                else if constexpr (std::is_signed_v<T> &&
                                   std::is_signed_v<TargetType> &&
                                   sizeof(T) > sizeof(TargetType)) {
                  if (val > static_cast<T>(
                                std::numeric_limits<TargetType>::max()) ||
                      val < static_cast<T>(
                                std::numeric_limits<TargetType>::lowest())) {
                    Logger::warning(std::to_string(val) +
                                    " overflows TargetType. Using default.");
                    return default_value;
                  }
                }
                return static_cast<TargetType>(val);
              }
            } else if constexpr (std::is_floating_point_v<TargetType>) {
              if constexpr (std::is_floating_point_v<T>) {
                return static_cast<TargetType>(val);
              }
            } else if constexpr (std::is_same_v<TargetType, bool>) {
              if constexpr (std::is_same_v<T, bool>) {
                return val;
              }
            } else if constexpr (std::is_same_v<TargetType, std::string>) {
              if constexpr (std::is_same_v<T, std::string>) {
                return val;
              }
            }
            Logger::warning("Metadata key '" + key +
                            "' has stored type incompatible with requested "
                            "TargetType. Using default.");
            return default_value;
          },
          it->second);
    } else {
      return default_value;
    }
  };
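
  // Core hyperparameters. vocab_size prefers the tokenizer-level count and
  // falls back to the architecture-level key, then to 32000 (the classic
  // Llama vocabulary size).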
  config.vocab_size = get_meta_value("tokenizer.ggml.vocab_size",
                                     get_meta_value("llama.vocab_size", 32000));
  config.hidden_size = get_meta_value("llama.embedding_length", 4096);
  // ... (several fields elided in the listing; the elided check warns about
  // an out-of-range value, "..., overriding to sensible default (2048)") ...
  config.rms_norm_eps =  // member name inferred from the metadata key
      get_meta_value("llama.attention.layer_norm_rms_epsilon", 1e-5f);
  config.rope_theta = get_meta_value("llama.rope.freq_base", 10000.0f);
  config.bos_token_id = get_meta_value("tokenizer.ggml.bos_token_id", -1);
  config.eos_token_id = get_meta_value("tokenizer.ggml.eos_token_id", -1);
  config.unk_token_id = get_meta_value("tokenizer.ggml.unk_token_id", -1);
  config.pad_token_id = get_meta_value("tokenizer.ggml.padding_token_id", -1);
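
  // Identity metadata; these strings drive the tokenizer-family heuristics
  // below.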
  config.architecture = get_meta_string("general.architecture", "unknown");
  config.model_name = get_meta_string("general.name", "unknown");
  bool has_pre_key = gguf.metadata.count("tokenizer.ggml.pre");
  bool has_merges = !gguf.tokenizer_merges.empty();

  Logger::info("[parse_gguf_config] Architecture: " + config.architecture +
               ", Vocab Size: " + std::to_string(config.vocab_size) +
               ", Has Merges: " + (has_merges ? "Yes" : "No"));
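
  // Heuristic: a Llama 3 checkpoint is recognized by the combination of BPE
  // merges, the 128256-entry vocabulary, and a "gpt2"-style (tiktoken-like)
  // tokenizer model, rather than by the architecture string alone.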
  Logger::info("[parse_gguf_config] Identifying tokenizer family...");
  bool is_llama3_arch_hint =
      (config.architecture.find("llama3") != std::string::npos ||
       config.architecture.find("Llama-3") != std::string::npos ||
       config.architecture.find("Meta-Llama-3") != std::string::npos);
  bool is_llama3_vocab_size = (config.vocab_size == 128256);
  std::string ggml_tokenizer_model = get_meta_string("tokenizer.ggml.model", "");
  bool is_tiktoken_style_tokenizer_model = (ggml_tokenizer_model == "gpt2");

  Logger::info("[parse_gguf_config] L3 Hints: arch_hint=" +
               std::string(is_llama3_arch_hint ? "Y" : "N") +
               ", vocab_size_match=" + std::string(is_llama3_vocab_size ? "Y" : "N") +
               ", has_merges=" + std::string(has_merges ? "Y" : "N") +
               ", ggml_tokenizer_model_key='" + ggml_tokenizer_model +
               "' (is_tiktoken_style: " +
               std::string(is_tiktoken_style_tokenizer_model ? "Y" : "N") + ")");

  if (has_merges && is_llama3_vocab_size && is_tiktoken_style_tokenizer_model) {
    config.tokenizer_family = ModelConfig::TokenizerFamily::LLAMA3_TIKTOKEN;
    Logger::info("[parse_gguf_config] Result: Identified LLAMA3_TIKTOKEN "
                 "(merges + vocab_size + ggml_tokenizer_model='gpt2'). "
                 "Architecture string was: '" + config.architecture + "'");
    if (!is_llama3_arch_hint && config.architecture == "llama") {
      Logger::info("[parse_gguf_config] Note: Classified as Llama 3 based on "
                   "tokenizer/vocab, but arch string was 'llama'.");
    }
    // Llama 3 checkpoints use a much larger RoPE base frequency; override the
    // earlier 10000.0 default when the metadata (or the 500000.0 fallback)
    // says so.
    float llama3_rope_candidate =
        get_meta_value("llama.rope.freq_base", 500000.0f);
    if (llama3_rope_candidate > 10000.0f) {
      config.rope_theta = llama3_rope_candidate;
      Logger::info("[parse_gguf_config] Adjusted rope_theta to " +
                   std::to_string(config.rope_theta) +
                   " for Llama 3 model (was 10000.0).");
    }
  }
  // The original `else if` condition is elided in the listing; per the log
  // message it keys off the architecture string.
  else if (config.architecture.find("llama") != std::string::npos) {
    config.tokenizer_family = ModelConfig::TokenizerFamily::LLAMA_SENTENCEPIECE;
    Logger::info("[parse_gguf_config] Result: Identified LLAMA_SENTENCEPIECE "
                 "based on architecture: '" + config.architecture + "'");
  } else {
    config.tokenizer_family = ModelConfig::TokenizerFamily::UNKNOWN;
    Logger::info("[parse_gguf_config] Result: UNKNOWN tokenizer family for "
                 "architecture: '" + config.architecture + "'");
  }

  // Pre-tokenizer inference; most of this block is elided in the listing.
  if (config.model_name.find("TinyLlama") != std::string::npos ||
      /* ... additional checks elided ... */ false) {
    // ... (elided) ...
  } else if (config.architecture == "llama" && !has_pre_key) {
    // ... (elided) ...
  } else {
    // ... (elided; logs a message ending in "'.") ...
  }
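
  // Prefer an explicit pre-tokenizer type when the GGUF carries one.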
  if (has_pre_key) {
    config.pre_tokenizer_type =
        get_meta_string("tokenizer.ggml.pre", "unknown");
    // ... (elided) ...
  } else {
    // ... (elided) ...
  }
  // Summary log; the opening and tail of this call are elided in the listing.
  Logger::info(std::string("pre_tokenizer_type='") + config.pre_tokenizer_type +
               "', model_name='" + config.model_name + "', chat_template='" +
               config.chat_template_type + "'");

  if (/* model_type-based check, elided */ false) {
    config.chat_template_type = "llama2";
    Logger::info(
        "Inferred chat_template_type='llama2' based on model_type and "
        "missing/different pre_tokenizer_type.");
  }
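
  // Pull the raw chat template string out of metadata when present.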
  auto template_it = gguf.metadata.find("tokenizer.chat_template");
  if (template_it != gguf.metadata.end() &&
      std::holds_alternative<std::string>(template_it->second)) {
    config.chat_template_string = std::get<std::string>(template_it->second);
    Logger::info("Found tokenizer.chat_template in metadata.");
  } else {
    Logger::info(
        "tokenizer.chat_template not found or not a string in metadata. Will "
        "use fallback logic.");
  }
  // Fallback template selection; the enclosing condition and the first branch
  // are partially elided in the listing.
  if (/* fallback needed, condition elided */ true) {
    if (/* model-name check, elided */ false) {
      config.chat_template_type = "llama2";
      Logger::info(
          "Inferred chat_template_type='llama2' based on model name and "
          "missing/different pre_tokenizer_type.");
    } else if (config.tokenizer_family ==
               ModelConfig::TokenizerFamily::LLAMA3_TIKTOKEN) {
      Logger::info("Llama 3 model identified. Chat template will primarily "
                   "rely on 'tokenizer.chat_template' from GGUF if present.");

      if (gguf.metadata.count("tokenizer.chat_template")) {
        // ... (elided) ...
      } else {
        Logger::warning("Llama 3 model detected, but 'tokenizer.chat_template' "
                        "not found in GGUF metadata.");
      }
    }
  }

  Logger::info(std::string("[parse_gguf_config] Finished parsing. Returning "
                           "config. Family: ") +
               (config.tokenizer_family ==
                        ModelConfig::TokenizerFamily::LLAMA3_TIKTOKEN
                    ? "L3_TIKTOKEN"  // label reconstructed; elided in listing
                    : (config.tokenizer_family ==
                               ModelConfig::TokenizerFamily::LLAMA_SENTENCEPIECE
                           ? "L2_SPM"
                           : "UNKNOWN")));
  return config;
}