auto get_meta_string = [&](const std::string& key,
                           const std::string& default_val) -> std::string {
  auto it = gguf.metadata.find(key);
  if (it != gguf.metadata.end() &&
      std::holds_alternative<std::string>(it->second)) {
    return std::get<std::string>(it->second);
  }
  return default_val;
};
auto get_meta_value = [&](const std::string& key, auto default_value) {
  using TargetType = typename std::decay<decltype(default_value)>::type;
  auto it = gguf.metadata.find(key);
  if (it != gguf.metadata.end()) {
    return std::visit(
        [&](const auto& val) -> TargetType {
          using T = std::decay_t<decltype(val)>;
          if constexpr (std::is_integral_v<TargetType>) {
            if constexpr (std::is_integral_v<T> && !std::is_same_v<T, bool>) {
              // Guard against overflow when converting between integer widths/signedness.
              if constexpr (std::is_unsigned_v<T> && std::is_signed_v<TargetType>) {
                if (val > static_cast<std::make_unsigned_t<TargetType>>(
                              std::numeric_limits<TargetType>::max())) {
                  Logger::warning(std::to_string(val) +
                                  " overflows TargetType. Using default.");
                  return default_value;
                }
              } else if constexpr (std::is_signed_v<T> && std::is_signed_v<TargetType> &&
                                   sizeof(T) > sizeof(TargetType)) {
                if (val > static_cast<T>(std::numeric_limits<TargetType>::max()) ||
                    val < static_cast<T>(std::numeric_limits<TargetType>::lowest())) {
                  Logger::warning(std::to_string(val) +
                                  " overflows TargetType. Using default.");
                  return default_value;
                }
              }
              return static_cast<TargetType>(val);
            }
          } else if constexpr (std::is_floating_point_v<TargetType>) {
            if constexpr (std::is_floating_point_v<T>) {
              return static_cast<TargetType>(val);
            }
          } else if constexpr (std::is_same_v<TargetType, bool>) {
            if constexpr (std::is_same_v<T, bool>) {
              return val;
            }
          } else if constexpr (std::is_same_v<TargetType, std::string>) {
            if constexpr (std::is_same_v<T, std::string>) {
              return val;
            }
          }
          Logger::warning("Key '" + key +
                          "' has stored type incompatible with requested "
                          "TargetType. Using default.");
          return default_value;
        },
        it->second);
  }
  return default_value;
};
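// Usage note: the static type of `default_value` selects TargetType, so the same key can
// be read at different types, e.g. get_meta_value("llama.vocab_size", 32000) deduces int
// while get_meta_value("llama.rope.freq_base", 10000.0f) deduces float (both keys are
// used below); a stored value of an incompatible type falls back to the supplied default.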
config.vocab_size = get_meta_value("tokenizer.ggml.vocab_size",
                                   get_meta_value("llama.vocab_size", 32000));
config.hidden_size = get_meta_value("llama.embedding_length", 4096);
162 ", overriding to sensible default (2048)");
166 get_meta_value(
"llama.attention.layer_norm_rms_epsilon", 1e-5f);
config.rope_theta = get_meta_value("llama.rope.freq_base", 10000.0f);

config.bos_token_id = get_meta_value("tokenizer.ggml.bos_token_id", -1);
config.eos_token_id = get_meta_value("tokenizer.ggml.eos_token_id", -1);
config.unk_token_id = get_meta_value("tokenizer.ggml.unk_token_id", -1);
config.pad_token_id = get_meta_value("tokenizer.ggml.padding_token_id", -1);

config.architecture = get_meta_string("general.architecture", "unknown");
config.model_name = get_meta_string("general.name", "unknown");
bool has_pre_key = gguf.metadata.count("tokenizer.ggml.pre");
180 ", Vocab Size: " + std::to_string(config.
vocab_size) +
181 ", Has Merges: " + (has_merges ?
"Yes" :
"No"));
Logger::info("[parse_gguf_config] Identifying tokenizer family...");
bool is_llama3_arch_hint =
    (config.architecture.find("llama3") != std::string::npos ||
     config.architecture.find("Llama-3") != std::string::npos ||
     config.architecture.find("Meta-Llama-3") != std::string::npos);
bool is_llama3_vocab_size = (config.vocab_size == 128256);
std::string ggml_tokenizer_model = get_meta_string("tokenizer.ggml.model", "");
bool is_tiktoken_style_tokenizer_model = (ggml_tokenizer_model == "gpt2");
Logger::info("[parse_gguf_config] L3 Hints: arch_hint=" +
             std::string(is_llama3_arch_hint ? "Y" : "N") +
             ", vocab_size_match=" + std::string(is_llama3_vocab_size ? "Y" : "N") +
             ", has_merges=" + std::string(has_merges ? "Y" : "N") +
             ", ggml_tokenizer_model_key='" + ggml_tokenizer_model +
             "' (is_tiktoken_style: " +
             std::string(is_tiktoken_style_tokenizer_model ? "Y" : "N") + ")");
if (has_merges && is_llama3_vocab_size && is_tiktoken_style_tokenizer_model) {
  // ... (tokenizer family recorded as LLAMA3_TIKTOKEN)
  Logger::info("[parse_gguf_config] Result: Identified LLAMA3_TIKTOKEN (merges + "
               "vocab_size + ggml_tokenizer_model='gpt2'). Architecture string was: '" +
               config.architecture + "'");
  if (!is_llama3_arch_hint && config.architecture == "llama") {
    Logger::info("[parse_gguf_config] Note: Classified as Llama 3 based on "
                 "tokenizer/vocab, but arch string was 'llama'.");
  }
  float llama3_rope_candidate = get_meta_value("llama.rope.freq_base", 500000.0f);
  if (llama3_rope_candidate > 10000.0f) {
    config.rope_theta = llama3_rope_candidate;
    Logger::info("[parse_gguf_config] Adjusted rope_theta to " +
                 std::to_string(config.rope_theta) +
                 " for Llama 3 model (was 10000.0).");
  }
} else if (config.architecture == "llama") {
  // ... (tokenizer family recorded as LLAMA_SENTENCEPIECE)
  Logger::info("[parse_gguf_config] Result: Identified LLAMA_SENTENCEPIECE based on "
               "architecture: '" + config.architecture + "'");
} else {
  Logger::info("[parse_gguf_config] Result: UNKNOWN tokenizer family for architecture: '" +
               config.architecture + "'");
}
if (config.model_name.find("TinyLlama") != std::string::npos
    /* || ... additional model-name heuristics ... */) {
  // ...
} else if (config.architecture == "llama" && !has_pre_key) {
  // ...
}
// ...
config.pre_tokenizer_type =
    get_meta_string("tokenizer.ggml.pre", "unknown");
// ... (logs model_type, pre_tokenizer_type, model_name, and chat_template)
Logger::info(
    "Inferred chat_template_type='llama2' based on model_type and "
    "missing/different pre_tokenizer_type.");
auto template_it = gguf.metadata.find("tokenizer.chat_template");
if (template_it != gguf.metadata.end() &&
    std::holds_alternative<std::string>(template_it->second)) {
  // ... (template string copied into the config)
  Logger::info("Found tokenizer.chat_template in metadata.");
} else {
  Logger::info(
      "tokenizer.chat_template not found or not a string in metadata. Will "
      "use fallback logic.");
}
267 "Inferred chat_template_type='llama2' based on model name and "
268 "missing/different pre_tokenizer_type.");
Logger::info(
    "Llama 3 model identified. Chat template will primarily rely on "
    "'tokenizer.chat_template' from GGUF if present.");
if (gguf.metadata.count("tokenizer.chat_template")) {
  // ...
} else {
  Logger::warning(
      "Llama 3 model detected, but 'tokenizer.chat_template' not found in "
      "GGUF metadata.");
}
// ...
Logger::info(std::string(
                 "[parse_gguf_config] Finished parsing. Returning config. Family: ") +