15 if (token_id < 0 || token_id >= vs) {
16 Logger::error(
"Token ID out of bounds in lookup_embedding: " +
17 std::to_string(token_id));
18 return std::vector<float>(hs, 0.0f);
21 std::vector<float> embedding_vec(hs, 0.0f);
26 ") is not divisible by GGML_QK_K (" +
27 std::to_string(
GGML_QK_K) +
") for Q4_K embedding lookup.");
32 size_t start_block_idx = (size_t)token_id * blocks_per_row;
33 size_t end_block_idx = start_block_idx + blocks_per_row;
37 "Calculated block index out of bounds for Q4_K embedding table. "
39 std::to_string(token_id) +
40 ", StartBlock: " + std::to_string(start_block_idx) +
41 ", EndBlock: " + std::to_string(end_block_idx) +
47 for (
size_t block_n = 0; block_n < blocks_per_row; ++block_n) {
53 size_t elements_to_copy =
SAFE_MIN((
size_t)
GGML_QK_K, (
size_t)(hs - dest_offset));
54 std::memcpy(&embedding_vec[dest_offset], dequantized_block,
55 elements_to_copy *
sizeof(
float));
63 ") is not divisible by GGML_QK8_0 (" +
65 ") for Q8_0 embedding lookup.");
69 size_t start_block_idx = (size_t)token_id * blocks_per_row;
70 size_t end_block_idx = start_block_idx + blocks_per_row;
74 "Calculated block index out of bounds for Q8_0 embedding table. "
76 std::to_string(token_id) +
77 ", StartBlock: " + std::to_string(start_block_idx) +
78 ", EndBlock: " + std::to_string(end_block_idx) +
85 for (
size_t block_n = 0; block_n < blocks_per_row; ++block_n) {
89 size_t elements_to_copy =
SAFE_MIN(
static_cast<size_t>(
GGML_QK8_0),
static_cast<size_t>(hs - dest_offset));
90 std::memcpy(&embedding_vec[dest_offset], dequantized_block,
91 elements_to_copy *
sizeof(
float));
96 float sum = 0.0f, min_val = embedding_vec[0], max_val = embedding_vec[0];
97 for (
int i = 0; i < hs; ++i) {
98 sum += embedding_vec[i];
99 min_val = std::min(min_val, embedding_vec[i]);
100 max_val = std::max(max_val, embedding_vec[i]);
102 Logger::info(
"[Q8_0_EMBED_FINAL] Token " + std::to_string(token_id) +
103 " embedding stats: sum=" + std::to_string(sum) +
104 ", mean=" + std::to_string(sum / hs) +
105 ", min=" + std::to_string(min_val) +
106 ", max=" + std::to_string(max_val) +
107 ", first_4=[" + std::to_string(embedding_vec[0]) +
108 ", " + std::to_string(embedding_vec[1]) +
109 ", " + std::to_string(embedding_vec[2]) +
110 ", " + std::to_string(embedding_vec[3]) +
"]");
112 return embedding_vec;
118 ") is not divisible by GGML_QK_K (" +
119 std::to_string(
GGML_QK_K) +
") for Q6_K embedding lookup.");
120 return embedding_vec;
123 size_t start_block_idx = (size_t)token_id * blocks_per_row;
124 size_t end_block_idx = start_block_idx + blocks_per_row;
128 "Calculated block index out of bounds for Q6_K embedding table. "
130 std::to_string(token_id) +
131 ", StartBlock: " + std::to_string(start_block_idx) +
132 ", EndBlock: " + std::to_string(end_block_idx) +
134 return embedding_vec;
138 for (
size_t block_n = 0; block_n < blocks_per_row; ++block_n) {
141 size_t dest_offset = block_n *
GGML_QK_K;
142 size_t elements_to_copy =
SAFE_MIN(
static_cast<size_t>(
GGML_QK_K),
static_cast<size_t>(hs - dest_offset));
143 std::memcpy(&embedding_vec[dest_offset], dequantized_block,
144 elements_to_copy *
sizeof(
float));
146 return embedding_vec;
150 size_t offset = (size_t)token_id * hs;
152 Logger::error(
"Embedding offset out of bounds in F32 lookup for token: " +
153 std::to_string(token_id));
154 return embedding_vec;
159 return embedding_vec;
162 size_t offset = (size_t)token_id * hs;
165 "Embedding offset out of bounds in BF16 lookup for token: " +
166 std::to_string(token_id));
167 return embedding_vec;
169 std::vector<uint16_t> token_embedding_bf16(
173 return embedding_vec;
177 "No valid embedding table found (Q4_K, Q8_0, Q6_K, F32, BF16) for token: " +
178 std::to_string(token_id));
180 return embedding_vec;
185 Logger::info(
"[ROPE_FREQ_ENTRY] Entered initialize_rope_freqs.");
189 Logger::error(
"Cannot initialize RoPE frequencies: num_attention_heads is zero.");
193 Logger::info(
"[ROPE_FREQ_CHECK] calculated head_dim: " + std::to_string(head_dim));
195 Logger::error(
"Cannot initialize RoPE frequencies: calculated head_dim is zero.");
198 Logger::info(
"[ROPE_FREQ_CHECK] head_dim % 2 check. head_dim: " + std::to_string(head_dim));
199 if (head_dim % 2 != 0) {
200 Logger::error(
"Cannot initialize RoPE frequencies: head_dim must be even.");
204 Logger::info(
"[ROPE_INIT] Initializing RoPE with head_dim=" + std::to_string(head_dim) +
212 size_t required_size = (
static_cast<size_t>(max_seq_len) * head_dim) / 2;
213 if (required_size == 0) {
214 Logger::warning(
"RoPE precomputation resulted in zero size. Max seq len: " +
215 std::to_string(max_seq_len) +
", head_dim: " + std::to_string(head_dim));
222 for (
int pos = 0; pos < max_seq_len; ++pos) {
223 for (
int i = 0; i < head_dim; i += 2) {
224 float freq = 1.0f / std::pow(rope_theta,
float(i) / head_dim);
225 float val =
static_cast<float>(pos) * freq;
226 float cos_val = std::cos(val);
227 float sin_val = std::sin(val);
228 size_t flat_idx = (
static_cast<size_t>(pos) * head_dim / 2) + (i / 2);
232 Logger::error(
"RoPE precomputation index out of bounds: " + std::to_string(flat_idx) +