TinyLlama.cpp 1.0
A lightweight C++ implementation of the TinyLlama language model
Loading...
Searching...
No Matches
Functions
gguf_parser.cpp File Reference
#include "gguf_parser.h"
#include <iomanip>
#include <iostream>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <vector>
#include <cstring>
#include <cerrno>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "logger.h"
#include "quantization.h"
Include dependency graph for gguf_parser.cpp:

Go to the source code of this file.

Functions

size_t gguf_value_type_size (GGUFValueType type)
 
template<typename T >
void read_raw (std::ifstream &file, T &dest)
 Reads raw binary data from a file stream.
 
template void read_raw< uint8_t > (std::ifstream &, uint8_t &)
 
template void read_raw< int8_t > (std::ifstream &, int8_t &)
 
template void read_raw< uint16_t > (std::ifstream &, uint16_t &)
 
template void read_raw< int16_t > (std::ifstream &, int16_t &)
 
template void read_raw< uint32_t > (std::ifstream &, uint32_t &)
 
template void read_raw< int32_t > (std::ifstream &, int32_t &)
 
template void read_raw< float > (std::ifstream &, float &)
 
template void read_raw< uint64_t > (std::ifstream &, uint64_t &)
 
template void read_raw< int64_t > (std::ifstream &, int64_t &)
 
template void read_raw< double > (std::ifstream &, double &)
 
template void read_raw< GGUFValueType > (std::ifstream &, GGUFValueType &)
 
std::string read_gguf_string (std::ifstream &file)
 Reads a string from a GGUF format file.
 
GGUFData load_gguf_meta (const std::string &filename, bool use_mmap)
 Loads GGUF metadata and optionally memory-maps tensor data.
 

Function Documentation

◆ gguf_value_type_size()

size_t gguf_value_type_size ( GGUFValueType  type)

Definition at line 28 of file gguf_parser.cpp.

28 {
29 switch (type) {
31 return sizeof(uint8_t);
33 return sizeof(int8_t);
35 return sizeof(uint16_t);
37 return sizeof(int16_t);
39 return sizeof(uint32_t);
41 return sizeof(int32_t);
43 return sizeof(float);
45 return sizeof(uint8_t);
47 return sizeof(uint64_t);
49 return sizeof(int64_t);
51 return sizeof(double);
53 return 0;
55 return 0;
56 default:
57 return 0;
58 }
59}

References ARRAY, BOOL, FLOAT32, FLOAT64, INT16, INT32, INT64, INT8, STRING, UINT16, UINT32, UINT64, and UINT8.

Referenced by load_gguf_meta().

◆ load_gguf_meta()

GGUFData load_gguf_meta ( const std::string &  filename,
bool  use_mmap 
)

Loads GGUF metadata and optionally memory-maps tensor data.

Parses the header, metadata, and tensor information from a GGUF file. If mmap is enabled, it will also memory-map the tensor data region. If mmap is disabled, the tensor data is instead read from the file into the in-memory tensor_data buffer; the mapped tensor data pointer will be null and fd will be -1.

Parameters
filename: Path to the GGUF file.
use_mmap: Whether to memory-map the tensor data block.
Returns
GGUFData structure containing loaded information.
Exceptions
std::runtime_error: on file I/O or format errors.

Definition at line 123 of file gguf_parser.cpp.

123 {
124 Logger::info("Attempting to load GGUF file: " + filename + (use_mmap ? " with mmap" : " without mmap"));
125 std::ifstream metadata_file(filename, std::ios::binary);
126 if (!metadata_file.is_open()) {
127 throw std::runtime_error("Failed to open file for metadata: " + filename);
128 }
129
130 GGUFData result;
131 // The file_descriptor for mmap will be opened separately and stored in result.
132 // The GGUFData destructor will handle closing this fd and munmap.
133
134 read_raw(metadata_file, result.header.magic);
135 read_raw(metadata_file, result.header.version);
136 read_raw(metadata_file, result.header.tensor_count);
137 read_raw(metadata_file, result.header.metadata_kv_count);
138
139 {
140 std::stringstream ss;
141 ss << "Read Header:\n"
142 << " Magic: 0x" << std::hex << result.header.magic << std::dec << "\n"
143 << " Version: " << result.header.version << "\n"
144 << " Tensor Count: " << result.header.tensor_count << "\n"
145 << " Metadata KV Count: " << result.header.metadata_kv_count;
146 Logger::info(ss.str());
147 }
148
149 if (result.header.magic != GGUF_MAGIC) {
150 throw std::runtime_error("Not a valid GGUF file (magic number mismatch).");
151 }
152
153 Logger::info("Reading Metadata (" +
154 std::to_string(result.header.metadata_kv_count) + " pairs)...");
155 for (uint64_t i = 0; i < result.header.metadata_kv_count; ++i) {
156 std::string key;
157 GGUFValueType value_type_enum;
158 try {
159 key = read_gguf_string(metadata_file);
160 read_raw(metadata_file, value_type_enum);
161
162 switch (value_type_enum) {
164 uint8_t val;
165 read_raw(metadata_file, val);
166 result.metadata[key] = val;
167 break;
168 }
169 case GGUFValueType::INT8: {
170 int8_t val;
171 read_raw(metadata_file, val);
172 result.metadata[key] = val;
173 break;
174 }
176 uint16_t val;
177 read_raw(metadata_file, val);
178 result.metadata[key] = val;
179 break;
180 }
182 int16_t val;
183 read_raw(metadata_file, val);
184 result.metadata[key] = val;
185 break;
186 }
188 uint32_t val;
189 read_raw(metadata_file, val);
190 result.metadata[key] = val;
191 break;
192 }
194 int32_t val;
195 read_raw(metadata_file, val);
196 result.metadata[key] = val;
197 break;
198 }
200 float val;
201 read_raw(metadata_file, val);
202 result.metadata[key] = val;
203 break;
204 }
205 case GGUFValueType::BOOL: {
206 uint8_t byte;
207 read_raw(metadata_file, byte);
208 result.metadata[key] = (byte != 0);
209 break;
210 }
212 std::string val = read_gguf_string(metadata_file);
213 result.metadata[key] = val;
214 break;
215 }
217 uint64_t val;
218 read_raw(metadata_file, val);
219 result.metadata[key] = val;
220 break;
221 }
223 int64_t val;
224 read_raw(metadata_file, val);
225 result.metadata[key] = val;
226 break;
227 }
229 double val;
230 read_raw(metadata_file, val);
231 result.metadata[key] = val;
232 break;
233 }
235 GGUFValueType array_type_enum;
236 uint64_t count;
237 read_raw(metadata_file, array_type_enum);
238 read_raw(metadata_file, count);
239
240 GGUFArray array_obj;
241 array_obj.type = array_type_enum;
242 array_obj.len = count;
243 result.metadata[key] = array_obj;
244 bool skipped_data = false;
245 if (key == "tokenizer.ggml.tokens" &&
246 array_type_enum == GGUFValueType::STRING) {
247 Logger::info("Loading STRING array data ('" + key + "') with " +
248 std::to_string(count) + " elements...");
249 result.tokenizer_tokens.reserve(static_cast<size_t>(count));
250 for (uint64_t arr_i = 0; arr_i < count; ++arr_i) {
251 result.tokenizer_tokens.push_back(read_gguf_string(metadata_file));
252 }
253 Logger::info("Loaded tokenizer_tokens. Size: " +
254 std::to_string(result.tokenizer_tokens.size()));
255 } else if (key == "tokenizer.ggml.scores" &&
256 array_type_enum == GGUFValueType::FLOAT32) {
257 Logger::info("Loading FLOAT32 array data ('" + key + "') with " +
258 std::to_string(count) + " elements...");
259 result.tokenizer_scores.resize(static_cast<size_t>(count));
260 metadata_file.read(reinterpret_cast<char*>(result.tokenizer_scores.data()),
261 static_cast<std::streamsize>(count * sizeof(float)));
262 if (!metadata_file) {
263 throw std::runtime_error(
264 "GGUF Error: Failed to read scores array data.");
265 }
266 Logger::info("Loaded tokenizer_scores. Size: " +
267 std::to_string(result.tokenizer_scores.size()));
268 } else if (key == "tokenizer.ggml.token_type" &&
269 (array_type_enum == GGUFValueType::UINT32 || array_type_enum == GGUFValueType::INT32) ) {
270 Logger::info("Loading " + std::string(array_type_enum == GGUFValueType::UINT32 ? "UINT32" : "INT32") +
271 " array data ('" + key + "') with " +
272 std::to_string(count) + " elements...");
273 result.tokenizer_token_types.resize(static_cast<size_t>(count));
274 if (array_type_enum == GGUFValueType::UINT32) {
275 metadata_file.read(
276 reinterpret_cast<char*>(result.tokenizer_token_types.data()),
277 static_cast<std::streamsize>(count * sizeof(uint32_t)));
278 } else { // GGUFValueType::INT32
279 std::vector<int32_t> temp_s32_types(static_cast<size_t>(count));
280 metadata_file.read(
281 reinterpret_cast<char*>(temp_s32_types.data()),
282 static_cast<std::streamsize>(count * sizeof(int32_t)));
283 for(size_t k=0; k < count; ++k) {
284 result.tokenizer_token_types[k] = static_cast<uint32_t>(temp_s32_types[k]);
285 }
286 }
287 if (!metadata_file) {
288 throw std::runtime_error(
289 "GGUF Error: Failed to read token_type array data.");
290 }
291 Logger::info("Loaded tokenizer_token_types. Size: " +
292 std::to_string(result.tokenizer_token_types.size()));
293 } else if (key == "tokenizer.ggml.merges" &&
294 array_type_enum == GGUFValueType::STRING) {
295 Logger::info("Loading STRING array data ('" + key + "') with " +
296 std::to_string(count) + " elements...");
297 result.tokenizer_merges.reserve(static_cast<size_t>(count));
298 for (uint64_t arr_i = 0; arr_i < count; ++arr_i) {
299 result.tokenizer_merges.push_back(read_gguf_string(metadata_file));
300 }
301 Logger::info("Loaded tokenizer_merges. Size: " +
302 std::to_string(result.tokenizer_merges.size()));
303 } else {
304 skipped_data = true;
306 "Skipping unhandled/non-tokenizer ARRAY data for key '" + key +
307 "' (Type: " +
308 std::to_string(static_cast<uint32_t>(array_type_enum)) +
309 ", Count: " + std::to_string(count) + ")");
310
311 if (array_type_enum == GGUFValueType::STRING) {
312 for (uint64_t arr_i = 0; arr_i < count; ++arr_i) {
313 try {
314 std::string discarded_str = read_gguf_string(metadata_file);
315 } catch (const std::exception& e) {
316 Logger::error("Error skipping string element " +
317 std::to_string(arr_i) + " for key '" + key +
318 "': " + e.what());
319 throw;
320 }
321 }
322 } else {
323 size_t element_size = gguf_value_type_size(array_type_enum);
324 if (element_size == 0) {
325 throw std::runtime_error(
326 "Cannot skip array for key '" + key +
327 "' with unsupported or variable-sized element type: " +
328 std::to_string(static_cast<uint32_t>(array_type_enum)));
329 }
330
331 if (count > 0 &&
332 element_size > std::numeric_limits<uint64_t>::max() / count) {
333 throw std::overflow_error(
334 "Array size overflow calculating skip amount for key '" +
335 key + "'");
336 }
337 uint64_t total_size_to_skip = count * element_size;
338 if (total_size_to_skip > 0) {
339 metadata_file.seekg(static_cast<std::streamoff>(total_size_to_skip),
340 std::ios::cur);
341 if (!metadata_file) {
342 throw std::runtime_error(
343 "GGUF Error: Failed to seek past array data for key '" +
344 key + "'");
345 }
346 }
347 }
348 }
349 break;
350 }
351 default: {
352 throw std::runtime_error(
353 "Unknown metadata type encountered: " +
354 std::to_string(static_cast<uint32_t>(value_type_enum)) +
355 " for key: " + key);
356 }
357 }
358 } catch (const std::exception& e) {
359 std::string error_key =
360 key.empty() ? "(unknown key, error during key read)" : key;
362 "Error reading metadata for key: '" + error_key +
363 "' (type: " + std::to_string(static_cast<uint32_t>(value_type_enum)) +
364 ") - " + e.what());
365 throw;
366 }
367 }
368 Logger::info("Finished reading metadata.");
369
370 result.tensor_infos.reserve(static_cast<size_t>(result.header.tensor_count));
371 Logger::info("Reading Tensor Info (" +
372 std::to_string(result.header.tensor_count) + " tensors)...");
373 uint64_t accumulated_offset_debug = 0;
374 for (uint64_t i = 0; i < result.header.tensor_count; ++i) {
375 GGUFTensorInfo info;
376 try {
377 info.name = read_gguf_string(metadata_file);
378
379 uint32_t n_dims;
380 read_raw(metadata_file, n_dims);
381 if (n_dims > GGUF_MAX_TENSOR_DIMS) {
382 throw std::runtime_error("Tensor '" + info.name +
383 "' has unsupported number of dimensions: " +
384 std::to_string(n_dims));
385 }
386 info.shape.resize(n_dims);
387 for (uint32_t d = 0; d < n_dims; ++d) {
388 read_raw(metadata_file, info.shape[d]);
389 }
390
391 uint32_t ggml_type_u32;
392 read_raw(metadata_file, ggml_type_u32);
393 info.type = static_cast<GGMLType>(ggml_type_u32);
394
395 uint64_t pos_before_offset_read = metadata_file.tellg();
396
397 read_raw(metadata_file, info.offset);
398
399 std::stringstream ss_offset_log;
400 ss_offset_log
401 << "[GGUF_TENSOR_INFO] Tensor " << i << " ('" << info.name
402 << "'):" << "\n Raw offset from file: " << info.offset
403 << "\n File pos before offset read: " << pos_before_offset_read
404 << "\n Calculated accumulated_offset_debug (before this tensor): "
405 << accumulated_offset_debug;
406
407 info.num_elements = 1;
408 for (uint64_t dim : info.shape) {
409 if (dim > 0 &&
410 info.num_elements > std::numeric_limits<uint64_t>::max() / dim) {
411 throw std::overflow_error(
412 "Tensor dimension overflow calculating num_elements for tensor "
413 "'" +
414 info.name + "'");
415 }
416 info.num_elements *= dim;
417 }
418
419 size_t type_size = ggml_type_size(info.type);
420 size_t block_size = ggml_type_block_size(info.type);
421
422 if (block_size == 0 && info.num_elements > 0) {
423 throw std::runtime_error(
424 "Tensor '" + info.name +
425 "' has unknown or unsupported type: " + std::to_string(info.type));
426 }
427
428 if (block_size > 1) {
429 if (info.num_elements % block_size != 0) {
430 throw std::runtime_error("Tensor '" + info.name + "' num_elements (" +
431 std::to_string(info.num_elements) +
432 ") not divisible by block_size (" +
433 std::to_string(block_size) + ") for type " +
434 ggml_type_name(info.type));
435 }
436 uint64_t num_blocks = info.num_elements / block_size;
437 if (type_size > 0 &&
438 num_blocks > std::numeric_limits<uint64_t>::max() / type_size) {
439 throw std::overflow_error(
440 "Tensor size overflow calculating size_in_bytes for tensor '" +
441 info.name + "'");
442 }
443 info.size_in_bytes = static_cast<size_t>(num_blocks * type_size);
444 } else {
445 if (type_size > 0 &&
446 info.num_elements >
447 std::numeric_limits<uint64_t>::max() / type_size) {
448 throw std::overflow_error(
449 "Tensor size overflow calculating size_in_bytes for tensor '" +
450 info.name + "'");
451 }
452 info.size_in_bytes = static_cast<size_t>(info.num_elements * type_size);
453 }
454
455 ss_offset_log << "\n Calculated size_in_bytes for this tensor: "
456 << info.size_in_bytes;
457 Logger::info(ss_offset_log.str());
458 accumulated_offset_debug += info.size_in_bytes;
459
460 result.tensor_infos.push_back(info);
461 {
462 std::stringstream ss_tensor;
463 ss_tensor << "Tensor " << i << ": Name='" << info.name
464 << "', Type=" << ggml_type_name(info.type) << ", Shape=[ ";
465 for (size_t d = 0; d < info.shape.size(); ++d)
466 ss_tensor << info.shape[d]
467 << (d == info.shape.size() - 1 ? "" : ", ");
468 ss_tensor << " ], Offset=" << info.offset
469 << ", Size=" << info.size_in_bytes << " bytes";
470 Logger::info(ss_tensor.str());
471 }
472 } catch (const std::exception& e) {
473 std::string tensor_name =
474 info.name.empty() ? ("(unknown, index " + std::to_string(i) + ")")
475 : info.name;
476 Logger::error("Error reading tensor info for tensor " + tensor_name +
477 ": " + e.what());
478 throw;
479 }
480 }
481 Logger::info("Finished reading tensor info.");
482
483 Logger::info("Populating tensor_infos_map...");
484 for (const auto& tinfo : result.tensor_infos) {
485 if (result.tensor_infos_map.count(tinfo.name)) {
486 Logger::warning("Duplicate tensor name found in GGUF: '" + tinfo.name +
487 "'. Overwriting entry in map.");
488 }
489 result.tensor_infos_map[tinfo.name] = tinfo;
490 }
491 Logger::info("Finished populating tensor_infos_map. Map size: " +
492 std::to_string(result.tensor_infos_map.size()));
493
494 uint64_t alignment = GGUF_DEFAULT_ALIGNMENT;
495 try {
496 if (result.metadata.count("general.alignment")) {
497 uint32_t align_val =
498 std::get<uint32_t>(result.metadata["general.alignment"]);
499 if (align_val > 0) {
500 alignment = align_val;
501 }
502 Logger::info("Using alignment value from metadata: " +
503 std::to_string(alignment));
504 } else {
506 "Metadata key 'general.alignment' not found. Using default "
507 "alignment: " +
508 std::to_string(alignment));
509 }
510 } catch (const std::bad_variant_access& e) {
512 "Could not read 'general.alignment' metadata as uint32. Using default "
513 "alignment: " +
514 std::to_string(alignment));
515 } catch (const std::exception& e) {
516 Logger::warning("Error accessing 'general.alignment' metadata: " +
517 std::string(e.what()) +
518 ". Using default alignment: " + std::to_string(alignment));
519 }
520 result.data_alignment = alignment; // Store the determined alignment
521
522 uint64_t current_pos_metadata_stream = metadata_file.tellg();
523 Logger::info("[GGUF_LOAD] Current file position (metadata stream) before padding seek: " +
524 std::to_string(current_pos_metadata_stream));
525 uint64_t padding = (alignment - (current_pos_metadata_stream % alignment)) % alignment;
526 Logger::info("[GGUF_LOAD] Calculated padding: " + std::to_string(padding));
527
528 uint64_t actual_data_start_offset_in_file = current_pos_metadata_stream + padding;
530 "[GGUF_LOAD] Calculated actual_data_start_offset_in_file (for mmap): " +
531 std::to_string(actual_data_start_offset_in_file));
532
533 metadata_file.close();
534 Logger::info("[GGUF_LOAD] Metadata ifstream closed.");
535
536 if (!use_mmap) {
537 Logger::info("[GGUF_LOAD] mmap is disabled by configuration. Loading tensor data into memory using OPTIMIZED bulk I/O.");
538
539 uint64_t total_tensor_data_size = 0;
540 for (const auto& tensor_info : result.tensor_infos) {
541 total_tensor_data_size = std::max(total_tensor_data_size, tensor_info.offset + tensor_info.size_in_bytes);
542 }
543
544 if (total_tensor_data_size > 0) {
545 result.tensor_data.resize(total_tensor_data_size);
546
547 std::ifstream tensor_file(filename, std::ios::binary);
548 if (!tensor_file.is_open()) {
549 throw std::runtime_error("Failed to open file for tensor data reading: " + filename);
550 }
551
552 tensor_file.seekg(actual_data_start_offset_in_file);
553 if (!tensor_file) {
554 throw std::runtime_error("Failed to seek to tensor data start in file: " + filename);
555 }
556
557 // OPTIMIZATION: Use larger buffer for bulk reading to reduce I/O overhead
558 constexpr size_t BULK_READ_BUFFER_SIZE = 64 * 1024 * 1024; // 64MB chunks
559 size_t bytes_remaining = total_tensor_data_size;
560 size_t bytes_read_total = 0;
561
562 Logger::info("[GGUF_LOAD] Reading " + std::to_string(total_tensor_data_size) + " bytes in optimized " + std::to_string(BULK_READ_BUFFER_SIZE / (1024*1024)) + "MB chunks...");
563
564 while (bytes_remaining > 0) {
565 size_t chunk_size = std::min(bytes_remaining, BULK_READ_BUFFER_SIZE);
566
567 tensor_file.read(reinterpret_cast<char*>(result.tensor_data.data() + bytes_read_total), chunk_size);
568 if (!tensor_file) {
569 throw std::runtime_error("Failed to read tensor data chunk at offset " + std::to_string(bytes_read_total) + " from file: " + filename);
570 }
571
572 bytes_read_total += chunk_size;
573 bytes_remaining -= chunk_size;
574
575 if (bytes_read_total % (256 * 1024 * 1024) == 0) { // Log every 256MB
576 Logger::info("[GGUF_LOAD] Progress: " + std::to_string(bytes_read_total / (1024*1024)) + "MB / " + std::to_string(total_tensor_data_size / (1024*1024)) + "MB loaded");
577 }
578 }
579
580 tensor_file.close();
581 Logger::info("[GGUF_LOAD] Successfully loaded " + std::to_string(total_tensor_data_size) + " bytes of tensor data using optimized bulk I/O.");
582
583 if (total_tensor_data_size >= 16) {
584 std::stringstream ss_bytes;
585 ss_bytes << "[GGUF_LOAD] First 16 bytes of tensor data: ";
586 for (int i = 0; i < 16; ++i) {
587 ss_bytes << "0x" << std::hex << static_cast<int>(result.tensor_data[i]) << " ";
588 }
589 Logger::info(ss_bytes.str());
590 }
591 } else {
592 Logger::info("[GGUF_LOAD] No tensor data to load (total size is 0).");
593 }
594
595 return result;
596 }
597
598#ifndef _WIN32
599 result.file_descriptor = open(filename.c_str(), O_RDONLY);
600 if (result.file_descriptor == -1) {
601 throw std::runtime_error("GGUF Error: Failed to open file for mmap: " + filename + " - " + strerror(errno));
602 }
603 Logger::info("[GGUF_LOAD] File opened for mmap with fd: " + std::to_string(result.file_descriptor));
604
605 struct stat file_stat;
606 if (fstat(result.file_descriptor, &file_stat) == -1) {
607 close(result.file_descriptor);
608 result.file_descriptor = -1;
609 throw std::runtime_error("GGUF Error: Failed to fstat file for mmap: " + filename + " - " + strerror(errno));
610 }
611 uint64_t file_total_size = static_cast<uint64_t>(file_stat.st_size);
612#else // _WIN32
613 result.h_file = CreateFileA(
614 filename.c_str(),
615 GENERIC_READ,
616 FILE_SHARE_READ,
617 NULL,
618 OPEN_EXISTING,
619 FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS, // Hint for mmap-like access
620 NULL
621 );
622 if (result.h_file == INVALID_HANDLE_VALUE) {
623 throw std::runtime_error("GGUF Error: Failed to open file for mmap (CreateFileA): " + filename + " - " + GetWindowsErrorString(GetLastError()));
624 }
625 Logger::info("[GGUF_LOAD] File opened for mmap with h_file: " + std::to_string(reinterpret_cast<uintptr_t>(result.h_file)));
626
627 LARGE_INTEGER fileSizeWindows;
628 if (!GetFileSizeEx(result.h_file, &fileSizeWindows)) {
629 DWORD error_code = GetLastError();
630 CloseHandle(result.h_file);
631 result.h_file = INVALID_HANDLE_VALUE;
632 throw std::runtime_error("GGUF Error: Failed to GetFileSizeEx for mmap: " + filename + " - " + GetWindowsErrorString(error_code));
633 }
634 uint64_t file_total_size = static_cast<uint64_t>(fileSizeWindows.QuadPart);
635#endif
636
637 if (file_total_size < actual_data_start_offset_in_file) {
638#ifndef _WIN32
639 close(result.file_descriptor);
640 result.file_descriptor = -1;
641#else
642 CloseHandle(result.h_file);
643 result.h_file = INVALID_HANDLE_VALUE;
644#endif
645 throw std::runtime_error(
646 "GGUF Error: File total size (" + std::to_string(file_total_size) +
647 ") is less than calculated actual_data_start_offset_in_file (" + std::to_string(actual_data_start_offset_in_file) + ").");
648 }
649
650 uint64_t tensor_data_block_size_on_disk = file_total_size - actual_data_start_offset_in_file;
651 Logger::info("[GGUF_LOAD] Calculated tensor_data_block_size_on_disk (for mmap length calculation): " +
652 std::to_string(tensor_data_block_size_on_disk) + " bytes.");
653
654 long page_size;
655#ifndef _WIN32
656 page_size = sysconf(_SC_PAGE_SIZE);
657 if (page_size == -1) {
658 close(result.file_descriptor);
659 result.file_descriptor = -1;
660 throw std::runtime_error(std::string("GGUF Error: Failed to get page size using sysconf - ") + strerror(errno));
661 }
662#else // _WIN32
663 SYSTEM_INFO sysInfo;
664 GetSystemInfo(&sysInfo);
665 // For MapViewOfFile, offsets must be aligned to dwAllocationGranularity.
666 // Page size (dwPageSize) might be smaller, but dwAllocationGranularity is the key for mmap view offsets.
667 page_size = static_cast<long>(sysInfo.dwAllocationGranularity);
668 if (page_size <= 0) { // Sanity check
669 CloseHandle(result.h_file);
670 result.h_file = INVALID_HANDLE_VALUE;
671 throw std::runtime_error("GGUF Error: Failed to get valid system allocation granularity (page_size equivalent for mmap offset).");
672 }
673#endif
674 Logger::info("[GGUF_LOAD] System page/allocation granularity for mmap offset: " + std::to_string(page_size));
675
676 uint64_t mmap_offset = (actual_data_start_offset_in_file / page_size) * page_size; // Align offset down to page boundary
677 result.offset_diff_for_mmap = static_cast<size_t>(actual_data_start_offset_in_file - mmap_offset);
678 size_t mmap_length = static_cast<size_t>(tensor_data_block_size_on_disk + result.offset_diff_for_mmap);
679
680 Logger::info("[GGUF_LOAD] Aligning mmap: actual_data_start_offset_in_file=" + std::to_string(actual_data_start_offset_in_file) +
681 ", mmap_offset=" + std::to_string(mmap_offset) + // This is the offset from file start for mmap view
682 ", offset_diff_for_mmap=" + std::to_string(result.offset_diff_for_mmap) + // Bytes from mmap view start to actual tensor data start
683 ", mmap_length=" + std::to_string(mmap_length)); // Total length of the mmap view
684
685 if (mmap_length > 0) {
686 result.mapped_tensor_data_size = mmap_length;
687#ifndef _WIN32
688 result.mapped_tensor_data = mmap(nullptr, result.mapped_tensor_data_size,
689 PROT_READ, MAP_SHARED,
690 result.file_descriptor, static_cast<off_t>(mmap_offset));
691#else // _WIN32
692 result.h_map_file = CreateFileMapping(
693 result.h_file,
694 NULL,
695 PAGE_READONLY,
696 0,
697 0,
698 NULL
699 );
700 if (result.h_map_file == NULL) {
701 DWORD error_code = GetLastError();
702 CloseHandle(result.h_file);
703 result.h_file = INVALID_HANDLE_VALUE;
704 throw std::runtime_error("GGUF Error: CreateFileMapping failed - " + GetWindowsErrorString(error_code));
705 }
706
707 // MapViewOfFile's dwFileOffsetHigh/Low parameters form the 64-bit offset.
708 // This offset (mmap_offset) MUST be a multiple of dwAllocationGranularity (our page_size for Windows).
709 DWORD mmap_offset_low = static_cast<DWORD>(mmap_offset & 0xFFFFFFFF);
710 DWORD mmap_offset_high = static_cast<DWORD>((mmap_offset >> 32) & 0xFFFFFFFF);
711
712 result.mapped_tensor_data = MapViewOfFile(
713 result.h_map_file,
714 FILE_MAP_READ,
715 mmap_offset_high,
716 mmap_offset_low,
717 result.mapped_tensor_data_size // This is dwNumberOfBytesToMap
718 );
719#endif
720
721 if (result.mapped_tensor_data == GGUFData::MMapFailure) { // Use platform-agnostic failure check
722 int last_error = 0;
723#ifndef _WIN32
724 last_error = errno;
725 // file_descriptor is closed by GGUFData destructor if it's still valid
726#else
727 last_error = GetLastError();
728 // h_map_file and h_file are closed by GGUFData destructor if they are still valid
729#endif
730 result.mapped_tensor_data = nullptr;
731 result.mapped_tensor_data_size = 0;
732 result.offset_diff_for_mmap = 0;
733 throw std::runtime_error("GGUF Error: mmap/MapViewOfFile failed. Aligned Offset: " + std::to_string(mmap_offset) +
734 ", Mmap Length: " + std::to_string(mmap_length) +
735#ifndef _WIN32
736 " - POSIX Error: " + strerror(last_error));
737#else
738 " - Windows Error: " + GetWindowsErrorString(last_error));
739#endif
740 }
741 Logger::info("[GGUF_LOAD] Successfully mmapped tensor data block. Mapped Address: " +
742 std::to_string(reinterpret_cast<uintptr_t>(result.mapped_tensor_data)) +
743 ", Mapped Size: " + std::to_string(result.mapped_tensor_data_size) +
744 " bytes from file offset " + std::to_string(mmap_offset));
745
746 if (result.mapped_tensor_data_size >= (result.offset_diff_for_mmap + 16)) {
747 std::stringstream ss_bytes;
748 ss_bytes << "[GGUF_LOAD] First 16 bytes of *actual* tensor data (after offset_diff) in mmap: ";
749 const uint8_t* actual_data_ptr_debug = static_cast<const uint8_t*>(result.mapped_tensor_data) + result.offset_diff_for_mmap;
750 for (int i = 0; i < 16; ++i)
751 ss_bytes << "0x" << std::hex << static_cast<int>(actual_data_ptr_debug[i]) << " ";
752 Logger::info(ss_bytes.str());
753 }
754
755#ifndef _WIN32
756
757 Logger::info("[GGUF_LOAD] Attempting to prefetch mmapped tensor data using posix_madvise(MADV_WILLNEED)...");
758 uint8_t* actual_tensor_data_block_start_in_mmap = static_cast<uint8_t*>(result.mapped_tensor_data) + result.offset_diff_for_mmap;
759
760 if (page_size <= 0) {
761 Logger::error("[GGUF_LOAD] Invalid page_size for madvise alignment: " + std::to_string(page_size) + ". Skipping prefetch.");
762 } else {
763 for (const auto& tensor_info : result.tensor_infos) {
764 if (tensor_info.size_in_bytes > 0) {
765 uintptr_t exact_tensor_start_addr_val = reinterpret_cast<uintptr_t>(actual_tensor_data_block_start_in_mmap + tensor_info.offset);
766 void* page_aligned_madvise_addr = reinterpret_cast<void*>(exact_tensor_start_addr_val - (exact_tensor_start_addr_val % static_cast<uintptr_t>(page_size)));
767 size_t madvise_length = (exact_tensor_start_addr_val + tensor_info.size_in_bytes) - reinterpret_cast<uintptr_t>(page_aligned_madvise_addr);
768
769 uintptr_t advised_region_start_val = reinterpret_cast<uintptr_t>(page_aligned_madvise_addr);
770 uintptr_t advised_region_end_val = advised_region_start_val + madvise_length;
771 uintptr_t overall_mmap_start_val = reinterpret_cast<uintptr_t>(result.mapped_tensor_data);
772 uintptr_t overall_mmap_end_val = overall_mmap_start_val + result.mapped_tensor_data_size;
773
774 if (advised_region_start_val >= overall_mmap_start_val && advised_region_end_val <= overall_mmap_end_val && advised_region_start_val < advised_region_end_val) { // Added check start < end
775 int ret = posix_madvise(page_aligned_madvise_addr, madvise_length, POSIX_MADV_WILLNEED);
776 if (ret != 0) {
777 Logger::warning("[GGUF_LOAD] posix_madvise failed for tensor '" + tensor_info.name +
778 "' (addr: " + std::to_string(reinterpret_cast<uintptr_t>(page_aligned_madvise_addr)) +
779 ", len: " + std::to_string(madvise_length) +
780 ") with error code " + std::to_string(errno) +
781 " (" + strerror(errno) + "). Skipping prefetch for this tensor.");
782 }
783 } else {
784 Logger::warning("[GGUF_LOAD] Tensor '" + tensor_info.name +
785 "' calculated region for madvise is invalid or out of overall mmap bounds. Skipping prefetch. "
786 /* ... detailed log as before ... */ );
787 }
788 }
789 }
790 }
791 Logger::info("[GGUF_LOAD] Finished POSIX prefetching attempt with posix_madvise.");
792
793#else // _WIN32
794 Logger::info("[GGUF_LOAD] Tensor prefetching (posix_madvise) is currently implemented for POSIX systems. Skipping for Windows for now.");
795#endif
796
797 } else {
798 Logger::info("[GGUF_LOAD] Tensor data block size (or mmap_length) is 0. Nothing to mmap.");
799 result.mapped_tensor_data = nullptr; // Ensure it's null if not mapped
800 result.mapped_tensor_data_size = 0;
801 result.offset_diff_for_mmap = 0;
802 }
803
804 Logger::info("GGUF metadata loaded and tensor data (if any) mmapped successfully.");
805 return result;
806}
static void warning(const std::string &message)
Definition logger.cpp:139
static void info(const std::string &message)
Definition logger.cpp:135
static void error(const std::string &message)
Definition logger.cpp:143
GGMLType
Enumeration of GGML tensor data types.
Definition ggml_types.h:21
GGUFValueType
Enumeration of value types used in GGUF metadata.
Definition ggml_types.h:51
size_t gguf_value_type_size(GGUFValueType type)
void read_raw(std::ifstream &file, T &dest)
Reads raw binary data from a file stream.
std::string read_gguf_string(std::ifstream &file)
Reads a string from a GGUF format file.
constexpr uint32_t GGUF_MAGIC
GGUF magic number that identifies the file format. Spells "GGUF" in ASCII (0x47475546).
Definition gguf_parser.h:24
constexpr uint32_t GGUF_MAX_TENSOR_DIMS
Definition gguf_parser.h:30
constexpr uint64_t GGUF_DEFAULT_ALIGNMENT
Constants for GGUF file parsing and validation.
Definition gguf_parser.h:29
size_t ggml_type_block_size(GGMLType type)
Gets the block size for a GGML type.
size_t ggml_type_size(GGMLType type)
Gets the size in bytes of a GGML type.
const char * ggml_type_name(GGMLType type)
Gets the string name of a GGML type.
Represents an array in GGUF metadata.
GGUFValueType type
uint64_t len
Complete representation of a GGUF file's contents.
static const void * MMapFailure
uint64_t data_alignment
std::vector< GGUFTensorInfo > tensor_infos
std::vector< std::string > tokenizer_tokens
std::vector< float > tokenizer_scores
size_t offset_diff_for_mmap
std::vector< uint8_t > tensor_data
std::vector< std::string > tokenizer_merges
size_t mapped_tensor_data_size
std::map< std::string, GGUFMetadataValue > metadata
int file_descriptor
void * mapped_tensor_data
std::map< std::string, GGUFTensorInfo > tensor_infos_map
GGUFHeader header
std::vector< uint32_t > tokenizer_token_types
uint64_t tensor_count
uint64_t metadata_kv_count
uint32_t version
uint32_t magic
Information about a tensor stored in a GGUF file.
size_t size_in_bytes
size_t num_elements
uint64_t offset
std::vector< uint64_t > shape
std::string name

References ARRAY, BOOL, GGUFData::data_alignment, Logger::error(), GGUFData::file_descriptor, FLOAT32, FLOAT64, ggml_type_block_size(), ggml_type_name(), ggml_type_size(), GGUF_DEFAULT_ALIGNMENT, GGUF_MAGIC, GGUF_MAX_TENSOR_DIMS, gguf_value_type_size(), GGUFData::header, Logger::info(), INT16, INT32, INT64, INT8, GGUFArray::len, GGUFHeader::magic, GGUFData::mapped_tensor_data, GGUFData::mapped_tensor_data_size, GGUFData::metadata, GGUFHeader::metadata_kv_count, GGUFData::MMapFailure, GGUFTensorInfo::name, GGUFTensorInfo::num_elements, GGUFTensorInfo::offset, GGUFData::offset_diff_for_mmap, read_gguf_string(), read_raw(), GGUFTensorInfo::shape, GGUFTensorInfo::size_in_bytes, STRING, GGUFHeader::tensor_count, GGUFData::tensor_data, GGUFData::tensor_infos, GGUFData::tensor_infos_map, GGUFData::tokenizer_merges, GGUFData::tokenizer_scores, GGUFData::tokenizer_token_types, GGUFData::tokenizer_tokens, GGUFArray::type, GGUFTensorInfo::type, UINT16, UINT32, UINT64, UINT8, GGUFHeader::version, and Logger::warning().

Referenced by TinyLlamaModel::TinyLlamaModel().

◆ read_gguf_string()

std::string read_gguf_string ( std::ifstream &  file)

Reads a string from a GGUF format file.

Parameters
file: Input file stream positioned at the start of a string
Returns
The string read from the file
Exceptions
std::runtime_error: if the string length exceeds GGUF_STRING_MAX_LENGTH, or if the length or string data cannot be read from the stream

Definition at line 82 of file gguf_parser.cpp.

82 {
83 uint64_t len;
84 read_raw(file, len);
85 if (len > 0) {
86 if (len > GGUF_STRING_MAX_LENGTH) {
87 throw std::runtime_error(
88 "GGUF Error: String length exceeds sanity limit: " +
89 std::to_string(len));
90 }
91 std::vector<char> buf(static_cast<size_t>(len));
92 file.read(buf.data(), static_cast<std::streamsize>(len));
93 if (!file) {
94 throw std::runtime_error("GGUF Error: Failed to read string data.");
95 }
96 return std::string(buf.data(), static_cast<size_t>(len));
97 } else {
98 return "";
99 }
100}
constexpr uint64_t GGUF_STRING_MAX_LENGTH
Definition gguf_parser.h:31

References GGUF_STRING_MAX_LENGTH, and read_raw().

Referenced by load_gguf_meta().

◆ read_raw()

template<typename T >
void read_raw ( std::ifstream &  file,
T &  dest 
)

Reads raw binary data from a file stream.

Template Parameters
T: The type of data to read
Parameters
file: Input file stream
dest: Destination variable to store the read data

Definition at line 62 of file gguf_parser.cpp.

62 {
63 file.read(reinterpret_cast<char*>(&dest), sizeof(T));
64 if (!file) {
65 throw std::runtime_error(
66 "GGUF Error: Failed to read data from file stream.");
67 }
68}

Referenced by load_gguf_meta(), and read_gguf_string().

◆ read_raw< double >()

template void read_raw< double > ( std::ifstream &  ,
double &   
)

◆ read_raw< float >()

template void read_raw< float > ( std::ifstream &  ,
float &   
)

◆ read_raw< GGUFValueType >()

template void read_raw< GGUFValueType > ( std::ifstream &  ,
GGUFValueType &  
)

◆ read_raw< int16_t >()

template void read_raw< int16_t > ( std::ifstream &  ,
int16_t &   
)

◆ read_raw< int32_t >()

template void read_raw< int32_t > ( std::ifstream &  ,
int32_t &   
)

◆ read_raw< int64_t >()

template void read_raw< int64_t > ( std::ifstream &  ,
int64_t &   
)

◆ read_raw< int8_t >()

template void read_raw< int8_t > ( std::ifstream &  ,
int8_t &   
)

◆ read_raw< uint16_t >()

template void read_raw< uint16_t > ( std::ifstream &  ,
uint16_t &   
)

◆ read_raw< uint32_t >()

template void read_raw< uint32_t > ( std::ifstream &  ,
uint32_t &   
)

◆ read_raw< uint64_t >()

template void read_raw< uint64_t > ( std::ifstream &  ,
uint64_t &   
)

◆ read_raw< uint8_t >()

template void read_raw< uint8_t > ( std::ifstream &  ,
uint8_t &   
)