Loads GGUF metadata and optionally memory-maps tensor data.
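Parses the header, metadata, and tensor information from a GGUF file. If mmap is enabled, it also memory-maps the tensor data region. If mmap is disabled, the tensor data is instead read from the file into the in-memory tensor_data buffer using chunked bulk I/O; in that case the memory-mapped tensor data pointers will be null and fd will be -1.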
123 {
124 Logger::info(
"Attempting to load GGUF file: " + filename + (use_mmap ?
" with mmap" :
" without mmap"));
125 std::ifstream metadata_file(filename, std::ios::binary);
126 if (!metadata_file.is_open()) {
127 throw std::runtime_error("Failed to open file for metadata: " + filename);
128 }
129
131
132
133
138
139 {
140 std::stringstream ss;
141 ss << "Read Header:\n"
142 <<
" Magic: 0x" << std::hex << result.
header.
magic << std::dec <<
"\n"
147 }
148
150 throw std::runtime_error("Not a valid GGUF file (magic number mismatch).");
151 }
152
156 std::string key;
158 try {
160 read_raw(metadata_file, value_type_enum);
161
162 switch (value_type_enum) {
164 uint8_t val;
167 break;
168 }
170 int8_t val;
173 break;
174 }
176 uint16_t val;
179 break;
180 }
182 int16_t val;
185 break;
186 }
188 uint32_t val;
191 break;
192 }
194 int32_t val;
197 break;
198 }
200 float val;
203 break;
204 }
206 uint8_t byte;
209 break;
210 }
214 break;
215 }
217 uint64_t val;
220 break;
221 }
223 int64_t val;
226 break;
227 }
229 double val;
232 break;
233 }
236 uint64_t count;
237 read_raw(metadata_file, array_type_enum);
239
241 array_obj.
type = array_type_enum;
242 array_obj.
len = count;
244 bool skipped_data = false;
245 if (key == "tokenizer.ggml.tokens" &&
247 Logger::info(
"Loading STRING array data ('" + key +
"') with " +
248 std::to_string(count) + " elements...");
250 for (uint64_t arr_i = 0; arr_i < count; ++arr_i) {
252 }
255 } else if (key == "tokenizer.ggml.scores" &&
257 Logger::info(
"Loading FLOAT32 array data ('" + key +
"') with " +
258 std::to_string(count) + " elements...");
261 static_cast<std::streamsize>(count * sizeof(float)));
262 if (!metadata_file) {
263 throw std::runtime_error(
264 "GGUF Error: Failed to read scores array data.");
265 }
268 } else if (key == "tokenizer.ggml.token_type" &&
271 " array data ('" + key + "') with " +
272 std::to_string(count) + " elements...");
275 metadata_file.read(
277 static_cast<std::streamsize>(count * sizeof(uint32_t)));
278 } else {
279 std::vector<int32_t> temp_s32_types(static_cast<size_t>(count));
280 metadata_file.read(
281 reinterpret_cast<char*>(temp_s32_types.data()),
282 static_cast<std::streamsize>(count * sizeof(int32_t)));
283 for(size_t k=0; k < count; ++k) {
285 }
286 }
287 if (!metadata_file) {
288 throw std::runtime_error(
289 "GGUF Error: Failed to read token_type array data.");
290 }
293 } else if (key == "tokenizer.ggml.merges" &&
295 Logger::info(
"Loading STRING array data ('" + key +
"') with " +
296 std::to_string(count) + " elements...");
298 for (uint64_t arr_i = 0; arr_i < count; ++arr_i) {
300 }
303 } else {
304 skipped_data = true;
306 "Skipping unhandled/non-tokenizer ARRAY data for key '" + key +
307 "' (Type: " +
308 std::to_string(static_cast<uint32_t>(array_type_enum)) +
309 ", Count: " + std::to_string(count) + ")");
310
312 for (uint64_t arr_i = 0; arr_i < count; ++arr_i) {
313 try {
315 } catch (const std::exception& e) {
317 std::to_string(arr_i) + " for key '" + key +
318 "': " + e.what());
319 throw;
320 }
321 }
322 } else {
324 if (element_size == 0) {
325 throw std::runtime_error(
326 "Cannot skip array for key '" + key +
327 "' with unsupported or variable-sized element type: " +
328 std::to_string(static_cast<uint32_t>(array_type_enum)));
329 }
330
331 if (count > 0 &&
332 element_size > std::numeric_limits<uint64_t>::max() / count) {
333 throw std::overflow_error(
334 "Array size overflow calculating skip amount for key '" +
335 key + "'");
336 }
337 uint64_t total_size_to_skip = count * element_size;
338 if (total_size_to_skip > 0) {
339 metadata_file.seekg(static_cast<std::streamoff>(total_size_to_skip),
340 std::ios::cur);
341 if (!metadata_file) {
342 throw std::runtime_error(
343 "GGUF Error: Failed to seek past array data for key '" +
344 key + "'");
345 }
346 }
347 }
348 }
349 break;
350 }
351 default: {
352 throw std::runtime_error(
353 "Unknown metadata type encountered: " +
354 std::to_string(static_cast<uint32_t>(value_type_enum)) +
355 " for key: " + key);
356 }
357 }
358 } catch (const std::exception& e) {
359 std::string error_key =
360 key.empty() ? "(unknown key, error during key read)" : key;
362 "Error reading metadata for key: '" + error_key +
363 "' (type: " + std::to_string(static_cast<uint32_t>(value_type_enum)) +
364 ") - " + e.what());
365 throw;
366 }
367 }
369
373 uint64_t accumulated_offset_debug = 0;
376 try {
378
379 uint32_t n_dims;
382 throw std::runtime_error(
"Tensor '" + info.
name +
383 "' has unsupported number of dimensions: " +
384 std::to_string(n_dims));
385 }
386 info.
shape.resize(n_dims);
387 for (uint32_t d = 0; d < n_dims; ++d) {
389 }
390
391 uint32_t ggml_type_u32;
392 read_raw(metadata_file, ggml_type_u32);
394
395 uint64_t pos_before_offset_read = metadata_file.tellg();
396
398
399 std::stringstream ss_offset_log;
400 ss_offset_log
401 <<
"[GGUF_TENSOR_INFO] Tensor " << i <<
" ('" << info.
name
402 <<
"'):" <<
"\n Raw offset from file: " << info.
offset
403 << "\n File pos before offset read: " << pos_before_offset_read
404 << "\n Calculated accumulated_offset_debug (before this tensor): "
405 << accumulated_offset_debug;
406
408 for (uint64_t dim : info.shape) {
409 if (dim > 0 &&
410 info.
num_elements > std::numeric_limits<uint64_t>::max() / dim) {
411 throw std::overflow_error(
412 "Tensor dimension overflow calculating num_elements for tensor "
413 "'" +
415 }
417 }
418
421
423 throw std::runtime_error(
424 "Tensor '" + info.
name +
425 "' has unknown or unsupported type: " + std::to_string(info.
type));
426 }
427
428 if (block_size > 1) {
430 throw std::runtime_error(
"Tensor '" + info.
name +
"' num_elements (" +
432 ") not divisible by block_size (" +
433 std::to_string(block_size) + ") for type " +
435 }
437 if (type_size > 0 &&
438 num_blocks > std::numeric_limits<uint64_t>::max() / type_size) {
439 throw std::overflow_error(
440 "Tensor size overflow calculating size_in_bytes for tensor '" +
442 }
443 info.
size_in_bytes =
static_cast<size_t>(num_blocks * type_size);
444 } else {
445 if (type_size > 0 &&
447 std::numeric_limits<uint64_t>::max() / type_size) {
448 throw std::overflow_error(
449 "Tensor size overflow calculating size_in_bytes for tensor '" +
451 }
453 }
454
455 ss_offset_log << "\n Calculated size_in_bytes for this tensor: "
459
461 {
462 std::stringstream ss_tensor;
463 ss_tensor <<
"Tensor " << i <<
": Name='" << info.
name
465 for (
size_t d = 0; d < info.
shape.size(); ++d)
466 ss_tensor << info.
shape[d]
467 << (d == info.
shape.size() - 1 ?
"" :
", ");
468 ss_tensor <<
" ], Offset=" << info.
offset
471 }
472 } catch (const std::exception& e) {
473 std::string tensor_name =
474 info.
name.empty() ? (
"(unknown, index " + std::to_string(i) +
")")
475 : info.name;
476 Logger::error(
"Error reading tensor info for tensor " + tensor_name +
477 ": " + e.what());
478 throw;
479 }
480 }
482
484 for (const auto& tinfo : result.tensor_infos) {
486 Logger::warning(
"Duplicate tensor name found in GGUF: '" + tinfo.name +
487 "'. Overwriting entry in map.");
488 }
490 }
491 Logger::info(
"Finished populating tensor_infos_map. Map size: " +
493
495 try {
496 if (result.
metadata.count(
"general.alignment")) {
497 uint32_t align_val =
498 std::get<uint32_t>(result.
metadata[
"general.alignment"]);
499 if (align_val > 0) {
500 alignment = align_val;
501 }
503 std::to_string(alignment));
504 } else {
506 "Metadata key 'general.alignment' not found. Using default "
507 "alignment: " +
508 std::to_string(alignment));
509 }
510 } catch (const std::bad_variant_access& e) {
512 "Could not read 'general.alignment' metadata as uint32. Using default "
513 "alignment: " +
514 std::to_string(alignment));
515 } catch (const std::exception& e) {
517 std::string(e.what()) +
518 ". Using default alignment: " + std::to_string(alignment));
519 }
521
522 uint64_t current_pos_metadata_stream = metadata_file.tellg();
523 Logger::info(
"[GGUF_LOAD] Current file position (metadata stream) before padding seek: " +
524 std::to_string(current_pos_metadata_stream));
525 uint64_t padding = (alignment - (current_pos_metadata_stream % alignment)) % alignment;
526 Logger::info(
"[GGUF_LOAD] Calculated padding: " + std::to_string(padding));
527
528 uint64_t actual_data_start_offset_in_file = current_pos_metadata_stream + padding;
530 "[GGUF_LOAD] Calculated actual_data_start_offset_in_file (for mmap): " +
531 std::to_string(actual_data_start_offset_in_file));
532
533 metadata_file.close();
535
536 if (!use_mmap) {
537 Logger::info(
"[GGUF_LOAD] mmap is disabled by configuration. Loading tensor data into memory using OPTIMIZED bulk I/O.");
538
539 uint64_t total_tensor_data_size = 0;
540 for (const auto& tensor_info : result.tensor_infos) {
541 total_tensor_data_size = std::max(total_tensor_data_size, tensor_info.offset + tensor_info.size_in_bytes);
542 }
543
544 if (total_tensor_data_size > 0) {
546
547 std::ifstream tensor_file(filename, std::ios::binary);
548 if (!tensor_file.is_open()) {
549 throw std::runtime_error("Failed to open file for tensor data reading: " + filename);
550 }
551
552 tensor_file.seekg(actual_data_start_offset_in_file);
553 if (!tensor_file) {
554 throw std::runtime_error("Failed to seek to tensor data start in file: " + filename);
555 }
556
557
558 constexpr size_t BULK_READ_BUFFER_SIZE = 64 * 1024 * 1024;
559 size_t bytes_remaining = total_tensor_data_size;
560 size_t bytes_read_total = 0;
561
562 Logger::info(
"[GGUF_LOAD] Reading " + std::to_string(total_tensor_data_size) +
" bytes in optimized " + std::to_string(BULK_READ_BUFFER_SIZE / (1024*1024)) +
"MB chunks...");
563
564 while (bytes_remaining > 0) {
565 size_t chunk_size = std::min(bytes_remaining, BULK_READ_BUFFER_SIZE);
566
567 tensor_file.read(
reinterpret_cast<char*
>(result.
tensor_data.data() + bytes_read_total), chunk_size);
568 if (!tensor_file) {
569 throw std::runtime_error("Failed to read tensor data chunk at offset " + std::to_string(bytes_read_total) + " from file: " + filename);
570 }
571
572 bytes_read_total += chunk_size;
573 bytes_remaining -= chunk_size;
574
575 if (bytes_read_total % (256 * 1024 * 1024) == 0) {
576 Logger::info(
"[GGUF_LOAD] Progress: " + std::to_string(bytes_read_total / (1024*1024)) +
"MB / " + std::to_string(total_tensor_data_size / (1024*1024)) +
"MB loaded");
577 }
578 }
579
580 tensor_file.close();
581 Logger::info(
"[GGUF_LOAD] Successfully loaded " + std::to_string(total_tensor_data_size) +
" bytes of tensor data using optimized bulk I/O.");
582
583 if (total_tensor_data_size >= 16) {
584 std::stringstream ss_bytes;
585 ss_bytes << "[GGUF_LOAD] First 16 bytes of tensor data: ";
586 for (int i = 0; i < 16; ++i) {
587 ss_bytes <<
"0x" << std::hex << static_cast<int>(result.
tensor_data[i]) <<
" ";
588 }
590 }
591 } else {
592 Logger::info(
"[GGUF_LOAD] No tensor data to load (total size is 0).");
593 }
594
595 return result;
596 }
597
598#ifndef _WIN32
601 throw std::runtime_error("GGUF Error: Failed to open file for mmap: " + filename + " - " + strerror(errno));
602 }
604
605 struct stat file_stat;
609 throw std::runtime_error("GGUF Error: Failed to fstat file for mmap: " + filename + " - " + strerror(errno));
610 }
611 uint64_t file_total_size = static_cast<uint64_t>(file_stat.st_size);
612#else
613 result.h_file = CreateFileA(
614 filename.c_str(),
615 GENERIC_READ,
616 FILE_SHARE_READ,
617 NULL,
618 OPEN_EXISTING,
619 FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
620 NULL
621 );
622 if (result.h_file == INVALID_HANDLE_VALUE) {
623 throw std::runtime_error("GGUF Error: Failed to open file for mmap (CreateFileA): " + filename + " - " + GetWindowsErrorString(GetLastError()));
624 }
625 Logger::info(
"[GGUF_LOAD] File opened for mmap with h_file: " + std::to_string(
reinterpret_cast<uintptr_t
>(result.h_file)));
626
627 LARGE_INTEGER fileSizeWindows;
628 if (!GetFileSizeEx(result.h_file, &fileSizeWindows)) {
629 DWORD error_code = GetLastError();
630 CloseHandle(result.h_file);
631 result.h_file = INVALID_HANDLE_VALUE;
632 throw std::runtime_error("GGUF Error: Failed to GetFileSizeEx for mmap: " + filename + " - " + GetWindowsErrorString(error_code));
633 }
634 uint64_t file_total_size = static_cast<uint64_t>(fileSizeWindows.QuadPart);
635#endif
636
637 if (file_total_size < actual_data_start_offset_in_file) {
638#ifndef _WIN32
641#else
642 CloseHandle(result.h_file);
643 result.h_file = INVALID_HANDLE_VALUE;
644#endif
645 throw std::runtime_error(
646 "GGUF Error: File total size (" + std::to_string(file_total_size) +
647 ") is less than calculated actual_data_start_offset_in_file (" + std::to_string(actual_data_start_offset_in_file) + ").");
648 }
649
650 uint64_t tensor_data_block_size_on_disk = file_total_size - actual_data_start_offset_in_file;
651 Logger::info(
"[GGUF_LOAD] Calculated tensor_data_block_size_on_disk (for mmap length calculation): " +
652 std::to_string(tensor_data_block_size_on_disk) + " bytes.");
653
654 long page_size;
655#ifndef _WIN32
656 page_size = sysconf(_SC_PAGE_SIZE);
657 if (page_size == -1) {
660 throw std::runtime_error(std::string("GGUF Error: Failed to get page size using sysconf - ") + strerror(errno));
661 }
662#else
663 SYSTEM_INFO sysInfo;
664 GetSystemInfo(&sysInfo);
665
666
667 page_size = static_cast<long>(sysInfo.dwAllocationGranularity);
668 if (page_size <= 0) {
669 CloseHandle(result.h_file);
670 result.h_file = INVALID_HANDLE_VALUE;
671 throw std::runtime_error("GGUF Error: Failed to get valid system allocation granularity (page_size equivalent for mmap offset).");
672 }
673#endif
674 Logger::info(
"[GGUF_LOAD] System page/allocation granularity for mmap offset: " + std::to_string(page_size));
675
676 uint64_t mmap_offset = (actual_data_start_offset_in_file / page_size) * page_size;
677 result.
offset_diff_for_mmap =
static_cast<size_t>(actual_data_start_offset_in_file - mmap_offset);
678 size_t mmap_length =
static_cast<size_t>(tensor_data_block_size_on_disk + result.
offset_diff_for_mmap);
679
680 Logger::info(
"[GGUF_LOAD] Aligning mmap: actual_data_start_offset_in_file=" + std::to_string(actual_data_start_offset_in_file) +
681 ", mmap_offset=" + std::to_string(mmap_offset) +
683 ", mmap_length=" + std::to_string(mmap_length));
684
685 if (mmap_length > 0) {
687#ifndef _WIN32
689 PROT_READ, MAP_SHARED,
691#else
692 result.h_map_file = CreateFileMapping(
693 result.h_file,
694 NULL,
695 PAGE_READONLY,
696 0,
697 0,
698 NULL
699 );
700 if (result.h_map_file == NULL) {
701 DWORD error_code = GetLastError();
702 CloseHandle(result.h_file);
703 result.h_file = INVALID_HANDLE_VALUE;
704 throw std::runtime_error("GGUF Error: CreateFileMapping failed - " + GetWindowsErrorString(error_code));
705 }
706
707
708
709 DWORD mmap_offset_low = static_cast<DWORD>(mmap_offset & 0xFFFFFFFF);
710 DWORD mmap_offset_high = static_cast<DWORD>((mmap_offset >> 32) & 0xFFFFFFFF);
711
713 result.h_map_file,
714 FILE_MAP_READ,
715 mmap_offset_high,
716 mmap_offset_low,
718 );
719#endif
720
722 int last_error = 0;
723#ifndef _WIN32
724 last_error = errno;
725
726#else
727 last_error = GetLastError();
728
729#endif
733 throw std::runtime_error("GGUF Error: mmap/MapViewOfFile failed. Aligned Offset: " + std::to_string(mmap_offset) +
734 ", Mmap Length: " + std::to_string(mmap_length) +
735#ifndef _WIN32
736 " - POSIX Error: " + strerror(last_error));
737#else
738 " - Windows Error: " + GetWindowsErrorString(last_error));
739#endif
740 }
741 Logger::info(
"[GGUF_LOAD] Successfully mmapped tensor data block. Mapped Address: " +
744 " bytes from file offset " + std::to_string(mmap_offset));
745
747 std::stringstream ss_bytes;
748 ss_bytes << "[GGUF_LOAD] First 16 bytes of *actual* tensor data (after offset_diff) in mmap: ";
750 for (int i = 0; i < 16; ++i)
751 ss_bytes << "0x" << std::hex << static_cast<int>(actual_data_ptr_debug[i]) << " ";
753 }
754
755#ifndef _WIN32
756
757 Logger::info(
"[GGUF_LOAD] Attempting to prefetch mmapped tensor data using posix_madvise(MADV_WILLNEED)...");
759
760 if (page_size <= 0) {
761 Logger::error(
"[GGUF_LOAD] Invalid page_size for madvise alignment: " + std::to_string(page_size) +
". Skipping prefetch.");
762 } else {
763 for (const auto& tensor_info : result.tensor_infos) {
764 if (tensor_info.size_in_bytes > 0) {
765 uintptr_t exact_tensor_start_addr_val = reinterpret_cast<uintptr_t>(actual_tensor_data_block_start_in_mmap + tensor_info.offset);
766 void* page_aligned_madvise_addr = reinterpret_cast<void*>(exact_tensor_start_addr_val - (exact_tensor_start_addr_val % static_cast<uintptr_t>(page_size)));
767 size_t madvise_length = (exact_tensor_start_addr_val + tensor_info.size_in_bytes) - reinterpret_cast<uintptr_t>(page_aligned_madvise_addr);
768
769 uintptr_t advised_region_start_val = reinterpret_cast<uintptr_t>(page_aligned_madvise_addr);
770 uintptr_t advised_region_end_val = advised_region_start_val + madvise_length;
771 uintptr_t overall_mmap_start_val =
reinterpret_cast<uintptr_t
>(result.
mapped_tensor_data);
773
774 if (advised_region_start_val >= overall_mmap_start_val && advised_region_end_val <= overall_mmap_end_val && advised_region_start_val < advised_region_end_val) {
775 int ret = posix_madvise(page_aligned_madvise_addr, madvise_length, POSIX_MADV_WILLNEED);
776 if (ret != 0) {
777 Logger::warning(
"[GGUF_LOAD] posix_madvise failed for tensor '" + tensor_info.name +
778 "' (addr: " + std::to_string(reinterpret_cast<uintptr_t>(page_aligned_madvise_addr)) +
779 ", len: " + std::to_string(madvise_length) +
780 ") with error code " + std::to_string(errno) +
781 " (" + strerror(errno) + "). Skipping prefetch for this tensor.");
782 }
783 } else {
785 "' calculated region for madvise is invalid or out of overall mmap bounds. Skipping prefetch. "
786 );
787 }
788 }
789 }
790 }
791 Logger::info(
"[GGUF_LOAD] Finished POSIX prefetching attempt with posix_madvise.");
792
793#else
794 Logger::info(
"[GGUF_LOAD] Tensor prefetching (posix_madvise) is currently implemented for POSIX systems. Skipping for Windows for now.");
795#endif
796
797 } else {
798 Logger::info(
"[GGUF_LOAD] Tensor data block size (or mmap_length) is 0. Nothing to mmap.");
802 }
803
804 Logger::info(
"GGUF metadata loaded and tensor data (if any) mmapped successfully.");
805 return result;
806}
static void warning(const std::string &message)
static void info(const std::string &message)
static void error(const std::string &message)
GGMLType
Enumeration of GGML tensor data types.
GGUFValueType
Enumeration of value types used in GGUF metadata.
size_t gguf_value_type_size(GGUFValueType type)
void read_raw(std::ifstream &file, T &dest)
Reads raw binary data from a file stream.
std::string read_gguf_string(std::ifstream &file)
Reads a string from a GGUF format file.
constexpr uint32_t GGUF_MAGIC
GGUF magic number that identifies the file format. Spells "GGUF" in ASCII (0x47475546).
constexpr uint32_t GGUF_MAX_TENSOR_DIMS
constexpr uint64_t GGUF_DEFAULT_ALIGNMENT
Constants for GGUF file parsing and validation.
size_t ggml_type_block_size(GGMLType type)
Gets the block size for a GGML type.
size_t ggml_type_size(GGMLType type)
Gets the size in bytes of a GGML type.
const char * ggml_type_name(GGMLType type)
Gets the string name of a GGML type.
Represents an array in GGUF metadata.
Complete representation of a GGUF file's contents.
static const void * MMapFailure
std::vector< GGUFTensorInfo > tensor_infos
std::vector< std::string > tokenizer_tokens
std::vector< float > tokenizer_scores
size_t offset_diff_for_mmap
std::vector< uint8_t > tensor_data
std::vector< std::string > tokenizer_merges
size_t mapped_tensor_data_size
std::map< std::string, GGUFMetadataValue > metadata
void * mapped_tensor_data
std::map< std::string, GGUFTensorInfo > tensor_infos_map
std::vector< uint32_t > tokenizer_token_types
Information about a tensor stored in a GGUF file.
std::vector< uint64_t > shape