124 Logger::info(
"Attempting to load GGUF file: " + filename + (use_mmap ?
" with mmap" :
" without mmap"));
125 std::ifstream metadata_file(filename, std::ios::binary);
126 if (!metadata_file.is_open()) {
127 throw std::runtime_error(
"Failed to open file for metadata: " + filename);
140 std::stringstream ss;
141 ss <<
"Read Header:\n"
142 <<
" Magic: 0x" << std::hex << result.
header.
magic << std::dec <<
"\n"
150 throw std::runtime_error(
"Not a valid GGUF file (magic number mismatch).");
160 read_raw(metadata_file, value_type_enum);
162 switch (value_type_enum) {
237 read_raw(metadata_file, array_type_enum);
241 array_obj.
type = array_type_enum;
242 array_obj.
len = count;
244 bool skipped_data =
false;
245 if (key ==
"tokenizer.ggml.tokens" &&
247 Logger::info(
"Loading STRING array data ('" + key +
"') with " +
248 std::to_string(count) +
" elements...");
250 for (uint64_t arr_i = 0; arr_i < count; ++arr_i) {
255 }
else if (key ==
"tokenizer.ggml.scores" &&
257 Logger::info(
"Loading FLOAT32 array data ('" + key +
"') with " +
258 std::to_string(count) +
" elements...");
261 static_cast<std::streamsize
>(count *
sizeof(
float)));
262 if (!metadata_file) {
263 throw std::runtime_error(
264 "GGUF Error: Failed to read scores array data.");
268 }
else if (key ==
"tokenizer.ggml.token_type" &&
271 " array data ('" + key +
"') with " +
272 std::to_string(count) +
" elements...");
277 static_cast<std::streamsize
>(count *
sizeof(uint32_t)));
279 std::vector<int32_t> temp_s32_types(
static_cast<size_t>(count));
281 reinterpret_cast<char*
>(temp_s32_types.data()),
282 static_cast<std::streamsize
>(count *
sizeof(int32_t)));
283 for(
size_t k=0; k < count; ++k) {
287 if (!metadata_file) {
288 throw std::runtime_error(
289 "GGUF Error: Failed to read token_type array data.");
293 }
else if (key ==
"tokenizer.ggml.merges" &&
295 Logger::info(
"Loading STRING array data ('" + key +
"') with " +
296 std::to_string(count) +
" elements...");
298 for (uint64_t arr_i = 0; arr_i < count; ++arr_i) {
306 "Skipping unhandled/non-tokenizer ARRAY data for key '" + key +
308 std::to_string(
static_cast<uint32_t
>(array_type_enum)) +
309 ", Count: " + std::to_string(count) +
")");
312 for (uint64_t arr_i = 0; arr_i < count; ++arr_i) {
315 }
catch (
const std::exception& e) {
317 std::to_string(arr_i) +
" for key '" + key +
324 if (element_size == 0) {
325 throw std::runtime_error(
326 "Cannot skip array for key '" + key +
327 "' with unsupported or variable-sized element type: " +
328 std::to_string(
static_cast<uint32_t
>(array_type_enum)));
332 element_size > std::numeric_limits<uint64_t>::max() / count) {
333 throw std::overflow_error(
334 "Array size overflow calculating skip amount for key '" +
337 uint64_t total_size_to_skip = count * element_size;
338 if (total_size_to_skip > 0) {
339 metadata_file.seekg(
static_cast<std::streamoff
>(total_size_to_skip),
341 if (!metadata_file) {
342 throw std::runtime_error(
343 "GGUF Error: Failed to seek past array data for key '" +
352 throw std::runtime_error(
353 "Unknown metadata type encountered: " +
354 std::to_string(
static_cast<uint32_t
>(value_type_enum)) +
358 }
catch (
const std::exception& e) {
359 std::string error_key =
360 key.empty() ?
"(unknown key, error during key read)" : key;
362 "Error reading metadata for key: '" + error_key +
363 "' (type: " + std::to_string(
static_cast<uint32_t
>(value_type_enum)) +
373 uint64_t accumulated_offset_debug = 0;
382 throw std::runtime_error(
"Tensor '" + info.
name +
383 "' has unsupported number of dimensions: " +
384 std::to_string(n_dims));
386 info.
shape.resize(n_dims);
387 for (uint32_t d = 0; d < n_dims; ++d) {
391 uint32_t ggml_type_u32;
392 read_raw(metadata_file, ggml_type_u32);
395 uint64_t pos_before_offset_read = metadata_file.tellg();
399 std::stringstream ss_offset_log;
401 <<
"[GGUF_TENSOR_INFO] Tensor " << i <<
" ('" << info.
name
402 <<
"'):" <<
"\n Raw offset from file: " << info.
offset
403 <<
"\n File pos before offset read: " << pos_before_offset_read
404 <<
"\n Calculated accumulated_offset_debug (before this tensor): "
405 << accumulated_offset_debug;
408 for (uint64_t dim : info.
shape) {
410 info.
num_elements > std::numeric_limits<uint64_t>::max() / dim) {
411 throw std::overflow_error(
412 "Tensor dimension overflow calculating num_elements for tensor "
423 throw std::runtime_error(
424 "Tensor '" + info.
name +
425 "' has unknown or unsupported type: " + std::to_string(info.
type));
428 if (block_size > 1) {
430 throw std::runtime_error(
"Tensor '" + info.
name +
"' num_elements (" +
432 ") not divisible by block_size (" +
433 std::to_string(block_size) +
") for type " +
438 num_blocks > std::numeric_limits<uint64_t>::max() / type_size) {
439 throw std::overflow_error(
440 "Tensor size overflow calculating size_in_bytes for tensor '" +
443 info.
size_in_bytes =
static_cast<size_t>(num_blocks * type_size);
447 std::numeric_limits<uint64_t>::max() / type_size) {
448 throw std::overflow_error(
449 "Tensor size overflow calculating size_in_bytes for tensor '" +
455 ss_offset_log <<
"\n Calculated size_in_bytes for this tensor: "
462 std::stringstream ss_tensor;
463 ss_tensor <<
"Tensor " << i <<
": Name='" << info.
name
465 for (
size_t d = 0; d < info.
shape.size(); ++d)
466 ss_tensor << info.
shape[d]
467 << (d == info.
shape.size() - 1 ?
"" :
", ");
468 ss_tensor <<
" ], Offset=" << info.
offset
472 }
catch (
const std::exception& e) {
473 std::string tensor_name =
474 info.
name.empty() ? (
"(unknown, index " + std::to_string(i) +
")")
476 Logger::error(
"Error reading tensor info for tensor " + tensor_name +
486 Logger::warning(
"Duplicate tensor name found in GGUF: '" + tinfo.name +
487 "'. Overwriting entry in map.");
491 Logger::info(
"Finished populating tensor_infos_map. Map size: " +
496 if (result.
metadata.count(
"general.alignment")) {
498 std::get<uint32_t>(result.
metadata[
"general.alignment"]);
500 alignment = align_val;
503 std::to_string(alignment));
506 "Metadata key 'general.alignment' not found. Using default "
508 std::to_string(alignment));
510 }
catch (
const std::bad_variant_access& e) {
512 "Could not read 'general.alignment' metadata as uint32. Using default "
514 std::to_string(alignment));
515 }
catch (
const std::exception& e) {
517 std::string(e.what()) +
518 ". Using default alignment: " + std::to_string(alignment));
522 uint64_t current_pos_metadata_stream = metadata_file.tellg();
523 Logger::info(
"[GGUF_LOAD] Current file position (metadata stream) before padding seek: " +
524 std::to_string(current_pos_metadata_stream));
525 uint64_t padding = (alignment - (current_pos_metadata_stream % alignment)) % alignment;
526 Logger::info(
"[GGUF_LOAD] Calculated padding: " + std::to_string(padding));
528 uint64_t actual_data_start_offset_in_file = current_pos_metadata_stream + padding;
530 "[GGUF_LOAD] Calculated actual_data_start_offset_in_file (for mmap): " +
531 std::to_string(actual_data_start_offset_in_file));
533 metadata_file.close();
537 Logger::info(
"[GGUF_LOAD] mmap is disabled by configuration. Loading tensor data into memory using OPTIMIZED bulk I/O.");
539 uint64_t total_tensor_data_size = 0;
541 total_tensor_data_size = std::max(total_tensor_data_size, tensor_info.offset + tensor_info.size_in_bytes);
544 if (total_tensor_data_size > 0) {
547 std::ifstream tensor_file(filename, std::ios::binary);
548 if (!tensor_file.is_open()) {
549 throw std::runtime_error(
"Failed to open file for tensor data reading: " + filename);
552 tensor_file.seekg(actual_data_start_offset_in_file);
554 throw std::runtime_error(
"Failed to seek to tensor data start in file: " + filename);
558 constexpr size_t BULK_READ_BUFFER_SIZE = 64 * 1024 * 1024;
559 size_t bytes_remaining = total_tensor_data_size;
560 size_t bytes_read_total = 0;
562 Logger::info(
"[GGUF_LOAD] Reading " + std::to_string(total_tensor_data_size) +
" bytes in optimized " + std::to_string(BULK_READ_BUFFER_SIZE / (1024*1024)) +
"MB chunks...");
564 while (bytes_remaining > 0) {
565 size_t chunk_size = std::min(bytes_remaining, BULK_READ_BUFFER_SIZE);
567 tensor_file.read(
reinterpret_cast<char*
>(result.
tensor_data.data() + bytes_read_total), chunk_size);
569 throw std::runtime_error(
"Failed to read tensor data chunk at offset " + std::to_string(bytes_read_total) +
" from file: " + filename);
572 bytes_read_total += chunk_size;
573 bytes_remaining -= chunk_size;
575 if (bytes_read_total % (256 * 1024 * 1024) == 0) {
576 Logger::info(
"[GGUF_LOAD] Progress: " + std::to_string(bytes_read_total / (1024*1024)) +
"MB / " + std::to_string(total_tensor_data_size / (1024*1024)) +
"MB loaded");
581 Logger::info(
"[GGUF_LOAD] Successfully loaded " + std::to_string(total_tensor_data_size) +
" bytes of tensor data using optimized bulk I/O.");
583 if (total_tensor_data_size >= 16) {
584 std::stringstream ss_bytes;
585 ss_bytes <<
"[GGUF_LOAD] First 16 bytes of tensor data: ";
586 for (
int i = 0; i < 16; ++i) {
587 ss_bytes <<
"0x" << std::hex << static_cast<int>(result.
tensor_data[i]) <<
" ";
592 Logger::info(
"[GGUF_LOAD] No tensor data to load (total size is 0).");
601 throw std::runtime_error(
"GGUF Error: Failed to open file for mmap: " + filename +
" - " + strerror(errno));
605 struct stat file_stat;
609 throw std::runtime_error(
"GGUF Error: Failed to fstat file for mmap: " + filename +
" - " + strerror(errno));
611 uint64_t file_total_size =
static_cast<uint64_t
>(file_stat.st_size);
613 result.h_file = CreateFileA(
619 FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS,
622 if (result.h_file == INVALID_HANDLE_VALUE) {
623 throw std::runtime_error(
"GGUF Error: Failed to open file for mmap (CreateFileA): " + filename +
" - " + GetWindowsErrorString(GetLastError()));
625 Logger::info(
"[GGUF_LOAD] File opened for mmap with h_file: " + std::to_string(
reinterpret_cast<uintptr_t
>(result.h_file)));
627 LARGE_INTEGER fileSizeWindows;
628 if (!GetFileSizeEx(result.h_file, &fileSizeWindows)) {
629 DWORD error_code = GetLastError();
630 CloseHandle(result.h_file);
631 result.h_file = INVALID_HANDLE_VALUE;
632 throw std::runtime_error(
"GGUF Error: Failed to GetFileSizeEx for mmap: " + filename +
" - " + GetWindowsErrorString(error_code));
634 uint64_t file_total_size =
static_cast<uint64_t
>(fileSizeWindows.QuadPart);
637 if (file_total_size < actual_data_start_offset_in_file) {
642 CloseHandle(result.h_file);
643 result.h_file = INVALID_HANDLE_VALUE;
645 throw std::runtime_error(
646 "GGUF Error: File total size (" + std::to_string(file_total_size) +
647 ") is less than calculated actual_data_start_offset_in_file (" + std::to_string(actual_data_start_offset_in_file) +
").");
650 uint64_t tensor_data_block_size_on_disk = file_total_size - actual_data_start_offset_in_file;
651 Logger::info(
"[GGUF_LOAD] Calculated tensor_data_block_size_on_disk (for mmap length calculation): " +
652 std::to_string(tensor_data_block_size_on_disk) +
" bytes.");
656 page_size = sysconf(_SC_PAGE_SIZE);
657 if (page_size == -1) {
660 throw std::runtime_error(std::string(
"GGUF Error: Failed to get page size using sysconf - ") + strerror(errno));
664 GetSystemInfo(&sysInfo);
667 page_size =
static_cast<long>(sysInfo.dwAllocationGranularity);
668 if (page_size <= 0) {
669 CloseHandle(result.h_file);
670 result.h_file = INVALID_HANDLE_VALUE;
671 throw std::runtime_error(
"GGUF Error: Failed to get valid system allocation granularity (page_size equivalent for mmap offset).");
674 Logger::info(
"[GGUF_LOAD] System page/allocation granularity for mmap offset: " + std::to_string(page_size));
676 uint64_t mmap_offset = (actual_data_start_offset_in_file / page_size) * page_size;
677 result.
offset_diff_for_mmap =
static_cast<size_t>(actual_data_start_offset_in_file - mmap_offset);
678 size_t mmap_length =
static_cast<size_t>(tensor_data_block_size_on_disk + result.
offset_diff_for_mmap);
680 Logger::info(
"[GGUF_LOAD] Aligning mmap: actual_data_start_offset_in_file=" + std::to_string(actual_data_start_offset_in_file) +
681 ", mmap_offset=" + std::to_string(mmap_offset) +
683 ", mmap_length=" + std::to_string(mmap_length));
685 if (mmap_length > 0) {
689 PROT_READ, MAP_SHARED,
692 result.h_map_file = CreateFileMapping(
700 if (result.h_map_file == NULL) {
701 DWORD error_code = GetLastError();
702 CloseHandle(result.h_file);
703 result.h_file = INVALID_HANDLE_VALUE;
704 throw std::runtime_error(
"GGUF Error: CreateFileMapping failed - " + GetWindowsErrorString(error_code));
709 DWORD mmap_offset_low =
static_cast<DWORD
>(mmap_offset & 0xFFFFFFFF);
710 DWORD mmap_offset_high =
static_cast<DWORD
>((mmap_offset >> 32) & 0xFFFFFFFF);
727 last_error = GetLastError();
733 throw std::runtime_error(
"GGUF Error: mmap/MapViewOfFile failed. Aligned Offset: " + std::to_string(mmap_offset) +
734 ", Mmap Length: " + std::to_string(mmap_length) +
736 " - POSIX Error: " + strerror(last_error));
738 " - Windows Error: " + GetWindowsErrorString(last_error));
741 Logger::info(
"[GGUF_LOAD] Successfully mmapped tensor data block. Mapped Address: " +
744 " bytes from file offset " + std::to_string(mmap_offset));
747 std::stringstream ss_bytes;
748 ss_bytes <<
"[GGUF_LOAD] First 16 bytes of *actual* tensor data (after offset_diff) in mmap: ";
750 for (
int i = 0; i < 16; ++i)
751 ss_bytes <<
"0x" << std::hex <<
static_cast<int>(actual_data_ptr_debug[i]) <<
" ";
757 Logger::info(
"[GGUF_LOAD] Attempting to prefetch mmapped tensor data using posix_madvise(MADV_WILLNEED)...");
760 if (page_size <= 0) {
761 Logger::error(
"[GGUF_LOAD] Invalid page_size for madvise alignment: " + std::to_string(page_size) +
". Skipping prefetch.");
764 if (tensor_info.size_in_bytes > 0) {
765 uintptr_t exact_tensor_start_addr_val =
reinterpret_cast<uintptr_t
>(actual_tensor_data_block_start_in_mmap + tensor_info.offset);
766 void* page_aligned_madvise_addr =
reinterpret_cast<void*
>(exact_tensor_start_addr_val - (exact_tensor_start_addr_val %
static_cast<uintptr_t
>(page_size)));
767 size_t madvise_length = (exact_tensor_start_addr_val + tensor_info.size_in_bytes) -
reinterpret_cast<uintptr_t
>(page_aligned_madvise_addr);
769 uintptr_t advised_region_start_val =
reinterpret_cast<uintptr_t
>(page_aligned_madvise_addr);
770 uintptr_t advised_region_end_val = advised_region_start_val + madvise_length;
771 uintptr_t overall_mmap_start_val =
reinterpret_cast<uintptr_t
>(result.
mapped_tensor_data);
774 if (advised_region_start_val >= overall_mmap_start_val && advised_region_end_val <= overall_mmap_end_val && advised_region_start_val < advised_region_end_val) {
775 int ret = posix_madvise(page_aligned_madvise_addr, madvise_length, POSIX_MADV_WILLNEED);
777 Logger::warning(
"[GGUF_LOAD] posix_madvise failed for tensor '" + tensor_info.name +
778 "' (addr: " + std::to_string(
reinterpret_cast<uintptr_t
>(page_aligned_madvise_addr)) +
779 ", len: " + std::to_string(madvise_length) +
780 ") with error code " + std::to_string(errno) +
781 " (" + strerror(errno) +
"). Skipping prefetch for this tensor.");
785 "' calculated region for madvise is invalid or out of overall mmap bounds. Skipping prefetch. "
791 Logger::info(
"[GGUF_LOAD] Finished POSIX prefetching attempt with posix_madvise.");
794 Logger::info(
"[GGUF_LOAD] Tensor prefetching (posix_madvise) is currently implemented for POSIX systems. Skipping for Windows for now.");
798 Logger::info(
"[GGUF_LOAD] Tensor data block size (or mmap_length) is 0. Nothing to mmap.");
804 Logger::info(
"GGUF metadata loaded and tensor data (if any) mmapped successfully.");