tinyllama.cpp/gguf__structs_8h_source.html

#pragma once


#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <variant>
#include <vector>


// mmap related includes

#ifndef _WIN32

#include <sys/mman.h>   // For mmap, munmap, MAP_FAILED, posix_madvise

#include <sys/stat.h>   // For fstat, stat

#include <fcntl.h>      // For O_RDONLY

#include <unistd.h>     // For close, fstat, read, lseek, sysconf, _SC_PAGE_SIZE

#else

#define WIN32_LEAN_AND_MEAN

#include <windows.h>    // For CreateFile, CreateFileMapping, MapViewOfFile, etc.

                        // Also for GetSystemInfo, SYSTEM_INFO, PrefetchVirtualMemory (if used)

#endif


#include "ggml_types.h"


/**
 * @brief Descriptor for an array value found in GGUF metadata.
 *
 * Only the element type tag and the length are kept here; the struct holds no
 * element storage of its own.
 *
 * Both members carry default initializers so that a default-constructed
 * descriptor has deterministic contents instead of indeterminate ones. The
 * struct remains an aggregate, so `GGUFArray{type, len}` keeps working.
 */
struct GGUFArray {
  GGUFValueType type{};  ///< Type tag of the array elements (value-initialized by default).
  uint64_t len = 0;      ///< Array length; presumably a count of elements - confirm against the parser.
};


/**
 * @brief Header structure for GGUF files.
 *
 * Every field has a zero default so that a default-constructed header (for
 * example the one embedded in a freshly created GGUFData) never exposes
 * indeterminate values. The member order and types are unchanged, so the
 * struct stays an aggregate and remains trivially copyable.
 */
struct GGUFHeader {
  uint32_t magic = 0;              ///< Magic number field.
  uint32_t version = 0;            ///< Format version field.
  uint64_t tensor_count = 0;       ///< Tensor count field.
  uint64_t metadata_kv_count = 0;  ///< Metadata key/value pair count field.
};


// Value type for a single GGUF metadata entry: holds exactly one of the
// listed integer / floating-point / bool alternatives, a std::string, or a
// GGUFArray descriptor. It is the mapped type of GGUFData::metadata.
// NOTE: the order of the alternatives determines what std::variant::index()
// returns, so reordering them is visible to any code that inspects index().
using GGUFMetadataValue =

    std::variant<uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t, float,

                 bool, std::string, uint64_t, int64_t, double, GGUFArray>;


/**
 * @brief Information about a tensor stored in a GGUF file.
 *
 * The scalar members carry default initializers so that a default-constructed
 * entry (for example one created by `std::map::operator[]` or declared before
 * being filled in field by field) has deterministic contents. Member order
 * and types are unchanged, so aggregate initialization keeps working.
 */
struct GGUFTensorInfo {
  std::string name;             ///< Tensor name.
  std::vector<uint64_t> shape;  ///< Tensor dimensions, one entry per axis.
  GGMLType type{};              ///< Element data type (value-initialized by default).
  uint64_t offset = 0;          ///< Offset of the tensor's data; presumably relative to the start of the tensor-data region - confirm against the loader.
  size_t num_elements = 0;      ///< Element count field.
  size_t size_in_bytes = 0;     ///< Byte size field.
};


/**
 * @brief Complete representation of a GGUF file's contents.
 *
 * Besides the parsed header, metadata, tensor descriptors and tokenizer
 * tables, the object owns the operating-system resources that back a
 * memory-mapped tensor region: a file descriptor on POSIX, or a file handle
 * plus a file-mapping handle on Windows. Those resources are released by the
 * destructor and by move assignment (for the overwritten state).
 *
 * The type is move-only: copying is deleted so that two objects can never
 * unmap or close the same mapping / descriptor / handle.
 */
struct GGUFData {
  // Value-initialized so that a default-constructed object never carries
  // indeterminate header fields; the move operations copy this member.
  GGUFHeader header{};
  std::map<std::string, GGUFMetadataValue> metadata;
  std::vector<GGUFTensorInfo> tensor_infos;
  std::map<std::string, GGUFTensorInfo> tensor_infos_map;

  // Tokenizer-specific data
  std::vector<std::string> tokenizer_tokens;
  std::vector<float> tokenizer_scores;
  std::vector<uint32_t> tokenizer_token_types;
  std::vector<std::string> tokenizer_merges;

  // Memory-mapped tensor data related fields
#ifndef _WIN32
  int file_descriptor = -1;  // -1 means "no descriptor owned"
  // Sentinel compared against mapped_tensor_data before unmapping. Only
  // declared here; it must be defined in a source file.
  // NOTE(review): expected to equal MAP_FAILED - confirm at the definition.
  static const void* MMapFailure;
#else
  HANDLE h_file = INVALID_HANDLE_VALUE;  // INVALID_HANDLE_VALUE means "no file handle owned"
  HANDLE h_map_file = nullptr;           // null means "no mapping handle owned"
  static constexpr void* const MMapFailure = nullptr;
#endif
  void* mapped_tensor_data = nullptr;  // address handed to munmap / UnmapViewOfFile
  size_t mapped_tensor_data_size = 0;  // length handed to munmap
  uint64_t data_alignment = 32;
  // NOTE(review): presumably the gap between the start of the mapping and the
  // first tensor byte - confirm against the loader.
  size_t offset_diff_for_mmap = 0;

  // Non-mmap tensor data (for when mmap is disabled)
  std::vector<uint8_t> tensor_data;

  /// Creates an empty object that owns no OS resources; every member takes
  /// the default written next to its declaration.
  GGUFData() = default;

  /// Unmaps the tensor region (if any) and closes the descriptor / handles.
  ~GGUFData() { release_os_resources(); }

  // Prevent accidental copying
  GGUFData(const GGUFData&) = delete;
  GGUFData& operator=(const GGUFData&) = delete;

  /**
   * @brief Move constructor: takes over all data and OS resources of @p other.
   *
   * Afterwards @p other owns no mapping, descriptor or handle, so destroying
   * it releases nothing. Its data_alignment is left untouched.
   */
  GGUFData(GGUFData&& other) noexcept
      : header(other.header),
        metadata(std::move(other.metadata)),
        tensor_infos(std::move(other.tensor_infos)),
        tensor_infos_map(std::move(other.tensor_infos_map)),
        tokenizer_tokens(std::move(other.tokenizer_tokens)),
        tokenizer_scores(std::move(other.tokenizer_scores)),
        tokenizer_token_types(std::move(other.tokenizer_token_types)),
        tokenizer_merges(std::move(other.tokenizer_merges)),
#ifndef _WIN32
        file_descriptor(std::exchange(other.file_descriptor, -1)),
#else
        h_file(std::exchange(other.h_file, INVALID_HANDLE_VALUE)),
        h_map_file(std::exchange(other.h_map_file, nullptr)),
#endif
        mapped_tensor_data(std::exchange(other.mapped_tensor_data, nullptr)),
        mapped_tensor_data_size(std::exchange(other.mapped_tensor_data_size, size_t{0})),
        data_alignment(other.data_alignment),
        offset_diff_for_mmap(std::exchange(other.offset_diff_for_mmap, size_t{0})),
        tensor_data(std::move(other.tensor_data)) {}

  /**
   * @brief Move assignment: releases what this object currently owns, then
   *        takes over all data and OS resources of @p other.
   *
   * Self-assignment is a no-op. Afterwards @p other owns no mapping,
   * descriptor or handle.
   *
   * @return Reference to this object.
   */
  GGUFData& operator=(GGUFData&& other) noexcept {
    if (this == &other) {
      return *this;
    }

    // Drop our own mapping / descriptor / handles before overwriting them,
    // otherwise they would leak.
    release_os_resources();

    header = other.header;
    metadata = std::move(other.metadata);
    tensor_infos = std::move(other.tensor_infos);
    tensor_infos_map = std::move(other.tensor_infos_map);
    tokenizer_tokens = std::move(other.tokenizer_tokens);
    tokenizer_scores = std::move(other.tokenizer_scores);
    tokenizer_token_types = std::move(other.tokenizer_token_types);
    tokenizer_merges = std::move(other.tokenizer_merges);

    // Transfer ownership and disarm the source in one step.
#ifndef _WIN32
    file_descriptor = std::exchange(other.file_descriptor, -1);
#else
    h_file = std::exchange(other.h_file, INVALID_HANDLE_VALUE);
    h_map_file = std::exchange(other.h_map_file, nullptr);
#endif
    mapped_tensor_data = std::exchange(other.mapped_tensor_data, nullptr);
    mapped_tensor_data_size = std::exchange(other.mapped_tensor_data_size, size_t{0});
    data_alignment = other.data_alignment;
    offset_diff_for_mmap = std::exchange(other.offset_diff_for_mmap, size_t{0});
    tensor_data = std::move(other.tensor_data);

    return *this;
  }

 private:
  /// Unmaps the tensor region and closes the descriptor / handles this object
  /// owns, then resets the bookkeeping fields so that a second call does
  /// nothing. Shared by the destructor and move assignment.
  void release_os_resources() noexcept {
#ifndef _WIN32
    // Skip both "never mapped" (nullptr) and the failure sentinel.
    if (mapped_tensor_data != nullptr && mapped_tensor_data != MMapFailure) {
      munmap(mapped_tensor_data, mapped_tensor_data_size);
    }
    if (file_descriptor != -1) {
      close(file_descriptor);
    }
    file_descriptor = -1;
#else
    // On Windows, MapViewOfFile returns NULL on failure, so a null check suffices.
    if (mapped_tensor_data != nullptr) {
      UnmapViewOfFile(mapped_tensor_data);
    }
    if (h_map_file != nullptr) {
      CloseHandle(h_map_file);
    }
    if (h_file != INVALID_HANDLE_VALUE) {
      CloseHandle(h_file);
    }
    h_file = INVALID_HANDLE_VALUE;
    h_map_file = nullptr;
#endif
    mapped_tensor_data = nullptr;
    mapped_tensor_data_size = 0;
    offset_diff_for_mmap = 0;
  }
};


ggml_types.h
Type definitions for GGML (Georgi Gerganov Machine Learning) library.

GGMLType
GGMLType
Enumeration of GGML tensor data types.
Definition ggml_types.h:21

GGUFValueType
GGUFValueType
Enumeration of value types used in GGUF metadata.
Definition ggml_types.h:51

GGUFMetadataValue
std::variant< uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t, float, bool, std::string, uint64_t, int64_t, double, GGUFArray > GGUFMetadataValue
Type for storing metadata values of various types.
Definition gguf_structs.h:58

GGUFArray
Represents an array in GGUF metadata.
Definition gguf_structs.h:35

GGUFArray::type
GGUFValueType type
Definition gguf_structs.h:36

GGUFArray::len
uint64_t len
Definition gguf_structs.h:37

GGUFData
Complete representation of a GGUF file's contents.
Definition gguf_structs.h:80

GGUFData::MMapFailure
static const void * MMapFailure
Definition gguf_structs.h:95

GGUFData::operator=
GGUFData & operator=(GGUFData &&other) noexcept
Definition gguf_structs.h:183

GGUFData::data_alignment
uint64_t data_alignment
Definition gguf_structs.h:103

GGUFData::tensor_infos
std::vector< GGUFTensorInfo > tensor_infos
Definition gguf_structs.h:83

GGUFData::tokenizer_tokens
std::vector< std::string > tokenizer_tokens
Definition gguf_structs.h:87

GGUFData::tokenizer_scores
std::vector< float > tokenizer_scores
Definition gguf_structs.h:88

GGUFData::offset_diff_for_mmap
size_t offset_diff_for_mmap
Definition gguf_structs.h:104

GGUFData::tensor_data
std::vector< uint8_t > tensor_data
Definition gguf_structs.h:107

GGUFData::tokenizer_merges
std::vector< std::string > tokenizer_merges
Definition gguf_structs.h:90

GGUFData::mapped_tensor_data_size
size_t mapped_tensor_data_size
Definition gguf_structs.h:102

GGUFData::metadata
std::map< std::string, GGUFMetadataValue > metadata
Definition gguf_structs.h:82

GGUFData::GGUFData
GGUFData(GGUFData &&other) noexcept
Definition gguf_structs.h:149

GGUFData::file_descriptor
int file_descriptor
Definition gguf_structs.h:94

GGUFData::~GGUFData
~GGUFData()
Definition gguf_structs.h:117

GGUFData::GGUFData
GGUFData(const GGUFData &)=delete

GGUFData::mapped_tensor_data
void * mapped_tensor_data
Definition gguf_structs.h:101

GGUFData::tensor_infos_map
std::map< std::string, GGUFTensorInfo > tensor_infos_map
Definition gguf_structs.h:84

GGUFData::header
GGUFHeader header
Definition gguf_structs.h:81

GGUFData::tokenizer_token_types
std::vector< uint32_t > tokenizer_token_types
Definition gguf_structs.h:89

GGUFData::operator=
GGUFData & operator=(const GGUFData &)=delete

GGUFData::GGUFData
GGUFData()
Definition gguf_structs.h:111

GGUFHeader
Header structure for GGUF files.
Definition gguf_structs.h:43

GGUFHeader::tensor_count
uint64_t tensor_count
Definition gguf_structs.h:46

GGUFHeader::metadata_kv_count
uint64_t metadata_kv_count
Definition gguf_structs.h:47

GGUFHeader::version
uint32_t version
Definition gguf_structs.h:45

GGUFHeader::magic
uint32_t magic
Definition gguf_structs.h:44

GGUFTensorInfo
Information about a tensor stored in a GGUF file.
Definition gguf_structs.h:63

GGUFTensorInfo::type
GGMLType type
Definition gguf_structs.h:66

GGUFTensorInfo::size_in_bytes
size_t size_in_bytes
Definition gguf_structs.h:69

GGUFTensorInfo::num_elements
size_t num_elements
Definition gguf_structs.h:68

GGUFTensorInfo::offset
uint64_t offset
Definition gguf_structs.h:67

GGUFTensorInfo::shape
std::vector< uint64_t > shape
Definition gguf_structs.h:65

GGUFTensorInfo::name
std::string name
Definition gguf_structs.h:64