TinyLlama.cpp 1.0
A lightweight C++ implementation of the TinyLlama language model
Loading...
Searching...
No Matches
gguf_structs.h
Go to the documentation of this file.
1#pragma once
2
3#include <cstdint>
4#include <map>
5#include <string>
6#include <variant>
7#include <vector>
8
9// mmap related includes
10#ifndef _WIN32
11#include <sys/mman.h> // For mmap, munmap, MAP_FAILED, posix_madvise
12#include <sys/stat.h> // For fstat, stat
13#include <fcntl.h> // For O_RDONLY
14#include <unistd.h> // For close, fstat, read, lseek, sysconf, _SC_PAGE_SIZE
15#else
16#define WIN32_LEAN_AND_MEAN
17#include <windows.h> // For CreateFile, CreateFileMapping, MapViewOfFile, etc.
18 // Also for GetSystemInfo, SYSTEM_INFO, PrefetchVirtualMemory (if used)
19#endif
20
21#include "ggml_types.h"
22
// Represents an array in GGUF metadata.
// NOTE(review): this listing skips source line 36; the page's member index also
// shows a `GGUFValueType type` field for this struct — confirm against the
// real header before relying on the layout shown here.
35struct GGUFArray {
37 uint64_t len; // Array length; presumably the element count — TODO confirm
38};
39
/**
 * @brief Header structure for GGUF files.
 *
 * Every field has a zero default so that a default-constructed header —
 * including the `header` member of a default-constructed GGUFData, whose
 * constructor does not initialize it explicitly — never holds indeterminate
 * values. The struct stays an aggregate and trivially copyable, so brace
 * initialization and byte-wise copies keep working as before.
 *
 * NOTE(review): the per-field descriptions follow the member names;
 * confirm them against the loader that fills this struct.
 */
struct GGUFHeader {
    uint32_t magic = 0;              /**< Magic value identifying the file format (presumably). */
    uint32_t version = 0;            /**< Format version number (presumably). */
    uint64_t tensor_count = 0;       /**< Number of tensors described by the file (presumably). */
    uint64_t metadata_kv_count = 0;  /**< Number of metadata key/value entries (presumably). */
};
49
// Type for storing metadata values of various types: exactly one of the listed
// fixed-width integer types, float, double, bool, std::string, or a GGUFArray.
// NOTE(review): the alias-introducing line (source line 56) is not visible in
// this listing; the page's member index names the alias GGUFMetadataValue.
57 std::variant<uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t, float,
58 bool, std::string, uint64_t, int64_t, double, GGUFArray>;
59
// Information about a tensor stored in a GGUF file.
// NOTE(review): the struct's opening line and source lines 66 and 69 are not
// visible in this listing; the page's member index additionally lists a
// `size_t size_in_bytes` member — confirm against the real header.
64 std::string name; // Tensor name
65 std::vector<uint64_t> shape; // presumably one extent per dimension — TODO confirm ordering
67 uint64_t offset; // presumably relative to the start of the tensor data section — TODO confirm
68 size_t num_elements; // presumably the product of the entries in `shape` — TODO confirm
70};
71
// Complete representation of a GGUF file's contents: metadata, tensor
// descriptors, tokenizer tables and the tensor bytes (either a memory-mapped
// view or the `tensor_data` buffer when mmap is disabled).
//
// The struct owns OS resources. The destructor and the move-assignment
// operator release the mapped view plus the file descriptor (non-Windows) or
// the file / mapping handles (Windows). Copying is deleted; moving hands the
// resources to the destination and resets the source so that the moved-from
// object's destructor finds nothing left to release.
//
// NOTE(review): this listing skips several source lines (81, 102, 104-105,
// 111, 117, 120, 188). The page's member index names `GGUFHeader header`,
// `size_t mapped_tensor_data_size` and `size_t offset_diff_for_mmap`, all of
// which the code below uses — confirm their declarations in the real header.
80struct GGUFData {
82 std::map<std::string, GGUFMetadataValue> metadata; // Metadata values keyed by string
83 std::vector<GGUFTensorInfo> tensor_infos; // Tensor descriptors as a sequence
84 std::map<std::string, GGUFTensorInfo> tensor_infos_map; // Tensor descriptors keyed by string (presumably the tensor name — confirm)
86 // Tokenizer-specific data
87 std::vector<std::string> tokenizer_tokens;
88 std::vector<float> tokenizer_scores;
89 std::vector<uint32_t> tokenizer_token_types;
90 std::vector<std::string> tokenizer_merges;
92 // Memory-mapped tensor data related fields
93#ifndef _WIN32
94 int file_descriptor = -1; // -1 means "no descriptor held"; checked before close()
// Declared here only; the definition lives outside this header.
// NOTE(review): presumably defined as MAP_FAILED — confirm in the defining translation unit.
95 static const void* MMapFailure;
96#else
97 HANDLE h_file = INVALID_HANDLE_VALUE; // INVALID_HANDLE_VALUE means "no file handle held"
98 HANDLE h_map_file = NULL; // NULL means "no mapping handle held"
99 static constexpr void* const MMapFailure = NULL;
100#endif
101 void* mapped_tensor_data = nullptr; // Mapped view; nullptr when nothing is mapped
103 uint64_t data_alignment = 32; // Defaults to 32; presumably the tensor-data alignment in bytes — confirm
106 // Non-mmap tensor data (for when mmap is disabled)
107 std::vector<uint8_t> tensor_data;
109 // Default constructor
// NOTE(review): the non-Windows constructor (source line 111) is not visible in
// this listing; only the Windows variant appears below.
110#ifndef _WIN32
112#else
113 GGUFData() : h_file(INVALID_HANDLE_VALUE), h_map_file(NULL), mapped_tensor_data(nullptr), mapped_tensor_data_size(0), data_alignment(32), offset_diff_for_mmap(0) {}
114#endif
115
116 // Destructor to clean up memory map and file descriptor/handles
// NOTE(review): the destructor's signature line (source line 117) is not
// visible in this listing; the statements below are its body.
118#ifndef _WIN32
119 if (mapped_tensor_data != nullptr && mapped_tensor_data != MMapFailure) { // MMapFailure is the static member declared above (not a macro); presumably equal to MAP_FAILED — confirm
// NOTE(review): source line 120 is missing from this listing — presumably the munmap call; confirm.
121 }
122 if (file_descriptor != -1) {
123 close(file_descriptor);
124 }
125 file_descriptor = -1;
126#else // _WIN32
127 if (mapped_tensor_data != nullptr) { // On Windows, MapViewOfFile returns NULL on failure
128 UnmapViewOfFile(mapped_tensor_data);
129 }
130 if (h_map_file != NULL) {
131 CloseHandle(h_map_file);
132 }
133 if (h_file != INVALID_HANDLE_VALUE) {
134 CloseHandle(h_file);
135 }
136 h_file = INVALID_HANDLE_VALUE;
137 h_map_file = NULL;
138#endif
139 mapped_tensor_data = nullptr; // Common for both
140 mapped_tensor_data_size = 0; // Common for both
141 offset_diff_for_mmap = 0; // Common for both
142 }
143
144 // Prevent accidental copying
// (a copy would leave two objects releasing the same descriptor/handles and view)
145 GGUFData(const GGUFData&) = delete;
146 GGUFData& operator=(const GGUFData&) = delete;
147
148 // Allow move semantics
// Move constructor: takes over other's containers, handles and mapped view,
// then resets other's handle/view fields. `header` is copied rather than
// moved, and other.data_alignment is left unchanged.
149 GGUFData(GGUFData&& other) noexcept
150 : header(other.header)
151 , metadata(std::move(other.metadata))
152 , tensor_infos(std::move(other.tensor_infos))
153 , tensor_infos_map(std::move(other.tensor_infos_map))
154 , tokenizer_tokens(std::move(other.tokenizer_tokens))
155 , tokenizer_scores(std::move(other.tokenizer_scores))
156 , tokenizer_token_types(std::move(other.tokenizer_token_types))
157 , tokenizer_merges(std::move(other.tokenizer_merges))
158 // Platform-specific handles
159#ifndef _WIN32
160 , file_descriptor(other.file_descriptor)
161#else
162 , h_file(other.h_file)
163 , h_map_file(other.h_map_file)
164#endif
165 , mapped_tensor_data(other.mapped_tensor_data)
166 , mapped_tensor_data_size(other.mapped_tensor_data_size)
167 , data_alignment(other.data_alignment)
168 , offset_diff_for_mmap(other.offset_diff_for_mmap)
169 , tensor_data(std::move(other.tensor_data))
170 {
171 // Leave other in a valid but safe state (resources transferred)
172#ifndef _WIN32
173 other.file_descriptor = -1;
174#else
175 other.h_file = INVALID_HANDLE_VALUE;
176 other.h_map_file = NULL;
177#endif
178 other.mapped_tensor_data = nullptr;
179 other.mapped_tensor_data_size = 0;
180 other.offset_diff_for_mmap = 0;
181 }
182
// Move assignment: releases whatever this object currently holds, then takes
// over other's state exactly as the move constructor does. Self-assignment is
// a no-op. Returns *this.
183 GGUFData& operator=(GGUFData&& other) noexcept {
184 if (this != &other) {
185 // Clean up existing resources first (using this object's current platform state)
186#ifndef _WIN32
187 if (mapped_tensor_data != nullptr && mapped_tensor_data != MMapFailure) { // MMapFailure is the static member declared above (not a macro); presumably equal to MAP_FAILED — confirm
// NOTE(review): source line 188 is missing from this listing — presumably the munmap call; confirm.
189 }
190 if (file_descriptor != -1) {
191 close(file_descriptor);
192 }
193#else // _WIN32
194 if (mapped_tensor_data != nullptr) {
195 UnmapViewOfFile(mapped_tensor_data);
196 }
197 if (h_map_file != NULL) {
198 CloseHandle(h_map_file);
199 }
200 if (h_file != INVALID_HANDLE_VALUE) {
201 CloseHandle(h_file);
202 }
203#endif
204
205 // Move data members
206 header = other.header;
207 metadata = std::move(other.metadata);
208 tensor_infos = std::move(other.tensor_infos);
209 tensor_infos_map = std::move(other.tensor_infos_map);
210 tokenizer_tokens = std::move(other.tokenizer_tokens);
211 tokenizer_scores = std::move(other.tokenizer_scores);
212 tokenizer_token_types = std::move(other.tokenizer_token_types);
213 tokenizer_merges = std::move(other.tokenizer_merges);
214
215 // Move platform-specific handles and mmap data
216#ifndef _WIN32
217 file_descriptor = other.file_descriptor;
218#else
219 h_file = other.h_file;
220 h_map_file = other.h_map_file;
221#endif
222 mapped_tensor_data = other.mapped_tensor_data;
223 mapped_tensor_data_size = other.mapped_tensor_data_size;
224 data_alignment = other.data_alignment;
225 offset_diff_for_mmap = other.offset_diff_for_mmap;
226 tensor_data = std::move(other.tensor_data);
227
228 // Leave other in a valid but safe state
// (the old handle values were overwritten above, so nothing is released twice)
229#ifndef _WIN32
230 other.file_descriptor = -1;
231#else
232 other.h_file = INVALID_HANDLE_VALUE;
233 other.h_map_file = NULL;
234#endif
235 other.mapped_tensor_data = nullptr;
236 other.mapped_tensor_data_size = 0;
237 other.offset_diff_for_mmap = 0;
238 }
239 return *this;
240 }
241};
Type definitions for GGML (Georgi Gerganov Machine Learning) library.
GGMLType
Enumeration of GGML tensor data types.
Definition ggml_types.h:21
GGUFValueType
Enumeration of value types used in GGUF metadata.
Definition ggml_types.h:51
std::variant< uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t, float, bool, std::string, uint64_t, int64_t, double, GGUFArray > GGUFMetadataValue
Type for storing metadata values of various types.
Represents an array in GGUF metadata.
GGUFValueType type
uint64_t len
Complete representation of a GGUF file's contents.
static const void * MMapFailure
GGUFData & operator=(GGUFData &&other) noexcept
uint64_t data_alignment
std::vector< GGUFTensorInfo > tensor_infos
std::vector< std::string > tokenizer_tokens
std::vector< float > tokenizer_scores
size_t offset_diff_for_mmap
std::vector< uint8_t > tensor_data
std::vector< std::string > tokenizer_merges
size_t mapped_tensor_data_size
std::map< std::string, GGUFMetadataValue > metadata
GGUFData(GGUFData &&other) noexcept
int file_descriptor
GGUFData(const GGUFData &)=delete
void * mapped_tensor_data
std::map< std::string, GGUFTensorInfo > tensor_infos_map
GGUFHeader header
std::vector< uint32_t > tokenizer_token_types
GGUFData & operator=(const GGUFData &)=delete
Header structure for GGUF files.
uint64_t tensor_count
uint64_t metadata_kv_count
uint32_t version
uint32_t magic
Information about a tensor stored in a GGUF file.
size_t size_in_bytes
size_t num_elements
uint64_t offset
std::vector< uint64_t > shape
std::string name