1#ifndef SAFETENSORS_LOADER_H
2#define SAFETENSORS_LOADER_H
17#include <nlohmann/json.hpp>
74 HANDLE file_handle_ = INVALID_HANDLE_VALUE;
75 HANDLE mapping_handle_ = NULL;
85 explicit Shard(
const std::string& fp);
220 void load_single_file(
const std::string& file_path,
const std::string& shard_key_override =
"");
241 const std::string& dtype)
const;
281 template <
class F,
class... Args>
282 std::future<
typename std::result_of<F(Args...)>::type>
submit(F&& f,
286 std::queue<std::function<void()>>
tasks_;
293template <
class F,
class... Args>
295 F&& f, Args&&... args) {
296 using return_type =
typename std::result_of<F(Args...)>::type;
298 auto task = std::make_shared<std::packaged_task<return_type()>>(
299 std::bind(std::forward<F>(f), std::forward<Args>(args)...));
301 std::future<return_type> res = task->get_future();
304 if (
stop_)
throw std::runtime_error(
"submit on stopped ThreadPool");
305 tasks_.emplace([task]() { (*task)(); });
Main class for loading tensors from SafeTensors format files (single or sharded).
std::map< std::string, std::unique_ptr< Shard > > loaded_shards_
std::map< std::string, TensorInfo > tensors_
SafeTensorsLoader(const SafeTensorsLoader &)=delete
const Shard * get_shard_for_tensor(const std::string &tensor_name) const
Get the Shard object for a given tensor name.
void load_from_directory(const std::string &directory_path)
Load tensors from a directory, handling index files and multiple shards.
static bool load_model_config_from_json(const std::string &model_path_or_dir, ModelConfig &config_to_populate)
Loads model configuration from a JSON file corresponding to a .safetensors model path.
std::map< std::string, std::string > tensor_name_to_shard_key_map_
void load_single_file(const std::string &file_path, const std::string &shard_key_override="")
Load a single .safetensors file as a shard.
std::map< std::string, std::vector< uint8_t > > load_all_tensors_parallel() const
Load all tensors in parallel.
std::vector< std::string > tensor_names() const
Get a list of all tensor names available in the loaded model.
const TensorInfo & get_tensor_info(const std::string &name) const
Get information about a specific tensor.
std::vector< uint8_t > convert_tensor_data(const uint8_t *data, size_t size, const std::string &dtype) const
Convert raw tensor data to FP32 if needed.
std::vector< uint8_t > get_tensor_bytes(const std::string &name) const
Get the raw bytes for a tensor, converting to FP32 if needed.
std::string model_load_path_
~SafeTensorsLoader()
Destructor. Cleans up all memory-mapped shards.
SafeTensorsLoader & operator=(const SafeTensorsLoader &)=delete
void parse_shard_metadata(Shard &shard, const std::string &shard_key)
Parse the metadata of a shard and populate tensor information.
Thread pool for parallel tensor loading operations.
~ThreadPool()
Destructor that ensures proper cleanup of threads.
std::future< typename std::result_of< F(Args...)>::type > submit(F &&f, Args &&... args)
Submits a task to the thread pool.
std::queue< std::function< void()> > tasks_
std::vector< std::thread > workers_
std::condition_variable condition_
Logging utilities for the TinyLlama implementation.
Model configuration structure holding architecture and hyperparameters.
Information about a tensor stored in the SafeTensors file(s).
std::vector< size_t > shape
Represents a memory-mapped SafeTensors file (shard).
~Shard()
Destructor. Cleans up memory mapping and file handles.
uint64_t metadata_size
Size of the metadata block in bytes.
const uint8_t * tensor_data_block_ptr
Pointer to the start of the tensor data block.
void * mapped_data
Pointer to the memory-mapped data.
std::string file_path
Path to the shard file.
const uint8_t * metadata_ptr
Pointer to the start of the metadata block.
Shard & operator=(Shard &&other) noexcept
Move assignment operator.
size_t file_size
Size of the mapped file in bytes.
const uint8_t * get_tensor_raw_data(size_t local_offset, size_t n_bytes) const
Get a pointer to the raw tensor data within this shard.