TinyLlama.cpp 1.0
A lightweight C++ implementation of the TinyLlama language model
Loading...
Searching...
No Matches
safetensors_loader.h
Go to the documentation of this file.
1#ifndef SAFETENSORS_LOADER_H
2#define SAFETENSORS_LOADER_H
3
4#ifdef _WIN32
5#include <windows.h>
6#else
7#include <fcntl.h>
8#include <sys/mman.h>
9#include <unistd.h>
10#endif
11
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <filesystem> // For directory operations, C++17
#include <functional>
#include <future>
#include <map>
#include <memory>
#include <mutex>
#include <queue>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>

#include <nlohmann/json.hpp>
24
25#include "logger.h" // Assuming Logger is accessible
26
27struct ModelConfig; // Forward declaration
28
34class ThreadPool; // Forward declaration
35
/**
 * @brief Represents a memory-mapped SafeTensors file (shard).
 *
 * Owns the memory mapping and the underlying file handle. Movable but not
 * copyable: a copy would alias the mapping and release it twice.
 */
struct Shard {
  /// Path to the shard file.
  std::string file_path;

  /// Pointer to the memory-mapped data.
  void* mapped_data = nullptr;

  /// Size of the mapped file in bytes.
  size_t file_size = 0;

  /// Size of the metadata block in bytes.
  uint64_t metadata_size = 0;

  /// Pointer to the start of the metadata block.
  const uint8_t* metadata_ptr = nullptr;

  /// Pointer to the start of the tensor data block.
  const uint8_t* tensor_data_block_ptr = nullptr;

#ifdef _WIN32
  HANDLE file_handle_ = INVALID_HANDLE_VALUE;
  HANDLE mapping_handle_ = NULL;
#else
  int fd_ = -1;
#endif

  /**
   * @brief Opens and memory-maps the shard file at @p fp.
   */
  explicit Shard(const std::string& fp);

  /// Destructor. Cleans up memory mapping and file handles.
  ~Shard();

  // Copy operations are already implicitly deleted by the user-declared move
  // operations; spelling the deletion out documents the intent and matches
  // SafeTensorsLoader, which deletes its copies explicitly.
  Shard(const Shard&) = delete;
  Shard& operator=(const Shard&) = delete;

  /// Move constructor. Transfers ownership of the mapping and handles.
  Shard(Shard&& other) noexcept;

  /// Move assignment operator. Transfers ownership of the mapping and handles.
  Shard& operator=(Shard&& other) noexcept;

  /**
   * @brief Get a pointer to the raw tensor data within this shard.
   * @param local_offset Byte offset within this shard (presumably relative to
   *        the tensor data block — confirm against the .cpp implementation).
   * @param n_bytes Number of bytes the caller intends to read.
   * @return Pointer into the memory-mapped region; not owned by the caller.
   */
  const uint8_t* get_tensor_raw_data(size_t local_offset, size_t n_bytes) const;
};
111
112
121 public:
    // Information about a tensor stored in the SafeTensors file(s).
125 struct TensorInfo {
    // Tensor name as it appears in the safetensors JSON metadata.
126 std::string name;
    // Dtype string recorded in the metadata; convert_tensor_data() below
    // converts raw bytes to FP32 based on this.
127 std::string dtype;
    // Tensor dimensions.
128 std::vector<size_t> shape;
    // Byte offset of the tensor's data — presumably relative to the owning
    // shard's tensor data block; TODO confirm against parse_shard_metadata().
129 size_t data_offset;
    // Size of the tensor's raw data in bytes.
130 size_t nbytes;
    // Key identifying which loaded shard holds this tensor's data.
131 std::string shard_key;
132 };
133
    // Constructs the loader for a .safetensors file or a model directory.
143 explicit SafeTensorsLoader(const std::string& model_load_path);
144
    // NOTE(review): the two collapsed lines below are the destructor (cleans
    // up all memory-mapped shards) and the deleted copy constructor/assignment
    // — this class owns mmapped shards and is intentionally non-copyable (see
    // the member listing rendered at the bottom of this page).
149
152
    // Get a list of all tensor names available in the loaded model.
157 std::vector<std::string> tensor_names() const;
158
    // Get the raw bytes for a tensor, converting to FP32 if needed.
165 std::vector<uint8_t> get_tensor_bytes(const std::string& name) const;
166
    // Get information about a specific tensor.
173 const TensorInfo& get_tensor_info(const std::string& name) const;
174
    // Load all tensors in parallel; returns name -> FP32-converted bytes.
179 std::map<std::string, std::vector<uint8_t>> load_all_tensors_parallel() const;
180
    // Loads model configuration from a JSON file corresponding to a
    // .safetensors model path or directory.
191 static bool load_model_config_from_json(const std::string& model_path_or_dir, ModelConfig& config_to_populate);
192
193 private:
    // Path passed to the constructor (file or directory).
194 std::string model_load_path_;
    // True when the model is split across multiple shard files.
195 bool is_sharded_ = false;
    // All known tensors, keyed by tensor name.
197 std::map<std::string, TensorInfo> tensors_;
    // Memory-mapped shards, keyed by shard key; unique_ptr because Shard is
    // move-only and owns the mapping.
198 std::map<std::string, std::unique_ptr<Shard>> loaded_shards_;
200 // If sharded via an index file, this maps tensor names directly to their shard key.
201 // If not sharded or sharded by pattern, this might be populated differently or less used.
202 std::map<std::string, std::string> tensor_name_to_shard_key_map_;
203
    // Load tensors from a directory, handling index files and multiple shards.
211 void load_from_directory(const std::string& directory_path);
212
    // Load a single .safetensors file as a shard.
220 void load_single_file(const std::string& file_path, const std::string& shard_key_override = "");
221
    // Parse the metadata of a shard and populate tensor information.
229 void parse_shard_metadata(Shard& shard, const std::string& shard_key);
230
    // Convert raw tensor data to FP32 if needed.
240 std::vector<uint8_t> convert_tensor_data(const uint8_t* data, size_t size,
241 const std::string& dtype) const;
242
    // Get the Shard object for a given tensor name.
251 const Shard* get_shard_for_tensor(const std::string& tensor_name) const;
252};
253
261 public:
266 explicit ThreadPool(size_t num_threads);
267
271 ~ThreadPool();
272
281 template <class F, class... Args>
282 std::future<typename std::result_of<F(Args...)>::type> submit(F&& f,
283 Args&&... args);
284 private:
285 std::vector<std::thread> workers_;
286 std::queue<std::function<void()>> tasks_;
287 std::mutex queue_mutex_;
288 std::condition_variable condition_;
289 bool stop_ = false;
290};
291
292// Template implementation for ThreadPool::submit
293template <class F, class... Args>
294std::future<typename std::result_of<F(Args...)>::type> ThreadPool::submit(
295 F&& f, Args&&... args) {
296 using return_type = typename std::result_of<F(Args...)>::type;
297
298 auto task = std::make_shared<std::packaged_task<return_type()>>(
299 std::bind(std::forward<F>(f), std::forward<Args>(args)...));
300
301 std::future<return_type> res = task->get_future();
302 {
303 std::unique_lock<std::mutex> lock(queue_mutex_);
304 if (stop_) throw std::runtime_error("submit on stopped ThreadPool");
305 tasks_.emplace([task]() { (*task)(); });
306 }
307 condition_.notify_one();
308 return res;
309}
310
311#endif // SAFETENSORS_LOADER_H
Main class for loading tensors from SafeTensors-format files (single or sharded).
std::map< std::string, std::unique_ptr< Shard > > loaded_shards_
std::map< std::string, TensorInfo > tensors_
SafeTensorsLoader(const SafeTensorsLoader &)=delete
const Shard * get_shard_for_tensor(const std::string &tensor_name) const
Get the Shard object for a given tensor name.
void load_from_directory(const std::string &directory_path)
Load tensors from a directory, handling index files and multiple shards.
static bool load_model_config_from_json(const std::string &model_path_or_dir, ModelConfig &config_to_populate)
Loads model configuration from a JSON file corresponding to a .safetensors model path.
std::map< std::string, std::string > tensor_name_to_shard_key_map_
void load_single_file(const std::string &file_path, const std::string &shard_key_override="")
Load a single .safetensors file as a shard.
std::map< std::string, std::vector< uint8_t > > load_all_tensors_parallel() const
Load all tensors in parallel.
std::vector< std::string > tensor_names() const
Get a list of all tensor names available in the loaded model.
const TensorInfo & get_tensor_info(const std::string &name) const
Get information about a specific tensor.
std::vector< uint8_t > convert_tensor_data(const uint8_t *data, size_t size, const std::string &dtype) const
Convert raw tensor data to FP32 if needed.
std::vector< uint8_t > get_tensor_bytes(const std::string &name) const
Get the raw bytes for a tensor, converting to FP32 if needed.
~SafeTensorsLoader()
Destructor. Cleans up all memory-mapped shards.
SafeTensorsLoader & operator=(const SafeTensorsLoader &)=delete
void parse_shard_metadata(Shard &shard, const std::string &shard_key)
Parse the metadata of a shard and populate tensor information.
Thread pool for parallel tensor loading operations.
~ThreadPool()
Destructor that ensures proper cleanup of threads.
std::future< typename std::result_of< F(Args...)>::type > submit(F &&f, Args &&... args)
Submits a task to the thread pool.
std::queue< std::function< void()> > tasks_
std::vector< std::thread > workers_
std::condition_variable condition_
std::mutex queue_mutex_
Logging utilities for the TinyLlama implementation.
Model configuration structure holding architecture and hyperparameters.
Definition model.h:80
Information about a tensor stored in the SafeTensors file(s)
Represents a memory-mapped SafeTensors file (shard).
~Shard()
Destructor. Cleans up memory mapping and file handles.
uint64_t metadata_size
Size of the metadata block in bytes.
const uint8_t * tensor_data_block_ptr
Pointer to the start of the tensor data block.
void * mapped_data
Pointer to the memory-mapped data.
std::string file_path
Path to the shard file.
const uint8_t * metadata_ptr
Pointer to the start of the metadata block.
Shard & operator=(Shard &&other) noexcept
Move assignment operator.
size_t file_size
Size of the mapped file in bytes.
const uint8_t * get_tensor_raw_data(size_t local_offset, size_t n_bytes) const
Get a pointer to the raw tensor data within this shard.