TinyLlama.cpp 1.0
A lightweight C++ implementation of the TinyLlama language model
Loading...
Searching...
No Matches
Functions
safetensors_loader.cpp File Reference
#include "safetensors_loader.h"
#include "model.h"
#include "logger.h"
#include "model_macros.h"
#include <sys/stat.h>
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cstdint>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>
Include dependency graph for safetensors_loader.cpp:

Go to the source code of this file.

Functions

float cpu_bf16_to_float32 (uint16_t bf16_raw)
 
float cpu_f16_to_float32 (uint16_t f16_raw)
 

Function Documentation

◆ cpu_bf16_to_float32()

float cpu_bf16_to_float32 ( uint16_t  bf16_raw)
inline

Definition at line 31 of file safetensors_loader.cpp.

/// @brief Converts a raw bfloat16 bit pattern to a 32-bit float.
///
/// The 16 input bits are placed in the upper half of a 32-bit word, the lower
/// 16 bits are zero, and the word is reinterpreted as a float.
///
/// @param bf16_raw Raw 16-bit pattern to convert.
/// @return The float whose object representation is `bf16_raw << 16`.
inline float cpu_bf16_to_float32(uint16_t bf16_raw) {
  // The byte copy below is only meaningful when float is exactly 32 bits wide.
  static_assert(sizeof(float) == sizeof(uint32_t),
                "cpu_bf16_to_float32 requires a 32-bit float");

  const uint32_t bits = static_cast<uint32_t>(bf16_raw) << 16;

  // memcpy is the well-defined way to reinterpret the bits (no aliasing UB).
  float result;
  std::memcpy(&result, &bits, sizeof(result));
  return result;
}

Referenced by SafeTensorsLoader::convert_tensor_data().

◆ cpu_f16_to_float32()

float cpu_f16_to_float32 ( uint16_t  f16_raw)
inline

Definition at line 37 of file safetensors_loader.cpp.

/// @brief Converts a raw half-precision (binary16) bit pattern to a 32-bit float.
///
/// Handles all four input classes: Inf/NaN (exponent field 0x1F, mantissa bits
/// carried over), signed zero, subnormals (renormalised), and normal numbers
/// (exponent re-biased from 15 to 127).
///
/// @param f16_raw Raw 16-bit pattern: 1 sign bit, 5 exponent bits, 10 mantissa bits.
/// @return The float built from the converted sign, exponent and mantissa fields.
inline float cpu_f16_to_float32(uint16_t f16_raw) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "cpu_f16_to_float32 requires a 32-bit float");

  constexpr uint32_t kSignMaskF16 = 0x8000U;
  constexpr uint32_t kExpMaskF16 = 0x7C00U;
  constexpr uint32_t kMantMaskF16 = 0x03FFU;
  constexpr uint32_t kImplicitBitF16 = 0x0400U;  // position of the hidden leading 1
  constexpr uint32_t kExpAllOnesF16 = 0x1FU;
  constexpr uint32_t kExpAllOnesF32 = 0x7F800000U;
  constexpr int32_t kExpBiasF16 = 15;
  constexpr int32_t kExpBiasF32 = 127;
  constexpr int kMantShift = 13;  // 23 float mantissa bits - 10 half mantissa bits
  constexpr int kExpShiftF32 = 23;

  const uint32_t sign_f32 = (f16_raw & kSignMaskF16) << 16;
  const uint32_t exp_f16 = (f16_raw & kExpMaskF16) >> 10;
  uint32_t mant = f16_raw & kMantMaskF16;

  uint32_t f32_bits;
  if (exp_f16 == kExpAllOnesF16) {
    // Inf (mantissa == 0) or NaN (mantissa != 0); the mantissa bits are carried over.
    f32_bits = sign_f32 | kExpAllOnesF32 | (mant << kMantShift);
  } else if (exp_f16 == 0 && mant == 0) {
    // Signed zero.
    f32_bits = sign_f32;
  } else {
    int32_t exp_f32;
    if (exp_f16 == 0) {
      // Subnormal half: value = mant * 2^-24. Shift the mantissa left until its
      // leading 1 reaches the implicit-bit position, lowering the exponent by
      // one per shift. The resulting biased exponent lies in [103, 112], so the
      // result is always a normal float and no float-subnormal path is needed.
      exp_f32 = (1 - kExpBiasF16) + kExpBiasF32;
      do {
        mant <<= 1;
        --exp_f32;
      } while ((mant & kImplicitBitF16) == 0);
      mant &= kMantMaskF16;  // drop the now-implicit leading 1
    } else {
      // Normal half: only the exponent bias changes.
      exp_f32 = static_cast<int32_t>(exp_f16) - kExpBiasF16 + kExpBiasF32;
    }
    f32_bits = sign_f32 | (static_cast<uint32_t>(exp_f32) << kExpShiftF32) |
               (mant << kMantShift);
  }

  // memcpy is the well-defined way to reinterpret the bits (no aliasing UB).
  float result;
  std::memcpy(&result, &f32_bits, sizeof(result));
  return result;
}

Referenced by SafeTensorsLoader::convert_tensor_data().