TinyLlama.cpp 1.0
A lightweight C++ implementation of the TinyLlama language model
model_constants.h
#pragma once

#include <cstdint>

namespace bfloat16 {
    constexpr uint16_t EXPONENT_MASK = 0x7F80;
    constexpr uint16_t MANTISSA_MASK = 0x007F;
    constexpr uint16_t SIGN_BIT = 0x8000;
    constexpr uint16_t ZERO = 0x0000;
    constexpr uint16_t NEG_ZERO = 0x8000;
    constexpr int SHIFT_BITS = 16;
}

namespace attention {
    constexpr float ATTENTION_SCALE_DEFAULT = 0.125f; // 1/sqrt(64) for typical head_dim=64
    constexpr float ATTENTION_SCALE_BASE = 1.0f; // MUST BE 1.0f for correct scaling when combined with config's attention_softmax_scale

    constexpr float MIN_SCALE = 1e-4f;
    constexpr float MAX_SCALE = 1e4f;
    // For GQA/MQA, if num_kv_heads < num_q_heads, kv_repetition_factor > 1
}

namespace rope {
    constexpr float ROPE_THETA = 10000.0f;
    constexpr int MAX_SEQUENCE_LENGTH = 2048;
}

namespace numeric {
    constexpr float MIN_NORM_EPS = 1e-5f;
    constexpr float DEFAULT_EPS = 1e-6f;
    constexpr float MAX_LOGIT_THRESHOLD = 100.0f;
}
Namespace reference (brief descriptions and members):

namespace attention: Constants for attention mechanism calculations.
    constexpr float ATTENTION_SCALE_DEFAULT
    constexpr float ATTENTION_SCALE_BASE
    constexpr float MIN_SCALE
    constexpr float MAX_SCALE
    constexpr int KV_REPETITION_FACTOR_DEFAULT

namespace bfloat16: Constants for BFloat16 number format handling.
    constexpr uint16_t EXPONENT_MASK
    constexpr uint16_t MANTISSA_MASK
    constexpr uint16_t SIGN_BIT
    constexpr uint16_t ZERO
    constexpr uint16_t NEG_ZERO
    constexpr int SHIFT_BITS

namespace numeric: Constants for ensuring numeric stability.
    constexpr float MIN_NORM_EPS
    constexpr float DEFAULT_EPS
    constexpr float MAX_LOGIT_THRESHOLD

namespace rope: Constants for Rotary Position Embedding (RoPE).
    constexpr float ROPE_THETA
    constexpr int MAX_SEQUENCE_LENGTH
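
Usage sketches. The constants above are easiest to read next to small examples. The helpers below (float_to_bfloat16, bfloat16_to_float, bfloat16_is_zero, bfloat16_is_inf_or_nan) are hypothetical and not part of model_constants.h; they assume the bfloat16 constants describe the usual BF16 layout, in which a bfloat16 value is the upper 16 bits of an IEEE-754 float32. The conversion shown simply truncates (rounds toward zero); production code often rounds to nearest even.

#include <cstdint>
#include <cstring>

#include "model_constants.h"

// Hypothetical helpers, not part of model_constants.h.
inline uint16_t float_to_bfloat16(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    // Keep the upper 16 bits: sign, 8-bit exponent, top 7 mantissa bits (truncating conversion).
    return static_cast<uint16_t>(bits >> bfloat16::SHIFT_BITS);
}

inline float bfloat16_to_float(uint16_t h) {
    uint32_t bits = static_cast<uint32_t>(h) << bfloat16::SHIFT_BITS;
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}

inline bool bfloat16_is_zero(uint16_t h) {
    return h == bfloat16::ZERO || h == bfloat16::NEG_ZERO; // +0.0f or -0.0f
}

inline bool bfloat16_is_inf_or_nan(uint16_t h) {
    return (h & bfloat16::EXPONENT_MASK) == bfloat16::EXPONENT_MASK; // exponent bits all set
}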
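
For the attention constants, one plausible reading (an assumption, not taken from the TinyLlama.cpp sources) is that the per-head scale 1/sqrt(head_dim) is computed once and sanity-checked against MIN_SCALE and MAX_SCALE, while ATTENTION_SCALE_BASE stays at 1.0f so the effective scaling comes entirely from the config's attention_softmax_scale. A hypothetical compute_attention_scale helper illustrates this:

#include <algorithm>
#include <cmath>

#include "model_constants.h"

// Hypothetical helper, not part of model_constants.h.
inline float compute_attention_scale(int head_dim) {
    // Canonical scaled dot-product attention uses 1/sqrt(head_dim);
    // for head_dim == 64 this equals attention::ATTENTION_SCALE_DEFAULT (0.125f).
    float scale = (head_dim > 0)
        ? 1.0f / std::sqrt(static_cast<float>(head_dim))
        : attention::ATTENTION_SCALE_DEFAULT;
    // Clamp to the documented sanity bounds.
    return std::clamp(scale, attention::MIN_SCALE, attention::MAX_SCALE);
}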
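
The rope constants match the standard RoPE parameterization: base theta of 10000 and positions below MAX_SEQUENCE_LENGTH. The sketch below shows the usual inverse-frequency table; rope_inverse_frequencies is a hypothetical name, and the actual RoPE code in TinyLlama.cpp may be organized differently.

#include <cmath>
#include <vector>

#include "model_constants.h"

// Hypothetical helper, not part of model_constants.h.
inline std::vector<float> rope_inverse_frequencies(int head_dim) {
    std::vector<float> inv_freq(head_dim / 2);
    for (int i = 0; i < head_dim / 2; ++i) {
        // Standard RoPE: inv_freq[i] = theta^(-2i / head_dim).
        inv_freq[i] = 1.0f / std::pow(rope::ROPE_THETA, (2.0f * i) / head_dim);
    }
    return inv_freq;
}
// The rotation angle for position p and pair i is p * inv_freq[i],
// with p expected to stay below rope::MAX_SEQUENCE_LENGTH.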
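
The numeric constants suggest the usual stability measures: an epsilon inside the normalization denominator and a bound on logit magnitude before softmax. The sketch below shows that reading; rms_normalize and clamp_logit are hypothetical names, and the real code may apply these constants differently.

#include <algorithm>
#include <cmath>
#include <vector>

#include "model_constants.h"

// Hypothetical helpers, not part of model_constants.h.
inline void rms_normalize(std::vector<float>& x, float eps = numeric::DEFAULT_EPS) {
    float sum_sq = 0.0f;
    for (float v : x) sum_sq += v * v;
    // eps keeps the denominator away from zero for all-zero inputs.
    float inv_rms = 1.0f / std::sqrt(sum_sq / static_cast<float>(x.size()) + eps);
    for (float& v : x) v *= inv_rms;
}

inline float clamp_logit(float logit) {
    // Bounding logits before exponentiation keeps softmax from overflowing.
    return std::clamp(logit, -numeric::MAX_LOGIT_THRESHOLD, numeric::MAX_LOGIT_THRESHOLD);
}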