TinyLlama.cpp 1.0
A lightweight C++ implementation of the TinyLlama language model
LayerWeights Struct Reference

Structure holding all weights for a single transformer layer.

#include <model.h>

Public Attributes

| Type | Member |
| --- | --- |
| std::vector<uint16_t> | input_layernorm |
| std::vector<uint16_t> | post_attention_layernorm |
| std::vector<uint16_t> | q_proj |
| std::vector<uint16_t> | k_proj |
| std::vector<uint16_t> | v_proj |
| std::vector<uint16_t> | o_proj |
| std::vector<uint16_t> | gate_proj |
| std::vector<uint16_t> | up_proj |
| std::vector<uint16_t> | down_proj |
| std::vector<float> | input_layernorm_f32 |
| std::vector<float> | post_attention_layernorm_f32 |
| std::vector<float> | q_proj_f32 |
| std::vector<float> | k_proj_f32 |
| std::vector<float> | v_proj_f32 |
| std::vector<float> | o_proj_f32 |
| std::vector<float> | gate_proj_f32 |
| std::vector<float> | up_proj_f32 |
| std::vector<float> | down_proj_f32 |
| std::vector<block_q4_K> | q_proj_q4k |
| std::vector<block_q4_K> | k_proj_q4k |
| std::vector<block_q4_K> | v_proj_q4k |
| std::vector<block_q4_K> | o_proj_q4k |
| std::vector<block_q4_K> | gate_proj_q4k |
| std::vector<block_q4_K> | up_proj_q4k |
| std::vector<block_q4_K> | down_proj_q4k |
| std::vector<block_q6_K> | q_proj_q6k |
| std::vector<block_q6_K> | k_proj_q6k |
| std::vector<block_q6_K> | v_proj_q6k |
| std::vector<block_q6_K> | o_proj_q6k |
| std::vector<block_q6_K> | gate_proj_q6k |
| std::vector<block_q6_K> | up_proj_q6k |
| std::vector<block_q6_K> | down_proj_q6k |
| std::vector<block_q8_0> | q_proj_q8_0 |
| std::vector<block_q8_0> | k_proj_q8_0 |
| std::vector<block_q8_0> | v_proj_q8_0 |
| std::vector<block_q8_0> | o_proj_q8_0 |
| std::vector<block_q8_0> | gate_proj_q8_0 |
| std::vector<block_q8_0> | up_proj_q8_0 |
| std::vector<block_q8_0> | down_proj_q8_0 |
| std::vector<block_q8_K> | q_proj_q8k |
| std::vector<block_q8_K> | k_proj_q8k |
| std::vector<block_q8_K> | v_proj_q8k |
| std::vector<block_q8_K> | o_proj_q8k |
| std::vector<block_q8_K> | gate_proj_q8k |
| std::vector<block_q8_K> | up_proj_q8k |
| std::vector<block_q8_K> | down_proj_q8k |
Detailed Description

Structure holding all weights for a single transformer layer. It contains the attention projections (q_proj, k_proj, v_proj, o_proj), the MLP projections (gate_proj, up_proj, down_proj), and the normalization weights (input_layernorm, post_attention_layernorm), with members for several storage formats: FP16 stored as uint16_t, FP32, and the quantized block formats block_q4_K, block_q6_K, block_q8_0, and block_q8_K.
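For orientation, here is a minimal sketch of what the LayerWeights declaration in model.h could look like, reconstructed only from the member names and types listed above; the placeholder block_* definitions and the grouping of fields are illustrative, and the real header may differ.

```cpp
// Sketch only: reconstructed from the member list above, not copied from model.h.
#include <cstdint>
#include <vector>

// Placeholder block types. The real layouts are defined in model.h; in GGML-style
// quantization a block typically packs per-block scales with the packed quantized
// values, but the exact fields are not shown here.
struct block_q4_K { /* placeholder */ };
struct block_q6_K { /* placeholder */ };
struct block_q8_0 { /* placeholder */ };
struct block_q8_K { /* placeholder */ };

struct LayerWeights {
    // Normalization weights: FP16 bit patterns stored as uint16_t, plus FP32 copies.
    std::vector<uint16_t> input_layernorm, post_attention_layernorm;
    std::vector<float>    input_layernorm_f32, post_attention_layernorm_f32;

    // Attention (q/k/v/o) and MLP (gate/up/down) projections, FP16 stored as uint16_t.
    std::vector<uint16_t> q_proj, k_proj, v_proj, o_proj,
                          gate_proj, up_proj, down_proj;

    // The same projections in FP32.
    std::vector<float> q_proj_f32, k_proj_f32, v_proj_f32, o_proj_f32,
                       gate_proj_f32, up_proj_f32, down_proj_f32;

    // The same projections in quantized block formats.
    std::vector<block_q4_K> q_proj_q4k, k_proj_q4k, v_proj_q4k, o_proj_q4k,
                            gate_proj_q4k, up_proj_q4k, down_proj_q4k;
    std::vector<block_q6_K> q_proj_q6k, k_proj_q6k, v_proj_q6k, o_proj_q6k,
                            gate_proj_q6k, up_proj_q6k, down_proj_q6k;
    std::vector<block_q8_0> q_proj_q8_0, k_proj_q8_0, v_proj_q8_0, o_proj_q8_0,
                            gate_proj_q8_0, up_proj_q8_0, down_proj_q8_0;
    std::vector<block_q8_K> q_proj_q8k, k_proj_q8k, v_proj_q8k, o_proj_q8k,
                            gate_proj_q8k, up_proj_q8k, down_proj_q8k;
};
```

Keeping one vector per storage format presumably lets the loader populate only the representation present in the model file while leaving the other vectors empty.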
Member Data Documentation

std::vector<block_q4_K> LayerWeights::down_proj_q4k
std::vector<block_q6_K> LayerWeights::down_proj_q6k
std::vector<block_q8_0> LayerWeights::down_proj_q8_0
std::vector<block_q8_K> LayerWeights::down_proj_q8k
std::vector<block_q4_K> LayerWeights::gate_proj_q4k
std::vector<block_q6_K> LayerWeights::gate_proj_q6k
std::vector<block_q8_0> LayerWeights::gate_proj_q8_0
std::vector<block_q8_K> LayerWeights::gate_proj_q8k
std::vector<block_q4_K> LayerWeights::k_proj_q4k
std::vector<block_q6_K> LayerWeights::k_proj_q6k
std::vector<block_q8_0> LayerWeights::k_proj_q8_0
std::vector<block_q8_K> LayerWeights::k_proj_q8k
std::vector<block_q4_K> LayerWeights::o_proj_q4k
std::vector<block_q6_K> LayerWeights::o_proj_q6k
std::vector<block_q8_0> LayerWeights::o_proj_q8_0
std::vector<block_q8_K> LayerWeights::o_proj_q8k
std::vector<uint16_t> LayerWeights::post_attention_layernorm
std::vector<float> LayerWeights::post_attention_layernorm_f32
std::vector<block_q4_K> LayerWeights::q_proj_q4k
std::vector<block_q6_K> LayerWeights::q_proj_q6k
std::vector<block_q8_0> LayerWeights::q_proj_q8_0
std::vector<block_q8_K> LayerWeights::q_proj_q8k
std::vector<block_q4_K> LayerWeights::up_proj_q4k
std::vector<block_q6_K> LayerWeights::up_proj_q6k
std::vector<block_q8_0> LayerWeights::up_proj_q8_0
std::vector<block_q8_K> LayerWeights::up_proj_q8k
std::vector<block_q4_K> LayerWeights::v_proj_q4k
std::vector<block_q6_K> LayerWeights::v_proj_q6k
std::vector<block_q8_0> LayerWeights::v_proj_q8_0
std::vector<block_q8_K> LayerWeights::v_proj_q8k
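Because each projection can be stored in any one of the formats above, calling code has to determine at run time which buffer is actually populated. The helper below is a hypothetical sketch, not part of the TinyLlama.cpp API: it assumes that at most one representation of q_proj is filled per layer and simply checks which vector is non-empty. The WeightFormat enum and function name are illustrative.

```cpp
// Hypothetical helper, not part of TinyLlama.cpp: illustrates how a caller
// might discover which representation of q_proj a LayerWeights instance holds.
#include <model.h>  // assumed to define LayerWeights and the block_* types

enum class WeightFormat { F16, F32, Q4_K, Q6_K, Q8_0, Q8_K, Missing };

inline WeightFormat q_proj_format(const LayerWeights& lw) {
    if (!lw.q_proj_f32.empty())  return WeightFormat::F32;
    if (!lw.q_proj.empty())      return WeightFormat::F16;  // uint16_t bit patterns
    if (!lw.q_proj_q4k.empty())  return WeightFormat::Q4_K;
    if (!lw.q_proj_q6k.empty())  return WeightFormat::Q6_K;
    if (!lw.q_proj_q8_0.empty()) return WeightFormat::Q8_0;
    if (!lw.q_proj_q8k.empty())  return WeightFormat::Q8_K;
    return WeightFormat::Missing;
}
```

A matrix-vector routine could switch on the returned value to dispatch to the matching FP32, FP16, or quantized kernel; the same pattern extends to the other projections and the normalization weights.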