TinyLlama.cpp 1.0
A lightweight C++ implementation of the TinyLlama language model
LayerWeights Struct Reference

Structure holding all weights for a single transformer layer.

#include <model.h>

Public Attributes

| Type | Member |
| --- | --- |
| std::vector<uint16_t> | input_layernorm |
| std::vector<uint16_t> | post_attention_layernorm |
| std::vector<uint16_t> | q_proj |
| std::vector<uint16_t> | k_proj |
| std::vector<uint16_t> | v_proj |
| std::vector<uint16_t> | o_proj |
| std::vector<uint16_t> | gate_proj |
| std::vector<uint16_t> | up_proj |
| std::vector<uint16_t> | down_proj |
| std::vector<float> | input_layernorm_f32 |
| std::vector<float> | post_attention_layernorm_f32 |
| std::vector<float> | q_proj_f32 |
| std::vector<float> | k_proj_f32 |
| std::vector<float> | v_proj_f32 |
| std::vector<float> | o_proj_f32 |
| std::vector<float> | gate_proj_f32 |
| std::vector<float> | up_proj_f32 |
| std::vector<float> | down_proj_f32 |
| std::vector<block_q4_K> | q_proj_q4k |
| std::vector<block_q4_K> | k_proj_q4k |
| std::vector<block_q4_K> | v_proj_q4k |
| std::vector<block_q4_K> | o_proj_q4k |
| std::vector<block_q4_K> | gate_proj_q4k |
| std::vector<block_q4_K> | up_proj_q4k |
| std::vector<block_q4_K> | down_proj_q4k |
| std::vector<block_q6_K> | q_proj_q6k |
| std::vector<block_q6_K> | k_proj_q6k |
| std::vector<block_q6_K> | v_proj_q6k |
| std::vector<block_q6_K> | o_proj_q6k |
| std::vector<block_q6_K> | gate_proj_q6k |
| std::vector<block_q6_K> | up_proj_q6k |
| std::vector<block_q6_K> | down_proj_q6k |
| std::vector<block_q8_0> | q_proj_q8_0 |
| std::vector<block_q8_0> | k_proj_q8_0 |
| std::vector<block_q8_0> | v_proj_q8_0 |
| std::vector<block_q8_0> | o_proj_q8_0 |
| std::vector<block_q8_0> | gate_proj_q8_0 |
| std::vector<block_q8_0> | up_proj_q8_0 |
| std::vector<block_q8_0> | down_proj_q8_0 |
| std::vector<block_q8_K> | q_proj_q8k |
| std::vector<block_q8_K> | k_proj_q8k |
| std::vector<block_q8_K> | v_proj_q8k |
| std::vector<block_q8_K> | o_proj_q8k |
| std::vector<block_q8_K> | gate_proj_q8k |
| std::vector<block_q8_K> | up_proj_q8k |
| std::vector<block_q8_K> | down_proj_q8k |
Detailed Description

Structure holding all weights for a single transformer layer. It contains the attention projections (q_proj, k_proj, v_proj, o_proj), the MLP projections (gate_proj, up_proj, down_proj), and the normalization weights (input_layernorm, post_attention_layernorm), with members for several storage formats: FP16 stored as uint16_t, FP32, and the quantized block formats block_q4_K, block_q6_K, block_q8_0, and block_q8_K.
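For orientation, here is a minimal sketch of what the LayerWeights declaration in model.h could look like, reconstructed only from the member names and types listed above; the placeholder block_* definitions and the grouping of fields are illustrative, and the real header may differ.

```cpp
// Sketch only: reconstructed from the member list above, not copied from model.h.
#include <cstdint>
#include <vector>

// Placeholder block types. The real layouts are defined in model.h; in GGML-style
// quantization a block typically packs per-block scales with the packed quantized
// values, but the exact fields are not shown here.
struct block_q4_K { /* placeholder */ };
struct block_q6_K { /* placeholder */ };
struct block_q8_0 { /* placeholder */ };
struct block_q8_K { /* placeholder */ };

struct LayerWeights {
    // Normalization weights: FP16 bit patterns stored as uint16_t, plus FP32 copies.
    std::vector<uint16_t> input_layernorm, post_attention_layernorm;
    std::vector<float>    input_layernorm_f32, post_attention_layernorm_f32;

    // Attention (q/k/v/o) and MLP (gate/up/down) projections, FP16 stored as uint16_t.
    std::vector<uint16_t> q_proj, k_proj, v_proj, o_proj,
                          gate_proj, up_proj, down_proj;

    // The same projections in FP32.
    std::vector<float> q_proj_f32, k_proj_f32, v_proj_f32, o_proj_f32,
                       gate_proj_f32, up_proj_f32, down_proj_f32;

    // The same projections in quantized block formats.
    std::vector<block_q4_K> q_proj_q4k, k_proj_q4k, v_proj_q4k, o_proj_q4k,
                            gate_proj_q4k, up_proj_q4k, down_proj_q4k;
    std::vector<block_q6_K> q_proj_q6k, k_proj_q6k, v_proj_q6k, o_proj_q6k,
                            gate_proj_q6k, up_proj_q6k, down_proj_q6k;
    std::vector<block_q8_0> q_proj_q8_0, k_proj_q8_0, v_proj_q8_0, o_proj_q8_0,
                            gate_proj_q8_0, up_proj_q8_0, down_proj_q8_0;
    std::vector<block_q8_K> q_proj_q8k, k_proj_q8k, v_proj_q8k, o_proj_q8k,
                            gate_proj_q8k, up_proj_q8k, down_proj_q8k;
};
```

Keeping one vector per storage format presumably lets the loader populate only the representation present in the model file while leaving the other vectors empty.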
Member Data Documentation

std::vector<block_q4_K> LayerWeights::down_proj_q4k
std::vector<block_q6_K> LayerWeights::down_proj_q6k
std::vector<block_q8_0> LayerWeights::down_proj_q8_0
std::vector<block_q8_K> LayerWeights::down_proj_q8k
std::vector<block_q4_K> LayerWeights::gate_proj_q4k
std::vector<block_q6_K> LayerWeights::gate_proj_q6k
std::vector<block_q8_0> LayerWeights::gate_proj_q8_0
std::vector<block_q8_K> LayerWeights::gate_proj_q8k
std::vector<block_q4_K> LayerWeights::k_proj_q4k
std::vector<block_q6_K> LayerWeights::k_proj_q6k
std::vector<block_q8_0> LayerWeights::k_proj_q8_0
std::vector<block_q8_K> LayerWeights::k_proj_q8k
std::vector<block_q4_K> LayerWeights::o_proj_q4k
std::vector<block_q6_K> LayerWeights::o_proj_q6k
std::vector<block_q8_0> LayerWeights::o_proj_q8_0
std::vector<block_q8_K> LayerWeights::o_proj_q8k
std::vector<uint16_t> LayerWeights::post_attention_layernorm
std::vector<float> LayerWeights::post_attention_layernorm_f32
std::vector<block_q4_K> LayerWeights::q_proj_q4k
std::vector<block_q6_K> LayerWeights::q_proj_q6k
std::vector<block_q8_0> LayerWeights::q_proj_q8_0
std::vector<block_q8_K> LayerWeights::q_proj_q8k
std::vector<block_q4_K> LayerWeights::up_proj_q4k
std::vector<block_q6_K> LayerWeights::up_proj_q6k
std::vector<block_q8_0> LayerWeights::up_proj_q8_0
std::vector<block_q8_K> LayerWeights::up_proj_q8k
std::vector<block_q4_K> LayerWeights::v_proj_q4k
std::vector<block_q6_K> LayerWeights::v_proj_q6k
std::vector<block_q8_0> LayerWeights::v_proj_q8_0
std::vector<block_q8_K> LayerWeights::v_proj_q8k
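Because each projection can be stored in any one of the formats above, calling code has to determine at run time which buffer is actually populated. The helper below is a hypothetical sketch, not part of the TinyLlama.cpp API: it assumes that at most one representation of q_proj is filled per layer and simply checks which vector is non-empty. The WeightFormat enum and function name are illustrative.

```cpp
// Hypothetical helper, not part of TinyLlama.cpp: illustrates how a caller
// might discover which representation of q_proj a LayerWeights instance holds.
#include <model.h>  // assumed to define LayerWeights and the block_* types

enum class WeightFormat { F16, F32, Q4_K, Q6_K, Q8_0, Q8_K, Missing };

inline WeightFormat q_proj_format(const LayerWeights& lw) {
    if (!lw.q_proj_f32.empty())  return WeightFormat::F32;
    if (!lw.q_proj.empty())      return WeightFormat::F16;  // uint16_t bit patterns
    if (!lw.q_proj_q4k.empty())  return WeightFormat::Q4_K;
    if (!lw.q_proj_q6k.empty())  return WeightFormat::Q6_K;
    if (!lw.q_proj_q8_0.empty()) return WeightFormat::Q8_0;
    if (!lw.q_proj_q8k.empty())  return WeightFormat::Q8_K;
    return WeightFormat::Missing;
}
```

A matrix-vector routine could switch on the returned value to dispatch to the matching FP32, FP16, or quantized kernel; the same pattern extends to the other projections and the normalization weights.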