| Member | Class | Attributes |
|---|---|---|
| clear_layer_dequantized_weights(int layer_idx) | TinyLlamaModel | |
| config_ | TinyLlamaModel | private |
| cpu_batch_processor_ | TinyLlamaModel | private |
| CPUBatchProcessor | TinyLlamaModel | friend |
| embed_tokens | TinyLlamaModel | private |
| embed_tokens_f32 | TinyLlamaModel | private |
| embed_tokens_q4k | TinyLlamaModel | private |
| embed_tokens_q6k | TinyLlamaModel | private |
| embed_tokens_q8_0 | TinyLlamaModel | private |
| embed_tokens_q8k | TinyLlamaModel | private |
| ensure_bf16_concatenated_weights_loaded() | TinyLlamaModel | |
| ensure_down_proj_dequantized(int layer_idx) | TinyLlamaModel | |
| ensure_embed_tokens_dequantized() | TinyLlamaModel | |
| ensure_f32_concatenated_weights_loaded() | TinyLlamaModel | |
| ensure_gate_proj_dequantized(int layer_idx) | TinyLlamaModel | |
| ensure_k_proj_dequantized(int layer_idx) | TinyLlamaModel | |
| ensure_layer_weights_on_gpu(int layer_idx) | TinyLlamaModel | |
| ensure_lm_head_dequantized() | TinyLlamaModel | |
| ensure_o_proj_dequantized(int layer_idx) | TinyLlamaModel | |
| ensure_q_proj_dequantized(int layer_idx) | TinyLlamaModel | |
| ensure_up_proj_dequantized(int layer_idx) | TinyLlamaModel | |
| ensure_v_proj_dequantized(int layer_idx) | TinyLlamaModel | |
| f32_concatenated_weights_loaded_ | TinyLlamaModel | private |
| final_norm | TinyLlamaModel | private |
| final_norm_f32 | TinyLlamaModel | private |
| final_norm_q4k | TinyLlamaModel | private |
| final_norm_q6k | TinyLlamaModel | private |
| forward(std::vector< float > &input, int n_tokens, KVCache *kv_cache, const std::vector< int > *attention_mask) | TinyLlamaModel | |
| forward_cpu_batch(const std::vector< float > &batch_input_activations, int num_tokens_in_batch, int num_cpu_layers_to_process, int start_pos_in_sequence, KVCache *kv_cache, const std::vector< int > &prompt_lengths={}) | TinyLlamaModel | |
| forward_cpu_batch_generation(const std::vector< float > &batch_input_activations, const std::vector< int > &token_positions, const std::vector< int > &original_sequence_indices, int num_tokens_in_batch, KVCache *kv_cache) | TinyLlamaModel | |
| forward_cpu_logits_batch(const std::vector< float > &final_batch_activations, int num_tokens_in_batch) | TinyLlamaModel | |
| free_bf16_concatenated_weights() | TinyLlamaModel | |
| free_layer_gpu_weights(int layer_idx) | TinyLlamaModel | |
| get_config() const | TinyLlamaModel | inline |
| get_embed_tokens() const | TinyLlamaModel | inline |
| get_gguf_data() const | TinyLlamaModel | inline |
| get_gguf_data_ptr() | TinyLlamaModel | inline |
| get_layers() | TinyLlamaModel | inline |
| get_lm_head() const | TinyLlamaModel | inline |
| get_vocab_size() const | TinyLlamaModel | |
| gguf_data_ | TinyLlamaModel | private |
| initialize_gpu_and_rope() | TinyLlamaModel | |
| initialize_rope_freqs() | TinyLlamaModel | |
| initialize_weights(const SafeTensorsLoader *loader, const GGUFData *gguf) | TinyLlamaModel | private |
| layers | TinyLlamaModel | private |
| lm_head | TinyLlamaModel | private |
| lm_head_f32 | TinyLlamaModel | private |
| lm_head_q4k | TinyLlamaModel | private |
| lm_head_q6k | TinyLlamaModel | private |
| lm_head_q8_0 | TinyLlamaModel | private |
| lm_head_q8k | TinyLlamaModel | private |
| lookup_embedding(int token_id) | TinyLlamaModel | |
| map_gguf_weights | TinyLlamaModel | friend |
| model_path_ | TinyLlamaModel | private |
| precomputed_freqs_cis_ | TinyLlamaModel | private |
| smart_gemm_batch_cuda(bool transa_user, bool transb_user, int m_user, int n_user, int k_user, const float *alpha_user, const float *A_f32_user, int lda_user, const float *B_f32_user, int ldb_user, const float *beta_user, float *C_f32_user, int ldc_user, cudaStream_t stream, const char *operation_name="GEMM") | TinyLlamaModel | |
| TinyLlamaModel(const ModelConfig &config, const SafeTensorsLoader &loader) | TinyLlamaModel | |
| TinyLlamaModel(const ModelConfig &initial_config, const std::string &model_path) | TinyLlamaModel | |
| TinyLlamaModel(const ModelConfig &config_from_session, std::unique_ptr< GGUFData > gguf_data_from_session) | TinyLlamaModel | |
| use_bf16_tensor_cores_ | TinyLlamaModel | private |
| ~TinyLlamaModel() | TinyLlamaModel | |
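
The constructors and `forward` entry point above suggest a simple single-token decode loop. The sketch below is a usage guess, not the project's documented API: the header name `tinyllama_model.h` is hypothetical, and the return types of `lookup_embedding`, `forward`, and `get_vocab_size` are assumptions, since this member list records parameter lists but not return types.

```cpp
#include <string>
#include <vector>

#include "tinyllama_model.h"  // hypothetical header for TinyLlamaModel, ModelConfig, KVCache

int main() {
    ModelConfig config;  // assumed default-constructible; real code would populate it
    // Second constructor overload from the list: config plus a path to the model file.
    TinyLlamaModel model(config, std::string("models/tinyllama.gguf"));

    KVCache kv_cache;  // construction/sizing of the cache is an assumption

    // lookup_embedding's return type is not shown above; std::vector<float> is an assumption.
    std::vector<float> activations = model.lookup_embedding(/*token_id=*/1);

    // Run one step; n_tokens is assumed to be the token's position in the sequence,
    // and a null attention_mask is assumed to mean the default causal mask.
    std::vector<float> logits =
        model.forward(activations, /*n_tokens=*/0, &kv_cache, /*attention_mask=*/nullptr);

    // Logits are assumed to span the full vocabulary.
    return logits.size() == static_cast<size_t>(model.get_vocab_size()) ? 0 : 1;
}
```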
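The `ensure_*_dequantized` / `clear_layer_dequantized_weights` pairs, together with the per-format member variants (`*_q4k`, `*_q6k`, `*_q8_0`, `*_q8k`, `*_f32`), point to a lazy per-projection dequantization scheme: f32 copies of quantized weights are materialized on demand and can be released per layer. A minimal sketch, assuming these helpers return `void` and may be called explicitly by the caller rather than only internally:

```cpp
#include "tinyllama_model.h"  // hypothetical header, as above

// Materialize f32 copies of one layer's quantized projection weights.
void warm_layer(TinyLlamaModel &model, int layer_idx) {
    model.ensure_q_proj_dequantized(layer_idx);
    model.ensure_k_proj_dequantized(layer_idx);
    model.ensure_v_proj_dequantized(layer_idx);
    model.ensure_o_proj_dequantized(layer_idx);
    model.ensure_gate_proj_dequantized(layer_idx);
    model.ensure_up_proj_dequantized(layer_idx);
    model.ensure_down_proj_dequantized(layer_idx);
}

// Release the dequantized copies to bound resident memory.
void cool_layer(TinyLlamaModel &model, int layer_idx) {
    model.clear_layer_dequantized_weights(layer_idx);
}
```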
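Similarly, `ensure_layer_weights_on_gpu` / `free_layer_gpu_weights` read as per-layer GPU residency controls for a hybrid CPU/GPU layer split. The sketch below assumes `initialize_gpu_and_rope()` is safe to call before uploading layers and that all three calls return `void`; whether the constructors already perform this setup is not documented here.

```cpp
#include "tinyllama_model.h"  // hypothetical header, as above

// Upload a contiguous window of layers to the GPU.
void move_layer_window_to_gpu(TinyLlamaModel &model, int first, int last) {
    model.initialize_gpu_and_rope();  // CUDA state + RoPE tables (assumed idempotent)
    for (int l = first; l <= last; ++l) {
        model.ensure_layer_weights_on_gpu(l);  // upload this layer's weights if absent
    }
}

// Release the device buffers for a single layer.
void evict_layer_from_gpu(TinyLlamaModel &model, int layer_idx) {
    model.free_layer_gpu_weights(layer_idx);
}
```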