TinyLlama.cpp 1.0
A lightweight C++ implementation of the TinyLlama language model
Loading...
Searching...
No Matches
TinyLlamaModel Member List

This is the complete list of members for TinyLlamaModel, including all inherited members.

clear_layer_dequantized_weights(int layer_idx) — TinyLlamaModel
config_ — TinyLlamaModel [private]
cpu_batch_processor_ — TinyLlamaModel [private]
CPUBatchProcessor — TinyLlamaModel [friend]
embed_tokens — TinyLlamaModel [private]
embed_tokens_f32 — TinyLlamaModel [private]
embed_tokens_q4k — TinyLlamaModel [private]
embed_tokens_q6k — TinyLlamaModel [private]
embed_tokens_q8_0 — TinyLlamaModel [private]
embed_tokens_q8k — TinyLlamaModel [private]
ensure_bf16_concatenated_weights_loaded() — TinyLlamaModel
ensure_down_proj_dequantized(int layer_idx) — TinyLlamaModel
ensure_embed_tokens_dequantized() — TinyLlamaModel
ensure_f32_concatenated_weights_loaded() — TinyLlamaModel
ensure_gate_proj_dequantized(int layer_idx) — TinyLlamaModel
ensure_k_proj_dequantized(int layer_idx) — TinyLlamaModel
ensure_layer_weights_on_gpu(int layer_idx) — TinyLlamaModel
ensure_lm_head_dequantized() — TinyLlamaModel
ensure_o_proj_dequantized(int layer_idx) — TinyLlamaModel
ensure_q_proj_dequantized(int layer_idx) — TinyLlamaModel
ensure_up_proj_dequantized(int layer_idx) — TinyLlamaModel
ensure_v_proj_dequantized(int layer_idx) — TinyLlamaModel
f32_concatenated_weights_loaded_ — TinyLlamaModel [private]
final_norm — TinyLlamaModel [private]
final_norm_f32 — TinyLlamaModel [private]
final_norm_q4k — TinyLlamaModel [private]
final_norm_q6k — TinyLlamaModel [private]
forward(std::vector< float > &input, int n_tokens, KVCache *kv_cache, const std::vector< int > *attention_mask) — TinyLlamaModel
forward_cpu_batch(const std::vector< float > &batch_input_activations, int num_tokens_in_batch, int num_cpu_layers_to_process, int start_pos_in_sequence, KVCache *kv_cache, const std::vector< int > &prompt_lengths={}) — TinyLlamaModel
forward_cpu_batch_generation(const std::vector< float > &batch_input_activations, const std::vector< int > &token_positions, const std::vector< int > &original_sequence_indices, int num_tokens_in_batch, KVCache *kv_cache) — TinyLlamaModel
forward_cpu_logits_batch(const std::vector< float > &final_batch_activations, int num_tokens_in_batch) — TinyLlamaModel
free_bf16_concatenated_weights() — TinyLlamaModel
free_layer_gpu_weights(int layer_idx) — TinyLlamaModel
get_config() const — TinyLlamaModel [inline]
get_embed_tokens() const — TinyLlamaModel [inline]
get_gguf_data() const — TinyLlamaModel [inline]
get_gguf_data_ptr() — TinyLlamaModel [inline]
get_layers() — TinyLlamaModel [inline]
get_lm_head() const — TinyLlamaModel [inline]
get_vocab_size() const — TinyLlamaModel
gguf_data_ — TinyLlamaModel [private]
initialize_gpu_and_rope() — TinyLlamaModel
initialize_rope_freqs() — TinyLlamaModel
initialize_weights(const SafeTensorsLoader *loader, const GGUFData *gguf) — TinyLlamaModel [private]
layers — TinyLlamaModel [private]
lm_head — TinyLlamaModel [private]
lm_head_f32 — TinyLlamaModel [private]
lm_head_q4k — TinyLlamaModel [private]
lm_head_q6k — TinyLlamaModel [private]
lm_head_q8_0 — TinyLlamaModel [private]
lm_head_q8k — TinyLlamaModel [private]
lookup_embedding(int token_id) — TinyLlamaModel
map_gguf_weights — TinyLlamaModel [friend]
model_path_ — TinyLlamaModel [private]
precomputed_freqs_cis_ — TinyLlamaModel [private]
smart_gemm_batch_cuda(bool transa_user, bool transb_user, int m_user, int n_user, int k_user, const float *alpha_user, const float *A_f32_user, int lda_user, const float *B_f32_user, int ldb_user, const float *beta_user, float *C_f32_user, int ldc_user, cudaStream_t stream, const char *operation_name="GEMM") — TinyLlamaModel
TinyLlamaModel(const ModelConfig &config, const SafeTensorsLoader &loader) — TinyLlamaModel
TinyLlamaModel(const ModelConfig &initial_config, const std::string &model_path) — TinyLlamaModel
TinyLlamaModel(const ModelConfig &config_from_session, std::unique_ptr< GGUFData > gguf_data_from_session) — TinyLlamaModel
use_bf16_tensor_cores_ — TinyLlamaModel [private]
~TinyLlamaModel() — TinyLlamaModel