TinyLlama.cpp 1.0
A lightweight C++ implementation of the TinyLlama language model
Loading...
Searching...
No Matches
kv_cache.h
Go to the documentation of this file.
 1  #ifndef KV_CACHE_H
 2  #define KV_CACHE_H
 3
 4  #include "model.h"
 5
 6  void initialize_kv_cache(KVCache* kv_cache, const ModelConfig& config,
 7                           int total_num_model_layers, int num_gpu_layers_to_allocate,
 8                           int max_seq_len_arg, int num_kv_heads,
 9                           int head_dim, int max_batch_size_arg);
10
11  #endif // KV_CACHE_H
void initialize_kv_cache(KVCache *kv_cache, const ModelConfig &config, int total_num_model_layers, int num_gpu_layers_to_allocate, int max_seq_len_arg, int num_kv_heads, int head_dim, int max_batch_size_arg)
KVCache: Complete Key-Value cache for all transformer layers.
Definition model.h:151
ModelConfig: Model configuration structure holding architecture and hyperparameters.
Definition model.h:80