TinyLlama.cpp
1.0
A lightweight C++ implementation of the TinyLlama language model
Loading...
Searching...
No Matches
kv_cache.h
Go to the documentation of this file.
1
#ifndef KV_CACHE_H
2
#define KV_CACHE_H
3
4
#include "
model.h
"
5
6
void
initialize_kv_cache
(
KVCache
* kv_cache,
const
ModelConfig
& config,
7
int
total_num_model_layers,
int
num_gpu_layers_to_allocate,
8
int
max_seq_len_arg,
int
num_kv_heads,
9
int
head_dim,
int
max_batch_size_arg);
10
11
#endif
// KV_CACHE_H
initialize_kv_cache
void initialize_kv_cache(KVCache *kv_cache, const ModelConfig &config, int total_num_model_layers, int num_gpu_layers_to_allocate, int max_seq_len_arg, int num_kv_heads, int head_dim, int max_batch_size_arg)
model.h
KVCache
Complete Key-Value cache for all transformer layers.
Definition
model.h:151
ModelConfig
Model configuration structure holding architecture and hyperparameters.
Definition
model.h:80
Generated by Doxygen
1.9.8