TinyLlama.cpp
1.0
A lightweight C++ implementation of the TinyLlama language model
Loading...
Searching...
No Matches
cpu_batch_processor.h
Go to the documentation of this file.
1
#pragma once
2
3
#include "
model.h
"
4
#include "
kv_cache.h
"
5
#include <vector>
6
7
class
CPUBatchProcessor
{
8
public
:
9
explicit
CPUBatchProcessor
(
TinyLlamaModel
* model);
10
11
std::vector<float>
forward_cpu_batch
(
12
const
std::vector<float>& batch_input_activations,
13
int
num_tokens_in_batch,
14
int
num_cpu_layers_to_process,
15
int
start_pos_in_sequence,
16
KVCache
* kv_cache,
17
const
std::vector<int>& prompt_lengths);
18
19
private
:
20
TinyLlamaModel
*
model_
;
21
};
CPUBatchProcessor
Definition
cpu_batch_processor.h:7
CPUBatchProcessor::forward_cpu_batch
std::vector< float > forward_cpu_batch(const std::vector< float > &batch_input_activations, int num_tokens_in_batch, int num_cpu_layers_to_process, int start_pos_in_sequence, KVCache *kv_cache, const std::vector< int > &prompt_lengths)
Definition
cpu_batch_processor.cpp:11
CPUBatchProcessor::model_
TinyLlamaModel * model_
Definition
cpu_batch_processor.h:20
TinyLlamaModel
Main transformer model class for TinyLlama.
Definition
model.h:285
kv_cache.h
model.h
KVCache
Complete Key-Value cache for all transformer layers.
Definition
model.h:151
Generated by
1.9.8