TinyLlama.cpp 1.0
A lightweight C++ implementation of the TinyLlama language model
cpu_batch_processor.h
#pragma once

#include "model.h"
#include "kv_cache.h"
#include <vector>

class CPUBatchProcessor {
public:
    explicit CPUBatchProcessor(TinyLlamaModel* model);

    std::vector<float> forward_cpu_batch(
        const std::vector<float>& batch_input_activations,
        int num_tokens_in_batch,
        int num_cpu_layers_to_process,
        int start_pos_in_sequence,
        KVCache* kv_cache,
        const std::vector<int>& prompt_lengths);

private:
    TinyLlamaModel* model_;
};
Referenced types:
TinyLlamaModel: main transformer model class for TinyLlama (definition: model.h, line 285).
KVCache: complete key-value cache for all transformer layers (definition: model.h, line 151).
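The header only declares the interface, so the sketch below shows one way a caller might drive forward_cpu_batch for a single-prompt prefill batch. The wrapper function, its parameter names (batch_activations, num_cpu_layers), and the single-prompt batch layout are illustrative assumptions; only CPUBatchProcessor and forward_cpu_batch come from this file.

// Hypothetical usage sketch, not part of the library. Assumes the model and
// KV cache have already been constructed elsewhere.
#include "cpu_batch_processor.h"

#include <vector>

std::vector<float> run_cpu_portion(TinyLlamaModel* model,
                                   KVCache* kv_cache,
                                   const std::vector<float>& batch_activations,
                                   int num_tokens,
                                   int num_cpu_layers) {
    CPUBatchProcessor processor(model);

    // One prompt whose length equals the whole batch; a multi-prompt batch
    // would list one length per prompt instead.
    std::vector<int> prompt_lengths = {num_tokens};

    // Start at sequence position 0 (prompt prefill); later decode steps would
    // pass the current sequence offset so the KV cache is appended correctly.
    return processor.forward_cpu_batch(batch_activations,
                                       num_tokens,
                                       num_cpu_layers,
                                       /*start_pos_in_sequence=*/0,
                                       kv_cache,
                                       prompt_lengths);
}

The raw-pointer constructor argument suggests the processor borrows the model rather than owning it, so the caller is presumably responsible for keeping the TinyLlamaModel alive for the processor's lifetime.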