|
TinyLlama.cpp 1.0
A lightweight C++ implementation of the TinyLlama language model
|
This is the complete list of members for tinyllama::TinyLlamaSession, including all inherited members.
| batch_generation_parallel(const std::vector< int > &current_tokens, const std::vector< int > &token_positions, const std::vector< int > &original_sequence_indices, std::vector< std::vector< float > > &batch_logits) | tinyllama::TinyLlamaSession | private |
| batch_prefill_parallel(const std::vector< std::vector< int > > &all_tokens, const std::vector< int > &prompt_lengths, std::vector< std::vector< float > > &batch_final_logits) | tinyllama::TinyLlamaSession | private |
| config_ | tinyllama::TinyLlamaSession | private |
| eos_token_id_ | tinyllama::TinyLlamaSession | private |
| generate(const std::string &prompt, int steps=128, float temperature=0.1f, int top_k=40, float top_p=0.9f, const std::string &system_prompt="", bool apply_q_a_format=false) | tinyllama::TinyLlamaSession | |
| generate_batch(const std::vector< std::string > &prompts, int steps=128, float temperature=0.1f, int top_k=40, float top_p=0.9f, const std::string &system_prompt="", bool apply_q_a_format=false) | tinyllama::TinyLlamaSession | |
| generated_stream_ | tinyllama::TinyLlamaSession | private |
| generated_text_for_api_return_ | tinyllama::TinyLlamaSession | private |
| get_config() const | tinyllama::TinyLlamaSession | inline |
| get_kv_cache() | tinyllama::TinyLlamaSession | inline |
| get_tokenizer() const | tinyllama::TinyLlamaSession | inline |
| kv_cache_ | tinyllama::TinyLlamaSession | private |
| max_batch_size_ | tinyllama::TinyLlamaSession | private |
| model_ | tinyllama::TinyLlamaSession | private |
| operator=(const TinyLlamaSession &)=delete | tinyllama::TinyLlamaSession | private |
| rng_ | tinyllama::TinyLlamaSession | private |
| threads_ | tinyllama::TinyLlamaSession | private |
| TinyLlamaSession(const std::string &model_path, const std::string &tokenizer_path, int threads=1, int num_gpu_layers_from_cli=0, bool cli_use_mmap=true, bool use_kv_quant=false, bool use_batch_generation=false, int max_batch_size=1) | tinyllama::TinyLlamaSession | |
| TinyLlamaSession(const TinyLlamaSession &)=delete | tinyllama::TinyLlamaSession | private |
| tokenizer_ | tinyllama::TinyLlamaSession | private |
| use_batch_generation_ | tinyllama::TinyLlamaSession | private |
| ~TinyLlamaSession() | tinyllama::TinyLlamaSession | |