Advanced Chunk Processing Library 0.2.0
A comprehensive C++ library for advanced data chunking strategies and processing operations
Loading...
Searching...
No Matches
main.cpp
Go to the documentation of this file.
1/*Copyright (C) 2024 Jonathan Reich
2This program is free software; you can redistribute it and/or modify
3it under the terms of the GNU General Public License as published by
4the Free Software Foundation; either version 2 of the License, or
5(at your option) any later version.
6This program is distributed in the hope that it will be useful,
7but WITHOUT ANY WARRANTY; without even the implied warranty of
8MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9GNU General Public License for more details.
10You should have received a copy of the GNU General Public License along
11with this program; if not, see <https://www.gnu.org/licenses/>.
12*/
13
14/**
15 * @file main.cpp
16 * @brief Demonstrates various chunking strategies and operations.
17 *
18 * This file contains examples of how to use the chunking library to process
19 * data in different ways, including integer, float, and string chunking.
20 */
21
23#include "chunk.hpp"
24#include "chunk_compression.hpp"
25#include "chunk_strategies.hpp"
26#include "chunk_windows.hpp"
27#include "config.hpp"
28#include "data_structures.hpp"
29#include "parallel_chunk.hpp"
31#include "utils.hpp"
32#include <iomanip>
33#include <iostream>
34#include <memory>
35#include <numeric>
36#include <vector>
37
38using namespace advanced_structures; // For ChunkSkipList and ChunkBPlusTree
39using namespace parallel_chunk; // For ParallelChunkProcessor
40using namespace chunk_compression; // For ChunkCompressor
41using namespace chunk_processing; // For all chunking strategies
42using namespace chunk_windows;
43
44/**
45 * @brief Helper function to print chunks
46 * @tparam T The type of elements in the chunks
47 * @param chunks The vector of chunks to print
48 */
template <typename T>
void print_chunks(const std::vector<std::vector<T>>& chunks) {
    // std::fixed and std::setprecision are sticky: without the save/restore
    // below they would keep affecting every later write to std::cout made by
    // the caller.
    const std::ios_base::fmtflags saved_flags = std::cout.flags();
    const std::streamsize saved_precision = std::cout.precision();

    // Only floating-point values are affected by these manipulators; the
    // integer row index and the string literals print the same either way.
    std::cout << std::fixed << std::setprecision(2);

    std::cout << "Chunks: [" << '\n';
    for (size_t i = 0; i < chunks.size(); ++i) {
        std::cout << " " << i << ": [";
        for (const auto& value : chunks[i]) {
            std::cout << value << " ";
        }
        std::cout << "]" << '\n';
    }
    // Single flush at the end instead of one per row.
    std::cout << "]" << std::endl;

    std::cout.flags(saved_flags);
    std::cout.precision(saved_precision);
}
61
62/**
63 * @brief Helper function to print sub-chunks with detailed formatting
64 * @tparam T The type of elements in the chunks
65 * @param sub_chunks The 3D vector containing the sub-chunks to print
66 * @param label The label to display for this set of sub-chunks
67 * @param precision The number of decimal places to show for floating-point numbers
68 */
template <typename T>
void print_sub_chunks(const std::vector<std::vector<std::vector<T>>>& sub_chunks,
                      const std::string& label, int precision = 2) {
    // Remember the caller's formatting so the sticky std::fixed /
    // std::setprecision applied below do not leak out of this function.
    const std::ios_base::fmtflags saved_flags = std::cout.flags();
    const std::streamsize saved_precision = std::cout.precision();

    // Applied once up front: only floating-point values react to these
    // manipulators, so the label and the 1-based counters are unaffected.
    std::cout << std::fixed << std::setprecision(precision);

    std::cout << "\n" << label << ":\n";
    for (size_t i = 0; i < sub_chunks.size(); ++i) {
        // Levels and sub-chunks are shown 1-based.
        std::cout << "Level " << i + 1 << ":\n";
        for (size_t j = 0; j < sub_chunks[i].size(); ++j) {
            std::cout << " Sub-chunk " << j + 1 << ": ";
            for (const auto& val : sub_chunks[i][j]) {
                std::cout << val << " ";
            }
            std::cout << "\n";
        }
    }

    std::cout.flags(saved_flags);
    std::cout.precision(saved_precision);
}
84
85/**
86 * @brief Demonstrates complex recursive sub-chunking with multiple levels
87 *
88 * This function shows how to apply recursive sub-chunking strategies
89 * to data with clear patterns, using variance-based chunking at multiple levels.
90 */
92 std::cout << "\n=== Complex Recursive Sub-chunking ===" << std::endl;
93
94 // Example data - now using 1D vector
95 std::vector<double> data = {1.0, 1.1, 1.2, 5.0, 5.1, 5.2, 2.0, 2.1, 2.2, 10.0, 10.1, 10.2};
96
97 auto variance_strategy = std::make_shared<chunk_processing::VarianceStrategy<double>>(3.0);
98 chunk_processing::RecursiveSubChunkStrategy<double> recursive_chunker(variance_strategy, 3, 2);
99 auto recursive_result = recursive_chunker.apply(data);
100
101 print_chunks(recursive_result);
102}
103
104/**
105 * @brief Demonstrates hierarchical sub-chunking using multiple strategies
106 *
107 * This function shows how to apply different chunking strategies
108 * in a hierarchical manner, combining variance, similarity, and entropy-based approaches.
109 */
111 std::cout << "\n=== Multi-Strategy Sub-chunking ===" << std::endl;
112
113 // Example data - now using 1D vector
114 std::vector<double> data = {1.0, 1.1, 1.2, 5.0, 5.1, 5.2, 2.0, 2.1, 2.2, 10.0, 10.1, 10.2};
115
116 std::vector<std::shared_ptr<chunk_processing::ChunkStrategy<double>>> strategies = {
117 std::make_shared<chunk_processing::VarianceStrategy<double>>(5.0),
118 std::make_shared<chunk_processing::EntropyStrategy<double>>(1.0)};
119
120 chunk_processing::HierarchicalSubChunkStrategy<double> hierarchical_chunker(strategies, 2);
121 auto hierarchical_result = hierarchical_chunker.apply(data);
122
123 print_chunks(hierarchical_result);
124}
125
126/**
127 * @brief Demonstrates adaptive conditional sub-chunking
128 *
129 * This function shows how to use conditional sub-chunking with
130 * adaptive thresholds based on chunk properties.
131 */
133 std::cout << "\n=== Adaptive Conditional Sub-chunking ===" << std::endl;
134
135 // Example data - now using 1D vector
136 std::vector<double> data = {1.0, 1.1, 1.2, 5.0, 5.1, 5.2, 2.0, 2.1, 2.2, 10.0, 10.1, 10.2};
137
138 auto variance_strategy = std::make_shared<chunk_processing::VarianceStrategy<double>>(5.0);
139 auto condition = [](const std::vector<double>& chunk) {
140 return chunk.size() > 5; // Only sub-chunk large chunks
141 };
142
143 chunk_processing::ConditionalSubChunkStrategy<double> conditional_chunker(variance_strategy,
144 condition, 2);
145 auto conditional_result = conditional_chunker.apply(data);
146
147 print_chunks(conditional_result);
148}
149
150/**
151 * @brief Main function demonstrating various chunking strategies
152 * @return 0 on successful execution
153 */
154int main(int argc, char* argv[]) {
158
159 std::cout << "\n=== Demonstrating Advanced Chunking Structures ===\n";
160
161 // Example: SemanticChunker usage
162 std::cout << "\n=== SemanticChunker Example ===" << std::endl;
163 SemanticChunker<std::string> text_chunker;
164 std::string text = "This is the first sentence. This is the second one. And here's a third!";
165 auto text_chunks = text_chunker.chunk(text);
166 std::cout << "Text chunks created: " << text_chunks.size() << "\n";
167
168 // Custom NLP model example
169 class CustomNLPModel {
170 public:
171 double calculateSimilarity(const std::string& s1, const std::string& s2) {
172 // Simple example: compare lengths as a similarity metric
173 return std::abs(1.0 - static_cast<double>(std::abs(static_cast<int>(s1.length()) -
174 static_cast<int>(s2.length()))) /
175 std::max(s1.length(), s2.length()));
176 }
177 };
178
180 auto custom_chunks = custom_chunker.chunk(text);
181 std::cout << "Custom model chunks created: " << custom_chunks.size() << "\n\n";
182
183 return 0;
184}
int main()
Definition benchmark.cpp:35
Defines various strategies for chunking data based on different criteria.
Template class for semantic-based content chunking.
std::vector< ContentType > chunk(const ContentType &content)
Chunk content based on semantic boundaries.
std::vector< std::vector< T > > apply(const std::vector< T > &data) const override
std::vector< std::vector< T > > apply(const std::vector< T > &data) const override
void demonstrate_multi_strategy_subchunking()
Demonstrates hierarchical sub-chunking using multiple strategies.
Definition main.cpp:110
void demonstrate_adaptive_conditional_subchunking()
Demonstrates adaptive conditional sub-chunking.
Definition main.cpp:132
void demonstrate_complex_recursive_subchunking()
Demonstrates complex recursive sub-chunking with multiple levels.
Definition main.cpp:91
void print_sub_chunks(const std::vector< std::vector< std::vector< T > > > &sub_chunks, const std::string &label, int precision=2)
Helper function to print sub-chunks with detailed formatting.
Definition main.cpp:70
void print_chunks(const std::vector< std::vector< T > > &chunks)
Helper function to print chunks.
Definition main.cpp:50
Advanced sub-chunking strategies for hierarchical data processing.