Advanced Chunk Processing Library 0.2.0
A comprehensive C++ library for advanced data chunking strategies and processing operations
Loading...
Searching...
No Matches
sophisticated_chunking_demo.cpp
Go to the documentation of this file.
2#include <iostream>
3#include <string>
4#include <vector>
5
6using namespace sophisticated_chunking;
7
8/**
9 * @brief Demonstrate wavelet-based chunking with different data types
10 */
11void demonstrate_wavelet_chunking() {
12 std::cout << "\n=== Wavelet-based Chunking Demonstration ===\n";
13
14 // Numeric data
15 std::vector<double> numeric_data = {1.0, 1.1, 1.2, 5.0, 5.1, 5.2, 2.0, 2.1, 2.2};
16 WaveletChunking<double> wavelet_numeric(4, 0.5);
17 auto numeric_chunks = wavelet_numeric.chunk(numeric_data);
18
19 // Text data
20 std::string text = "This is a test sentence. Another sentence here. And one more.";
21 std::vector<char> char_data(text.begin(), text.end());
22 WaveletChunking<char> wavelet_text(8, 0.3);
23 auto text_chunks = wavelet_text.chunk(char_data);
24
25 // Print results
26 std::cout << "Numeric chunks: " << numeric_chunks.size() << "\n";
27 std::cout << "Text chunks: " << text_chunks.size() << "\n";
28}
29
30/**
31 * @brief Demonstrate mutual information based chunking
32 */
33void demonstrate_mutual_information_chunking() {
34 std::cout << "\n=== Mutual Information Chunking Demonstration ===\n";
35
36 // Word-level data
37 std::vector<std::string> words = {"the", "quick", "brown", "fox", "jumps",
38 "over", "the", "lazy", "dog"};
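39 MutualInformationChunking<std::string> mi_words(/* constructor arguments missing from this listing */);
40 auto word_chunks = mi_words.chunk(words);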
41
42 // Binary data
43 std::vector<uint8_t> binary_data = {0xFF, 0xFE, 0xFD, 0x00, 0x01, 0x02};
44 MutualInformationChunking<uint8_t> mi_binary(2, 0.5);
45 auto binary_chunks = mi_binary.chunk(binary_data);
46
47 // Print results
48 std::cout << "Word chunks: " << word_chunks.size() << "\n";
49 std::cout << "Binary chunks: " << binary_chunks.size() << "\n";
50}
51
52/**
53 * @brief Demonstrate DTW-based chunking
54 */
55void demonstrate_dtw_chunking() {
56 std::cout << "\n=== DTW-based Chunking Demonstration ===\n";
57
58 // Time series data
59 std::vector<float> time_series = {1.0f, 1.2f, 1.1f, 5.0f, 5.2f, 5.1f, 2.0f, 2.2f, 2.1f};
60 DTWChunking<float> dtw_numeric(5, 1.5);
61 auto ts_chunks = dtw_numeric.chunk(time_series);
62
63 // Categorical data
64 std::vector<char> categorical = {'A', 'A', 'B', 'B', 'C', 'C', 'A', 'B'};
65 DTWChunking<char> dtw_categorical(3, 1.0);
66 auto cat_chunks = dtw_categorical.chunk(categorical);
67
68 // Print results
69 std::cout << "Time series chunks: " << ts_chunks.size() << "\n";
70 std::cout << "Categorical chunks: " << cat_chunks.size() << "\n";
71}
72
DTWChunking
Dynamic time warping based chunking for sequence alignment.
DTWChunking::chunk
std::vector< std::vector< T > > chunk(const std::vector< T > &data) const
Chunk data based on DTW analysis.
MutualInformationChunking
Information theory based chunking using mutual information.
MutualInformationChunking::chunk
std::vector< std::vector< T > > chunk(const std::vector< T > &data) const
Chunk data based on mutual information analysis.
WaveletChunking
Wavelet-based chunking strategy using signal processing principles.
WaveletChunking::chunk
std::vector< std::vector< T > > chunk(const std::vector< T > &data) const
Chunk data based on wavelet transform analysis.
void demonstrate_wavelet_chunking()
Demonstrate wavelet-based chunking with different data types.
void demonstrate_dtw_chunking()
Demonstrate DTW-based chunking.
void demonstrate_mutual_information_chunking()
Demonstrate mutual information based chunking.