Advanced Chunk Processing Library 0.2.0
A comprehensive C++ library for advanced data chunking strategies and processing operations
Loading...
Searching...
No Matches
chunk_processing::EntropyStrategy< T > Class Template Reference

#include <chunk_strategies.hpp>

+ Inheritance diagram for chunk_processing::EntropyStrategy< T >:
+ Collaboration diagram for chunk_processing::EntropyStrategy< T >:

Public Member Functions

 EntropyStrategy (double threshold)
 
std::vector< std::vector< T > > apply (const std::vector< T > &data) const override
 

Private Member Functions

double calculate_entropy (const std::vector< T > &chunk) const
 

Private Attributes

double threshold_
 

Detailed Description

template<typename T>
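class chunk_processing::EntropyStrategy< T >

Chunking strategy that appends input elements to a growing chunk and closes that chunk as soon as its Shannon entropy (in bits, computed from the relative frequencies of the distinct values it contains) exceeds the configured threshold. Any elements left over after the last split form a final chunk. An empty input yields no chunks, and a threshold of zero or less returns the whole input as a single chunk.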

Definition at line 140 of file chunk_strategies.hpp.

Constructor & Destructor Documentation

◆ EntropyStrategy()

template<typename T >
chunk_processing::EntropyStrategy< T >::EntropyStrategy ( double  threshold)
inline explicit

Definition at line 166 of file chunk_strategies.hpp.

Member Function Documentation

◆ apply()

template<typename T >
std::vector< std::vector< T > > chunk_processing::EntropyStrategy< T >::apply ( const std::vector< T > &  data) const
inline override virtual

Implements chunk_processing::ChunkStrategy< T >.

Definition at line 168 of file chunk_strategies.hpp.

168 {
169 std::vector<std::vector<T>> result;
170 if (data.empty())
171 return result;
172
173 // If threshold is zero or negative, return the entire data as a single chunk
174 if (threshold_ <= 0.0) {
175 return {data};
176 }
177
178 std::vector<T> current_chunk;
179 for (const auto& value : data) {
180 current_chunk.push_back(value);
181
182 if (current_chunk.size() > 1) {
183 double entropy = calculate_entropy(current_chunk);
184 if (entropy > threshold_) {
185 result.push_back(current_chunk);
186 current_chunk.clear();
187 }
188 }
189 }
190
191 if (!current_chunk.empty()) {
192 result.push_back(current_chunk);
193 }
194
195 return result;
196 }
double calculate_entropy(const std::vector< T > &chunk) const

References chunk_processing::EntropyStrategy< T >::calculate_entropy(), and chunk_processing::EntropyStrategy< T >::threshold_.

Referenced by TEST_F(), TEST_F(), TEST_F(), TEST_F(), and TEST_F().

◆ calculate_entropy()

template<typename T >
double chunk_processing::EntropyStrategy< T >::calculate_entropy ( const std::vector< T > &  chunk) const
inline private

Definition at line 144 of file chunk_strategies.hpp.

144 {
145 if (chunk.empty())
146 return 0.0;
147
148 // Calculate frequency distribution
149 std::map<T, double> freq;
150 for (const auto& val : chunk) {
151 freq[val] += 1.0;
152 }
153
154 // Calculate entropy
155 double entropy = 0.0;
156 double n = static_cast<double>(chunk.size());
157 for (const auto& pair : freq) {
158 double p = pair.second / n;
159 entropy -= p * std::log2(p);
160 }
161
162 return entropy;
163 }

Referenced by chunk_processing::EntropyStrategy< T >::apply().

Member Data Documentation

◆ threshold_

template<typename T >
double chunk_processing::EntropyStrategy< T >::threshold_
private

Definition at line 142 of file chunk_strategies.hpp.

Referenced by chunk_processing::EntropyStrategy< T >::apply().


The documentation for this class was generated from the following file: