Advanced Chunk Processing Library 0.2.0
A comprehensive C++ library for advanced data chunking strategies and processing operations
Loading...
Searching...
No Matches
sub_chunk_strategies.hpp
Go to the documentation of this file.
1/**
2 * @file sub_chunk_strategies.hpp
3 * @brief Advanced sub-chunking strategies for hierarchical data processing
4 *
5 * This file provides implementations of various sub-chunking strategies:
6 * - Recursive sub-chunking for depth-based processing
7 * - Hierarchical sub-chunking for level-based processing
8 * - Conditional sub-chunking for property-based processing
9 */
10#pragma once
11
12#include "chunk_strategies.hpp"
13#include <atomic>
14#include <functional>
15#include <iterator>
16#include <memory>
17#include <mutex>
18#include <stdexcept>
19#include <vector>
20
21namespace chunk_processing {
22
23namespace detail {
24// Helper functions for safe operations
/**
 * @brief Tells whether a single chunk holds at least one element.
 *
 * @tparam T Element type stored in the chunk.
 * @param chunk Chunk to inspect.
 * @return true when the chunk has one or more elements, false when it is empty.
 */
template <typename T>
bool is_valid_chunk(const std::vector<T>& chunk) {
    return chunk.size() > 0;
}
29
/**
 * @brief Tells whether a chunk list is usable: non-empty and free of empty chunks.
 *
 * @tparam T Element type stored in the chunks.
 * @param chunks Chunk list to inspect.
 * @return false for an empty list or a list containing at least one empty chunk,
 *         true otherwise.
 */
template <typename T>
bool is_valid_chunks(const std::vector<std::vector<T>>& chunks) {
    if (chunks.empty()) {
        return false;
    }
    // A single empty chunk invalidates the whole list.
    return std::none_of(chunks.cbegin(), chunks.cend(),
                        [](const std::vector<T>& candidate) { return candidate.empty(); });
}
35
/**
 * @brief Copies a chunk list, yielding an empty list if the copy throws.
 *
 * @tparam T Element type stored in the chunks.
 * @param chunks Chunk list to duplicate.
 * @return A copy of @p chunks, or an empty list when copying raised any exception.
 */
template <typename T>
std::vector<std::vector<T>> safe_copy(const std::vector<std::vector<T>>& chunks) try {
    std::vector<std::vector<T>> duplicate(chunks);
    return duplicate;
} catch (...) {
    // Any failure while copying is reported as "no chunks".
    return {};
}
44} // namespace detail
45
46template <typename T>
48private:
// Strategy used to split data at each level; the constructor rejects a null pointer.
49 const std::shared_ptr<ChunkStrategy<T>> base_strategy_;
// Depth at which safe_recursive_apply stops splitting; the constructor rejects 0.
50 const size_t max_depth_;
// Data or chunks of this size or smaller are not split further; the constructor rejects 0.
51 const size_t min_size_;
// Locked by safe_recursive_apply while it works with base_strategy_.
52 mutable std::mutex mutex_;
// Set while a safe_recursive_apply call is running; a call that finds it already set
// throws std::runtime_error.
53 std::atomic<bool> is_processing_{false};
54
55 std::vector<std::vector<T>> safe_recursive_apply(const std::vector<T>& data,
56 const size_t current_depth) {
57 // Guard against reentrant calls
58 if (is_processing_.exchange(true)) {
59 throw std::runtime_error("Recursive strategy already processing");
60 }
61 struct Guard {
62 std::atomic<bool>& flag;
63 Guard(std::atomic<bool>& f) : flag(f) {}
64 ~Guard() {
65 flag = false;
66 }
67 } guard(is_processing_);
68
69 try {
70 if (data.empty() || current_depth >= max_depth_ || data.size() <= min_size_) {
71 return data.empty() ? std::vector<std::vector<T>>{}
72 : std::vector<std::vector<T>>{data};
73 }
74
75 std::vector<std::vector<T>> result;
76 {
77 std::lock_guard<std::mutex> lock(mutex_);
78 if (!base_strategy_) {
79 throw std::runtime_error("Invalid base strategy");
80 }
81 auto chunks = detail::safe_copy(base_strategy_->apply(data));
82 if (!detail::is_valid_chunks(chunks)) {
83 return {data};
84 }
85 result.reserve(chunks.size() * 2);
86
87 for (const auto& chunk : chunks) {
88 if (chunk.size() <= min_size_) {
89 result.push_back(chunk);
90 continue;
91 }
92
93 try {
94 auto sub_chunks = safe_recursive_apply(chunk, current_depth + 1);
95 if (detail::is_valid_chunks(sub_chunks)) {
96 result.insert(result.end(), std::make_move_iterator(sub_chunks.begin()),
97 std::make_move_iterator(sub_chunks.end()));
98 } else {
99 result.push_back(chunk);
100 }
101 } catch (...) {
102 result.push_back(chunk);
103 }
104 }
105 }
106 return result.empty() ? std::vector<std::vector<T>>{data} : result;
107 } catch (...) {
108 return {data};
109 }
110 }
111
112public:
113 RecursiveSubChunkStrategy(std::shared_ptr<ChunkStrategy<T>> strategy, size_t max_depth = 5,
114 size_t min_size = 2)
115 : base_strategy_(strategy), max_depth_(max_depth), min_size_(min_size) {
116 if (!strategy)
117 throw std::invalid_argument("Invalid strategy");
118 if (max_depth == 0)
119 throw std::invalid_argument("Invalid max depth");
120 if (min_size == 0)
121 throw std::invalid_argument("Invalid min size");
122 }
123
124 std::vector<std::vector<T>> apply(const std::vector<T>& data) const override {
125 try {
126 if (data.empty())
127 return {};
128 return const_cast<RecursiveSubChunkStrategy*>(this)->safe_recursive_apply(data, 0);
129 } catch (...) {
130 return {data};
131 }
132 }
133};
134
135template <typename T>
137private:
// Strategies applied level by level, in order, by apply(); the constructor rejects an
// empty list and null entries.
138 std::vector<std::shared_ptr<ChunkStrategy<T>>> strategies_;
// Data or chunks of this size or smaller are returned unsplit; the constructor rejects 0.
139 size_t min_size_;
140
141 // Add helper method to safely process chunks
142 std::vector<std::vector<T>>
143 process_chunk(const std::vector<T>& chunk,
144 const std::shared_ptr<ChunkStrategy<T>>& strategy) const {
145 if (!strategy) {
146 throw std::runtime_error("Invalid strategy encountered");
147 }
148
149 if (chunk.size() <= min_size_) {
150 return {chunk};
151 }
152
153 try {
154 auto sub_chunks = strategy->apply(chunk);
155 if (sub_chunks.empty()) {
156 return {chunk};
157 }
158
159 // Validate sub-chunks
160 for (const auto& sub : sub_chunks) {
161 if (sub.empty()) {
162 return {chunk};
163 }
164 }
165
166 return sub_chunks;
167 } catch (const std::exception& e) {
168 // If strategy fails, return original chunk
169 return {chunk};
170 }
171 }
172
173public:
174 HierarchicalSubChunkStrategy(std::vector<std::shared_ptr<ChunkStrategy<T>>> strategies,
175 size_t min_size)
176 : min_size_(min_size) {
177 // Validate inputs
178 if (strategies.empty()) {
179 throw std::invalid_argument("Strategies vector cannot be empty");
180 }
181
182 // Deep copy strategies to ensure ownership
183 strategies_.reserve(strategies.size());
184 for (const auto& strategy : strategies) {
185 if (!strategy) {
186 throw std::invalid_argument("Strategy cannot be null");
187 }
188 strategies_.push_back(strategy);
189 }
190
191 if (min_size == 0) {
192 throw std::invalid_argument("Minimum size must be positive");
193 }
194 }
195
196 std::vector<std::vector<T>> apply(const std::vector<T>& data) const override {
197 if (data.empty()) {
198 return {};
199 }
200 if (data.size() <= min_size_) {
201 return {data};
202 }
203
204 try {
205 std::vector<std::vector<T>> current_chunks{data};
206
207 // Process each strategy level
208 for (const auto& strategy : strategies_) {
209 if (!strategy) {
210 throw std::runtime_error("Invalid strategy encountered");
211 }
212
213 std::vector<std::vector<T>> next_level;
214 next_level.reserve(current_chunks.size() *
215 2); // Reserve space to prevent reallocation
216
217 // Process each chunk at current level
218 for (const auto& chunk : current_chunks) {
219 if (chunk.empty()) {
220 continue;
221 }
222
223 auto sub_chunks = process_chunk(chunk, strategy);
224 next_level.insert(next_level.end(), std::make_move_iterator(sub_chunks.begin()),
225 std::make_move_iterator(sub_chunks.end()));
226 }
227
228 if (next_level.empty()) {
229 return current_chunks; // Return last valid chunking if next level failed
230 }
231
232 current_chunks = std::move(next_level);
233 }
234
235 return current_chunks;
236
237 } catch (const std::exception& e) {
238 throw std::runtime_error(std::string("Error in hierarchical strategy: ") + e.what());
239 }
240 }
241};
242
243template <typename T>
245private:
// Strategy applied by apply() when condition_ returns true; the constructor rejects null.
246 std::shared_ptr<ChunkStrategy<T>> base_strategy_;
// Predicate evaluated on the input data by apply(); the constructor rejects an empty function.
247 std::function<bool(const std::vector<T>&)> condition_;
// Data of this size or smaller is returned unsplit; the constructor rejects 0.
248 size_t min_size_;
249
250public:
252 std::function<bool(const std::vector<T>&)> condition,
253 size_t min_size)
254 : base_strategy_(strategy), condition_(condition), min_size_(min_size) {
255 // Validate inputs
256 if (!strategy) {
257 throw std::invalid_argument("Base strategy cannot be null");
258 }
259 if (!condition) {
260 throw std::invalid_argument("Condition function cannot be null");
261 }
262 if (min_size == 0) {
263 throw std::invalid_argument("Minimum size must be positive");
264 }
265 }
266
267 std::vector<std::vector<T>> apply(const std::vector<T>& data) const override {
268 if (data.empty()) {
269 return {};
270 }
271 if (data.size() <= min_size_) {
272 return {data};
273 }
274
275 try {
276 // Safely check condition and apply strategy
277 if (condition_ && condition_(data)) {
278 if (base_strategy_) {
279 return base_strategy_->apply(data);
280 }
281 }
282 } catch (const std::exception& e) {
283 throw std::runtime_error(std::string("Error in conditional strategy: ") + e.what());
284 }
285
286 return {data};
287 }
288};
289
290} // namespace chunk_processing
Defines various strategies for chunking data based on different criteria.
std::shared_ptr< ChunkStrategy< T > > base_strategy_
ConditionalSubChunkStrategy(std::shared_ptr< ChunkStrategy< T > > strategy, std::function< bool(const std::vector< T > &)> condition, size_t min_size)
std::vector< std::vector< T > > apply(const std::vector< T > &data) const override
std::function< bool(const std::vector< T > &)> condition_
HierarchicalSubChunkStrategy(std::vector< std::shared_ptr< ChunkStrategy< T > > > strategies, size_t min_size)
std::vector< std::shared_ptr< ChunkStrategy< T > > > strategies_
std::vector< std::vector< T > > process_chunk(const std::vector< T > &chunk, const std::shared_ptr< ChunkStrategy< T > > &strategy) const
std::vector< std::vector< T > > apply(const std::vector< T > &data) const override
RecursiveSubChunkStrategy(std::shared_ptr< ChunkStrategy< T > > strategy, size_t max_depth=5, size_t min_size=2)
const std::shared_ptr< ChunkStrategy< T > > base_strategy_
std::vector< std::vector< T > > apply(const std::vector< T > &data) const override
std::vector< std::vector< T > > safe_recursive_apply(const std::vector< T > &data, const size_t current_depth)
bool is_valid_chunks(const std::vector< std::vector< T > > &chunks)
std::vector< std::vector< T > > safe_copy(const std::vector< std::vector< T > > &chunks)
bool is_valid_chunk(const std::vector< T > &chunk)