Advanced Chunk Processing Library 0.2.0
A comprehensive C++ library for advanced data chunking strategies and processing operations
Loading...
Searching...
No Matches
chunk_metrics::ChunkQualityAnalyzer< T > Class Template Reference

#include <chunk_metrics.hpp>

Public Member Functions

 ChunkQualityAnalyzer ()=default
 
 ChunkQualityAnalyzer (ChunkQualityAnalyzer &&)=delete
 
 ChunkQualityAnalyzer (const ChunkQualityAnalyzer &)=delete
 
 ~ChunkQualityAnalyzer ()=default
 
void clear_cache () const
 
bool compare_cohesion (const std::vector< std::vector< T > > &well_separated, const std::vector< std::vector< T > > &mixed, double &high_result, double &mixed_result) const
 
double compute_cohesion (const std::vector< std::vector< T > > &chunks) const
 
double compute_quality_score (const std::vector< std::vector< T > > &chunks) const
 
double compute_separation (const std::vector< std::vector< T > > &chunks) const
 
double compute_silhouette_score (const std::vector< std::vector< T > > &chunks) const
 
std::map< std::string, double > compute_size_metrics (const std::vector< std::vector< T > > &chunks) const
 
ChunkQualityAnalyzer & operator= (ChunkQualityAnalyzer &&)=delete
 
ChunkQualityAnalyzer & operator= (const ChunkQualityAnalyzer &)=delete
 

Private Member Functions

double compute_chunk_cohesion (const std::vector< T > &chunk) const
 

Detailed Description

template<typename T>
class chunk_metrics::ChunkQualityAnalyzer< T >

Definition at line 59 of file chunk_metrics.hpp.

Constructor & Destructor Documentation

◆ ChunkQualityAnalyzer() [1/3]

template<typename T >
chunk_metrics::ChunkQualityAnalyzer< T >::ChunkQualityAnalyzer ( )
default

◆ ~ChunkQualityAnalyzer()

template<typename T >
chunk_metrics::ChunkQualityAnalyzer< T >::~ChunkQualityAnalyzer ( )
default

◆ ChunkQualityAnalyzer() [2/3]

template<typename T >
chunk_metrics::ChunkQualityAnalyzer< T >::ChunkQualityAnalyzer ( const ChunkQualityAnalyzer< T > &  )
delete

◆ ChunkQualityAnalyzer() [3/3]

template<typename T >
chunk_metrics::ChunkQualityAnalyzer< T >::ChunkQualityAnalyzer ( ChunkQualityAnalyzer< T > &&  )
delete

Member Function Documentation

◆ clear_cache()

template<typename T >
void chunk_metrics::ChunkQualityAnalyzer< T >::clear_cache ( ) const
inline

Definition at line 279 of file chunk_metrics.hpp.

279 {
280 // No-op in single-threaded version
281 }

◆ compare_cohesion()

template<typename T >
bool chunk_metrics::ChunkQualityAnalyzer< T >::compare_cohesion ( const std::vector< std::vector< T > > &  well_separated,
const std::vector< std::vector< T > > &  mixed,
double &  high_result,
double &  mixed_result 
) const
inline

Definition at line 117 of file chunk_metrics.hpp.

120 {
121 try {
122 if (well_separated.empty() || mixed.empty()) {
123 return false;
124 }
125
126 high_result = compute_cohesion(well_separated);
127 mixed_result = compute_cohesion(mixed);
128
129 return std::isfinite(high_result) &&
130 std::isfinite(mixed_result) &&
131 high_result > mixed_result;
132 } catch (...) {
133 return false;
134 }
135 }
double compute_cohesion(const std::vector< std::vector< T > > &chunks) const

◆ compute_chunk_cohesion()

template<typename T >
double chunk_metrics::ChunkQualityAnalyzer< T >::compute_chunk_cohesion ( const std::vector< T > &  chunk) const
inline private

Definition at line 61 of file chunk_metrics.hpp.

61 {
62 if (chunk.size() < 2) return 0.0;
63
64 std::vector<double> distances;
65 distances.reserve((chunk.size() * (chunk.size() - 1)) / 2);
66
67 for (size_t i = 0; i < chunk.size(); ++i) {
68 for (size_t j = i + 1; j < chunk.size(); ++j) {
69 double dist = detail::safe_distance(chunk[i], chunk[j]);
70 if (dist < std::numeric_limits<double>::max()) {
71 distances.push_back(dist);
72 }
73 }
74 }
75
76 if (distances.empty()) return 0.0;
77 std::sort(distances.begin(), distances.end());
78 return distances[distances.size() / 2]; // Return median distance
79 }
double safe_distance(const T &a, const T &b)

◆ compute_cohesion()

template<typename T >
double chunk_metrics::ChunkQualityAnalyzer< T >::compute_cohesion ( const std::vector< std::vector< T > > &  chunks) const
inline

Definition at line 91 of file chunk_metrics.hpp.

91 {
92 if (chunks.empty()) {
93 throw std::invalid_argument("Empty chunks");
94 }
95
96 std::vector<double> cohesion_values;
97 cohesion_values.reserve(chunks.size());
98
99 for (const auto& chunk : chunks) {
100 if (chunk.empty() || chunk.size() > 1000000) {
101 throw std::invalid_argument("Invalid chunk size");
102 }
103 double chunk_cohesion = compute_chunk_cohesion(chunk);
104 if (std::isfinite(chunk_cohesion)) {
105 cohesion_values.push_back(chunk_cohesion);
106 }
107 }
108
109 if (cohesion_values.empty()) {
110 throw std::runtime_error("No valid cohesion values computed");
111 }
112
113 std::sort(cohesion_values.begin(), cohesion_values.end());
114 return cohesion_values[cohesion_values.size() / 2];
115 }
double compute_chunk_cohesion(const std::vector< T > &chunk) const

Referenced by main().

◆ compute_quality_score()

template<typename T >
double chunk_metrics::ChunkQualityAnalyzer< T >::compute_quality_score ( const std::vector< std::vector< T > > &  chunks) const
inline

Definition at line 226 of file chunk_metrics.hpp.

226 {
227 if (chunks.empty()) {
228 throw std::invalid_argument("Empty chunks vector");
229 }
230
231 try {
232 double cohesion = compute_cohesion(chunks);
233 double separation = chunks.size() > 1 ? compute_separation(chunks) : 1.0;
234
235 if (!std::isfinite(cohesion) || !std::isfinite(separation)) {
236 throw std::runtime_error("Invalid metric values computed");
237 }
238
239 return (cohesion + separation) / 2.0;
240 } catch (const std::exception& e) {
241 throw std::runtime_error(std::string("Error computing quality score: ") + e.what());
242 }
243 }
double compute_separation(const std::vector< std::vector< T > > &chunks) const

Referenced by main().

◆ compute_separation()

template<typename T >
double chunk_metrics::ChunkQualityAnalyzer< T >::compute_separation ( const std::vector< std::vector< T > > &  chunks) const
inline

Definition at line 137 of file chunk_metrics.hpp.

137 {
138 if (chunks.size() < 2) {
139 throw std::invalid_argument("Need at least two chunks for separation");
140 }
141
142 double total_separation = 0.0;
143 size_t valid_pairs = 0;
144
145 for (size_t i = 0; i < chunks.size(); ++i) {
146 for (size_t j = i + 1; j < chunks.size(); ++j) {
147 if (chunks[i].empty() || chunks[j].empty()) continue;
148
149 double mean_i = detail::safe_mean(chunks[i]);
150 double mean_j = detail::safe_mean(chunks[j]);
151
152 if (std::isfinite(mean_i) && std::isfinite(mean_j)) {
153 total_separation += std::abs(mean_i - mean_j);
154 ++valid_pairs;
155 }
156 }
157 }
158
159 if (valid_pairs == 0) {
160 throw std::runtime_error("No valid separation values computed");
161 }
162
163 return total_separation / valid_pairs;
164 }
double safe_mean(const std::vector< T > &data)

Referenced by main().

◆ compute_silhouette_score()

template<typename T >
double chunk_metrics::ChunkQualityAnalyzer< T >::compute_silhouette_score ( const std::vector< std::vector< T > > &  chunks) const
inline

Definition at line 166 of file chunk_metrics.hpp.

166 {
167 if (chunks.size() < 2) {
168 throw std::invalid_argument("Need at least two chunks for silhouette score");
169 }
170
171 double total_score = 0.0;
172 size_t total_points = 0;
173
174 for (size_t i = 0; i < chunks.size(); ++i) {
175 for (const auto& point : chunks[i]) {
176 // Calculate a (average distance to points in same chunk)
177 double a = 0.0;
178 size_t same_chunk_count = 0;
179 for (const auto& other_point : chunks[i]) {
180 if (&point != &other_point) {
181 double dist = detail::safe_distance(point, other_point);
182 if (dist < std::numeric_limits<double>::max()) {
183 a += dist;
184 ++same_chunk_count;
185 }
186 }
187 }
188 a = same_chunk_count > 0 ? a / same_chunk_count : 0.0;
189
190 // Calculate b (minimum average distance to other chunks)
191 double b = std::numeric_limits<double>::max();
192 for (size_t j = 0; j < chunks.size(); ++j) {
193 if (i != j) {
194 double avg_dist = 0.0;
195 size_t valid_dist = 0;
196 for (const auto& other_point : chunks[j]) {
197 double dist = detail::safe_distance(point, other_point);
198 if (dist < std::numeric_limits<double>::max()) {
199 avg_dist += dist;
200 ++valid_dist;
201 }
202 }
203 if (valid_dist > 0) {
204 b = std::min(b, avg_dist / valid_dist);
205 }
206 }
207 }
208
209 if (std::isfinite(a) && std::isfinite(b) && b < std::numeric_limits<double>::max()) {
210 double max_ab = std::max(a, b);
211 if (max_ab > 0) {
212 total_score += (b - a) / max_ab;
213 ++total_points;
214 }
215 }
216 }
217 }
218
219 if (total_points == 0) {
220 throw std::runtime_error("No valid silhouette scores computed");
221 }
222
223 return total_score / total_points;
224 }

Referenced by main().

◆ compute_size_metrics()

template<typename T >
std::map< std::string, double > chunk_metrics::ChunkQualityAnalyzer< T >::compute_size_metrics ( const std::vector< std::vector< T > > &  chunks) const
inline

Definition at line 245 of file chunk_metrics.hpp.

245 {
246 if (chunks.empty()) {
247 throw std::invalid_argument("Empty chunks vector");
248 }
249
250 std::map<std::string, double> metrics;
251 double avg_size = 0.0;
252 double max_size = 0.0;
253 double min_size = static_cast<double>(chunks[0].size());
254
255 for (const auto& chunk : chunks) {
256 double size = static_cast<double>(chunk.size());
257 avg_size += size;
258 max_size = std::max(max_size, size);
259 min_size = std::min(min_size, size);
260 }
261 avg_size /= static_cast<double>(chunks.size());
262
263 double variance = 0.0;
264 for (const auto& chunk : chunks) {
265 double diff = static_cast<double>(chunk.size()) - avg_size;
266 variance += diff * diff;
267 }
268 variance /= static_cast<double>(chunks.size());
269
270 metrics["average_size"] = avg_size;
271 metrics["max_size"] = max_size;
272 metrics["min_size"] = min_size;
273 metrics["size_variance"] = variance;
274 metrics["size_stddev"] = std::sqrt(variance);
275
276 return metrics;
277 }

Referenced by main().

◆ operator=() [1/2]

template<typename T >
ChunkQualityAnalyzer & chunk_metrics::ChunkQualityAnalyzer< T >::operator= ( ChunkQualityAnalyzer< T > &&  )
delete

◆ operator=() [2/2]

template<typename T >
ChunkQualityAnalyzer & chunk_metrics::ChunkQualityAnalyzer< T >::operator= ( const ChunkQualityAnalyzer< T > &  )
delete

The documentation for this class was generated from the following file: