62 if (chunk.size() < 2)
return 0.0;
64 std::vector<double> distances;
65 distances.reserve((chunk.size() * (chunk.size() - 1)) / 2);
67 for (
size_t i = 0; i < chunk.size(); ++i) {
68 for (
size_t j = i + 1; j < chunk.size(); ++j) {
69 double dist = detail::safe_distance(chunk[i], chunk[j]);
70 if (dist < std::numeric_limits<double>::max()) {
71 distances.push_back(dist);
76 if (distances.empty())
return 0.0;
77 std::sort(distances.begin(), distances.end());
78 return distances[distances.size() / 2];
// NOTE(review): this is an excerpt with elided lines. The function signature, the
// condition guarding the first throw, and the closing braces are not visible here.
// Presumably the body of compute_cohesion(chunks), which is called elsewhere in this
// excerpt — confirm against the full file.
//
// Visible behavior: computes one cohesion value per chunk via compute_chunk_cohesion,
// keeps only the finite ones, and returns the middle element of the sorted values.
// Throws std::invalid_argument for an invalid chunk size and std::runtime_error when
// no finite cohesion value was produced.
93 throw std::invalid_argument(
"Empty chunks");
// At most one value per chunk is stored; non-finite results are dropped below.
96 std::vector<double> cohesion_values;
97 cohesion_values.reserve(chunks.size());
99 for (
const auto& chunk : chunks) {
// Reject empty chunks and chunks holding more than 1,000,000 elements.
100 if (chunk.empty() || chunk.size() > 1000000) {
101 throw std::invalid_argument(
"Invalid chunk size");
103 double chunk_cohesion = compute_chunk_cohesion(chunk);
// NaN and infinite results are skipped so they cannot disturb the sort below.
104 if (std::isfinite(chunk_cohesion)) {
105 cohesion_values.push_back(chunk_cohesion);
// Reached with an empty vector only if every chunk produced a non-finite value.
109 if (cohesion_values.empty()) {
110 throw std::runtime_error(
"No valid cohesion values computed");
// Median selection: element at index size()/2 of the sorted values, i.e. the upper
// of the two middle elements when the count is even (no averaging).
113 std::sort(cohesion_values.begin(), cohesion_values.end());
114 return cohesion_values[cohesion_values.size() / 2];
118 const std::vector<std::vector<T>>& mixed,
120 double& mixed_result)
const {
// NOTE(review): the function name, the remaining parameters (original lines 117 and
// 119) and the body of the guard below are elided from this excerpt.
//
// Visible behavior: computes the cohesion of `well_separated` and of `mixed`, stores
// the values in high_result / mixed_result, and returns true only when both values
// are finite and the well-separated cohesion is strictly greater than the mixed one.

// Empty-input guard; what it does (original lines 123-125) is not visible here.
122 if (well_separated.empty() || mixed.empty()) {
// mixed_result is a reference out-parameter (declared above); high_result is
// presumably one as well — confirm against the full signature.
126 high_result = compute_cohesion(well_separated);
127 mixed_result = compute_cohesion(mixed);
// Strict comparison: equal cohesion values yield false.
129 return std::isfinite(high_result) &&
130 std::isfinite(mixed_result) &&
131 high_result > mixed_result;
// NOTE(review): this is an excerpt with elided lines. The function signature, some
// interior statements and the closing braces are not visible here. Presumably the
// body of compute_separation(chunks), which is called elsewhere in this excerpt —
// confirm against the full file.
//
// Visible behavior: for every unordered pair of chunks, takes the absolute difference
// of their means (detail::safe_mean) when both means are finite, accumulates it, and
// returns the accumulated total divided by valid_pairs.
// Throws std::invalid_argument when fewer than two chunks are given and
// std::runtime_error when valid_pairs is zero.
138 if (chunks.size() < 2) {
139 throw std::invalid_argument(
"Need at least two chunks for separation");
142 double total_separation = 0.0;
143 size_t valid_pairs = 0;
// j starts at i + 1, so each unordered pair (i, j) is visited exactly once.
145 for (
size_t i = 0; i < chunks.size(); ++i) {
146 for (
size_t j = i + 1; j < chunks.size(); ++j) {
// Pairs involving an empty chunk are skipped rather than treated as an error.
147 if (chunks[i].empty() || chunks[j].empty())
continue;
149 double mean_i = detail::safe_mean(chunks[i]);
150 double mean_j = detail::safe_mean(chunks[j]);
// Only pairs whose means are both finite contribute to the total.
152 if (std::isfinite(mean_i) && std::isfinite(mean_j)) {
153 total_separation += std::abs(mean_i - mean_j);
// NOTE(review): the increment of valid_pairs is not visible in this excerpt
// (presumably in the elided original lines 154-158) — confirm.
159 if (valid_pairs == 0) {
160 throw std::runtime_error(
"No valid separation values computed");
// Average absolute mean difference over the counted pairs.
163 return total_separation / valid_pairs;
// NOTE(review): this is an excerpt with elided lines. The function signature (name
// not visible; the error text refers to a "silhouette score"), the declaration and
// accumulation of `a`, the accumulation of avg_dist / valid_dist, the increment of
// total_points and all closing braces are missing from this view.
//
// Visible behavior: for each point of each chunk,
//   a = average distance to the other points of the same chunk (0.0 if none counted),
//   b = smallest per-chunk average distance from the point to the points of a chunk j,
// and (b - a) / max(a, b) is added to total_score. The function returns
// total_score / total_points.
// Throws std::invalid_argument when fewer than two chunks are given and
// std::runtime_error when total_points is zero.
167 if (chunks.size() < 2) {
168 throw std::invalid_argument(
"Need at least two chunks for silhouette score");
171 double total_score = 0.0;
172 size_t total_points = 0;
174 for (
size_t i = 0; i < chunks.size(); ++i) {
175 for (
const auto& point : chunks[i]) {
178 size_t same_chunk_count = 0;
179 for (
const auto& other_point : chunks[i]) {
// Identity check by address: the point itself is excluded, but a different
// element with an equal value is still compared.
180 if (&point != &other_point) {
181 double dist = detail::safe_distance(point, other_point);
// Presumably safe_distance returns numeric_limits<double>::max() as a
// failure sentinel; such distances are ignored — confirm.
182 if (dist < std::numeric_limits<double>::max()) {
// Avoids dividing by zero when no same-chunk distance was counted.
188 a = same_chunk_count > 0 ? a / same_chunk_count : 0.0;
// b starts at the largest finite double so that any real average replaces it.
191 double b = std::numeric_limits<double>::max();
192 for (
size_t j = 0; j < chunks.size(); ++j) {
// NOTE(review): original line 193 is elided; presumably it skips j == i so the
// point's own chunk is excluded — confirm.
194 double avg_dist = 0.0;
195 size_t valid_dist = 0;
196 for (
const auto& other_point : chunks[j]) {
197 double dist = detail::safe_distance(point, other_point);
198 if (dist < std::numeric_limits<double>::max()) {
// Chunks with no counted distance do not influence b.
203 if (valid_dist > 0) {
204 b = std::min(b, avg_dist / valid_dist);
// b still equal to max() means no chunk supplied a valid average; such points
// are excluded from the score.
209 if (std::isfinite(a) && std::isfinite(b) && b < std::numeric_limits<double>::max()) {
210 double max_ab = std::max(a, b);
// NOTE(review): original line 211 is elided; if it does not guard max_ab > 0,
// a == b == 0 would divide by zero here — confirm.
212 total_score += (b - a) / max_ab;
219 if (total_points == 0) {
220 throw std::runtime_error(
"No valid silhouette scores computed");
// Mean of the per-point scores that were accumulated.
223 return total_score / total_points;
// NOTE(review): this is an excerpt with elided lines. The function signature (name
// not visible; the error text refers to a "quality score"), the `try {` that pairs
// with the catch below, and the closing braces are missing from this view.
//
// Visible behavior: returns the arithmetic mean of compute_cohesion(chunks) and a
// separation value. Throws std::invalid_argument for an empty chunk vector; any
// std::exception caught by the handler below is rethrown as std::runtime_error.
227 if (chunks.empty()) {
228 throw std::invalid_argument(
"Empty chunks vector");
232 double cohesion = compute_cohesion(chunks);
// With a single chunk compute_separation is not called; 1.0 is used instead.
233 double separation = chunks.size() > 1 ? compute_separation(chunks) : 1.0;
235 if (!std::isfinite(cohesion) || !std::isfinite(separation)) {
236 throw std::runtime_error(
"Invalid metric values computed");
// Cohesion and separation are weighted equally.
239 return (cohesion + separation) / 2.0;
240 }
catch (
const std::exception& e) {
// The original exception type is not preserved: everything is rethrown as
// std::runtime_error with the original message appended to a fixed prefix.
241 throw std::runtime_error(std::string(
"Error computing quality score: ") + e.what());
246 if (chunks.empty()) {
247 throw std::invalid_argument(
"Empty chunks vector");
250 std::map<std::string, double> metrics;
251 double avg_size = 0.0;
252 double max_size = 0.0;
253 double min_size =
static_cast<double>(chunks[0].size());
255 for (
const auto& chunk : chunks) {
256 double size =
static_cast<double>(chunk.size());
258 max_size = std::max(max_size, size);
259 min_size = std::min(min_size, size);
261 avg_size /=
static_cast<double>(chunks.size());
263 double variance = 0.0;
264 for (
const auto& chunk : chunks) {
265 double diff =
static_cast<double>(chunk.size()) - avg_size;
266 variance += diff * diff;
268 variance /=
static_cast<double>(chunks.size());
270 metrics[
"average_size"] = avg_size;
271 metrics[
"max_size"] = max_size;
272 metrics[
"min_size"] = min_size;
273 metrics[
"size_variance"] = variance;
274 metrics[
"size_stddev"] = std::sqrt(variance);