Advanced Chunk Processing Library 0.2.0
A comprehensive C++ library for advanced data chunking strategies and processing operations
Loading...
Searching...
No Matches
chunk_visualization.hpp
Go to the documentation of this file.
1/**
2 * @file chunk_visualization.hpp
3 * @brief Visualization utilities for chunk data
4 * @author Jonathan Reich
5 * @date 2024-12-07
6 */
7
8#pragma once
9#include "chunk_common.hpp"
10#include "chunk_errors.hpp"
11#include <algorithm>
12#include <cmath> // for std::abs and std::sqrt
13#include <filesystem>
14#include <fstream>
15#include <limits>
16#include <sstream>
17#include <string>
18#include <type_traits>
19#include <vector>
20
21namespace chunk_viz {
22
23/**
24 * @brief Class for visualizing chunk data in various formats
25 * @tparam T The data type of the chunks (must be arithmetic)
26 */
27template <typename T>
29private:
30 std::vector<T> data;
31 std::string output_dir;
32 static constexpr double threshold = 1.0; // Default threshold for chunk detection
33
34 // Add helper to calculate difference between elements
35 double calculate_difference(const T& current, const T& previous) const {
36 if constexpr (std::is_arithmetic_v<T>) {
37 return std::abs(current - previous);
38 } else if constexpr (std::is_same_v<T, std::vector<double>> ||
39 std::is_same_v<T, std::vector<float>> ||
40 std::is_same_v<T, std::vector<int>>) {
41 // For vectors, calculate Euclidean distance
42 if (current.size() != previous.size()) {
43 return std::numeric_limits<double>::max();
44 }
45 double sum = 0.0;
46 for (size_t i = 0; i < current.size(); ++i) {
47 double diff = current[i] - previous[i];
48 sum += diff * diff;
49 }
50 return std::sqrt(sum);
51 } else {
53 "Unsupported type for difference calculation");
54 }
55 }
56
57 // Add back format_value with improved implementation
58 std::string format_value(const T& value) const {
59 if constexpr (std::is_arithmetic_v<T>) {
60 return std::to_string(value);
61 } else if constexpr (std::is_same_v<T, std::string>) {
62 return value;
63 } else if constexpr (std::is_same_v<T, std::vector<double>> ||
64 std::is_same_v<T, std::vector<float>> ||
65 std::is_same_v<T, std::vector<int>>) {
66 std::stringstream ss;
67 ss << "[";
68 for (size_t i = 0; i < value.size(); ++i) {
69 ss << value[i];
70 if (i < value.size() - 1)
71 ss << ",";
72 }
73 ss << "]";
74 return ss.str();
75 } else {
76 throw chunk_processing::VisualizationError("Unsupported type for visualization");
77 }
78 }
79
80 // Add helper to ensure data is written
81 void write_data_file(const std::string& filename,
82 const std::vector<std::pair<size_t, size_t>>& chunk_sizes) {
83 std::ofstream file(filename);
84 if (!file) {
85 throw chunk_processing::ChunkingError("Failed to create data file: " + filename);
86 }
87
88 // Write header
89 file << "# Chunk_Index Size\n";
90
91 // Write data with explicit flush
92 for (size_t i = 0; i < chunk_sizes.size(); ++i) {
93 file << i << " " << chunk_sizes[i].second << "\n";
94 file.flush(); // Ensure data is written
95 }
96
97 file.close();
98
99 // Verify file was written
100 std::ifstream check(filename);
101 if (!check || check.peek() == std::ifstream::traits_type::eof()) {
102 throw chunk_processing::ChunkingError("Failed to write data to file: " + filename);
103 }
104 }
105
106public:
107 // Update constructor to handle both single vector and vector of vectors
108 ChunkVisualizer(const std::vector<T>& input_data, const std::string& viz_dir = "./viz")
109 : data(input_data), output_dir(viz_dir) {
110 std::filesystem::create_directories(output_dir);
111 }
112
113 // Add overloaded constructor for vector of vectors
114 template <typename U = T>
115 ChunkVisualizer(const std::vector<std::vector<U>>& input_data,
116 const std::string& viz_dir = "./viz")
117 : data(reinterpret_cast<const std::vector<T>&>(input_data)), output_dir(viz_dir) {
118 std::filesystem::create_directories(output_dir);
119 static_assert(std::is_same_v<T, std::vector<U>>,
120 "Template parameter must match vector element type");
121 }
122
124 // Calculate chunk sizes
125 std::vector<std::pair<size_t, size_t>> chunk_sizes;
126 size_t current_size = 1; // Initialize to 1 since we start from first element
127
128 // Handle empty data case
129 if (data.empty()) {
130 throw chunk_processing::ChunkingError("Cannot plot chunk sizes for empty data");
131 }
132
133 // Handle single element case
134 if (data.size() == 1) {
135 chunk_sizes.push_back({0, 1});
136 } else {
137 // Process chunks
138 for (size_t i = 1; i < data.size(); ++i) {
139 if (calculate_difference(data[i], data[i - 1]) > threshold) {
140 chunk_sizes.push_back({i - current_size, current_size});
141 current_size = 1; // Reset to 1 since we're starting a new chunk
142 } else {
143 current_size++;
144 }
145 }
146
147 // Add final chunk
148 if (current_size > 0) {
149 chunk_sizes.push_back({data.size() - current_size, current_size});
150 }
151 }
152
153 // Write data file
154 std::string dat_file = output_dir + "/chunk_sizes.dat";
155 write_data_file(dat_file, chunk_sizes);
156
157 // Create gnuplot script
158 std::string gnu_file = output_dir + "/plot_chunks.gnu";
159 std::ofstream gnuplot(gnu_file);
160 if (!gnuplot) {
161 throw chunk_processing::ChunkingError("Failed to create gnuplot file");
162 }
163
164 gnuplot << "set title 'Chunk Size Distribution'\n"
165 << "set xlabel 'Chunk Index'\n"
166 << "set ylabel 'Size'\n"
167 << "set style fill solid\n"
168 << "plot '" << dat_file << "' using 1:2 with boxes title 'Chunk Sizes'\n";
169
170 gnuplot.close();
171 }
172
173 void export_to_graphviz(const std::string& filename = "chunks.dot") {
174 // Ensure the output directory exists
175 std::filesystem::create_directories(output_dir);
176
177 // Create full path
178 std::string actual_filename;
179 if (filename.find('/') != std::string::npos) {
180 // If filename contains a path, use it as is
181 actual_filename = filename;
182 } else {
183 // Otherwise, append to output_dir
184 actual_filename = output_dir + "/" + filename;
185 }
186
187 // Create the file
188 std::ofstream file(actual_filename);
189 if (!file.is_open()) {
190 throw chunk_processing::VisualizationError("Failed to create GraphViz file: " +
191 actual_filename);
192 }
193
194 try {
195 file << "digraph chunks {\n";
196 for (size_t i = 0; i < data.size(); ++i) {
197 file << " chunk" << i << " [label=\"Value: " << format_value(data[i]) << "\"];\n";
198 if (i > 0) {
199 file << " chunk" << (i - 1) << " -> chunk" << i << ";\n";
200 }
201 }
202 file << "}\n";
203
204 // Ensure everything is written
205 file.flush();
206
207 if (file.fail()) {
208 throw chunk_processing::VisualizationError("Failed to write to GraphViz file: " +
209 actual_filename);
210 }
211 } catch (const std::exception& e) {
213 std::string("Error writing GraphViz file: ") + e.what());
214 }
215
216 file.close();
217
218 // Verify the file was created and has content
219 if (!std::filesystem::exists(actual_filename) ||
220 std::filesystem::file_size(actual_filename) == 0) {
221 throw chunk_processing::VisualizationError("GraphViz file was not created properly: " +
222 actual_filename);
223 }
224 }
225
227 std::ofstream boundary_data(output_dir + "/boundaries.dat");
228 if (!boundary_data) {
229 throw chunk_processing::VisualizationError("Failed to create boundaries.dat");
230 }
231
232 // Create gnuplot script for boundary visualization
233 std::ofstream gnuplot_script(output_dir + "/plot_boundaries.gnu");
234 if (!gnuplot_script) {
235 throw chunk_processing::VisualizationError("Failed to create gnuplot script");
236 }
237
238 // Write data points and mark boundaries
239 for (size_t i = 0; i < data.size(); ++i) {
240 if constexpr (std::is_arithmetic_v<T>) {
241 boundary_data << i << " " << data[i] << " "
242 << (i > 0 && std::abs(data[i] - data[i - 1]) > 1.0 ? "1" : "0")
243 << "\n";
244 } else {
245 // For vector types, use the first element or size as indicator
246 double value = data[i].empty() ? 0.0 : data[i][0];
247 boundary_data << i << " " << value << " "
248 << (i > 0 && std::abs(value -
249 (data[i - 1].empty() ? 0.0 : data[i - 1][0])) >
250 1.0
251 ? "1"
252 : "0")
253 << "\n";
254 }
255 }
256
257 // Write gnuplot script
258 gnuplot_script
259 << "set terminal png\n"
260 << "set output '" << output_dir << "/boundaries.png'\n"
261 << "set title 'Chunk Boundaries'\n"
262 << "set xlabel 'Index'\n"
263 << "set ylabel 'Value'\n"
264 << "plot '" << output_dir << "/boundaries.dat' using 1:2 with lines title 'Data', "
265 << " '" << output_dir
266 << "/boundaries.dat' using 1:($3 * $2) with points pt 7 title 'Boundaries'\n";
267 }
268
269 const std::vector<T>& get_data() const {
270 return data;
271 }
272 const std::string& get_output_dir() const {
273 return output_dir;
274 }
275};
276} // namespace chunk_viz
#define CHUNK_EXPORT
Base exception class for chunking operations.
Exception for visualization errors.
Class for visualizing chunk data in various formats.
std::string format_value(const T &value) const
void export_to_graphviz(const std::string &filename="chunks.dot")
ChunkVisualizer(const std::vector< T > &input_data, const std::string &viz_dir="./viz")
const std::string & get_output_dir() const
double calculate_difference(const T &current, const T &previous) const
ChunkVisualizer(const std::vector< std::vector< U > > &input_data, const std::string &viz_dir="./viz")
void write_data_file(const std::string &filename, const std::vector< std::pair< size_t, size_t > > &chunk_sizes)
const std::vector< T > & get_data() const