Advanced Chunk Processing Library 0.2.0
A comprehensive C++ library for advanced data chunking strategies and processing operations
chunk_bindings.cpp
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

// Project headers. Several include lines are elided in this listing; the
// names marked "inferred" below are reconstructed from the classes bound in
// this file and may differ from the actual header names.
#include "chunk.hpp"
#include "chunk_benchmark.hpp"
#include "chunk_integrations.hpp"      // inferred
#include "chunk_metrics.hpp"
#include "chunk_serialization.hpp"     // inferred
#include "chunk_strategies.hpp"
#include "chunk_visualization.hpp"     // inferred
#include "neural_chunking.hpp"
#include "sophisticated_chunking.hpp"  // inferred
#ifdef HAVE_CUDA
#include "gpu_chunking.hpp"
#endif
namespace py = pybind11;

PYBIND11_MODULE(chunking_cpp, m) {
    m.doc() = "Python bindings for the C++ chunking library";

    // Register exception translators
    py::register_exception_translator([](std::exception_ptr p) {
        try {
            if (p)
                std::rethrow_exception(p);
        } catch (const std::invalid_argument& e) {
            PyErr_SetString(PyExc_ValueError, e.what());
        } catch (const std::runtime_error& e) {
            PyErr_SetString(PyExc_RuntimeError, e.what());
        } catch (const std::exception& e) {
            PyErr_SetString(PyExc_RuntimeError, e.what());
        }
    });
    // Basic Chunking
    py::class_<chunk_processing::Chunk<double>>(m, "Chunk")
        .def(py::init<size_t>())
        .def("add",
             static_cast<void (chunk_processing::Chunk<double>::*)(const double&)>(
                 &chunk_processing::Chunk<double>::add),
             "Add a single element")
        .def(
            "add",
            [](chunk_processing::Chunk<double>& self, const std::vector<double>& data) {
                if (data.empty()) {
                    throw std::invalid_argument("Cannot add empty vector");
                }
                self.add(data);
            },
            "Add multiple elements")
        .def("get_chunks", &chunk_processing::Chunk<double>::get_chunks,
             "Get the chunked data");
    py::class_<chunk_processing::Chunk<std::vector<double>>>(m, "Chunk2D")
        .def(py::init<size_t>())
        .def("add",
             [](chunk_processing::Chunk<std::vector<double>>& self,
                py::array_t<double, py::array::c_style>& data) {
                 auto buf = data.request();
                 if (buf.ndim != 2) {
                     throw std::invalid_argument("Expected 2D array");
                 }

                 std::vector<std::vector<double>> nested_data;
                 nested_data.reserve(buf.shape[0]);

                 for (size_t i = 0; i < buf.shape[0]; ++i) {
                     std::vector<double> row(static_cast<double*>(buf.ptr) + i * buf.shape[1],
                                             static_cast<double*>(buf.ptr) +
                                                 (i + 1) * buf.shape[1]);
                     nested_data.push_back(row);
                 }
                 self.add(nested_data);
             })
        .def("get_chunks", [](chunk_processing::Chunk<std::vector<double>>& self) {
            auto chunks = self.get_chunks();
            py::list result;
            for (const auto& chunk : chunks) {
                // Convert each chunk to numpy array
                ssize_t rows = chunk.size();
                ssize_t cols = rows > 0 ? chunk[0].size() : 0;

                auto array = py::array_t<double>({rows, cols});
                auto buf = array.request();
                double* ptr = static_cast<double*>(buf.ptr);

                for (size_t i = 0; i < rows; ++i) {
                    std::copy(chunk[i].begin(), chunk[i].end(), ptr + i * cols);
                }
                result.append(array);
            }
            return result;
        });
    py::class_<chunk_processing::Chunk<std::vector<std::vector<double>>>>(m, "Chunk3D")
        .def(py::init<size_t>())
        .def("add",
             [](chunk_processing::Chunk<std::vector<std::vector<double>>>& self,
                py::array_t<double, py::array::c_style>& data) {
                 auto buf = data.request();
                 if (buf.ndim != 3) {
                     throw std::invalid_argument("Expected 3D array");
                 }

                 std::vector<std::vector<std::vector<double>>> nested_data;
                 nested_data.reserve(buf.shape[0]);

                 double* ptr = static_cast<double*>(buf.ptr);
                 for (size_t i = 0; i < buf.shape[0]; ++i) {
                     std::vector<std::vector<double>> matrix;
                     matrix.reserve(buf.shape[1]);
                     for (size_t j = 0; j < buf.shape[1]; ++j) {
                         std::vector<double> row(
                             ptr + (i * buf.shape[1] * buf.shape[2]) + (j * buf.shape[2]),
                             ptr + (i * buf.shape[1] * buf.shape[2]) + ((j + 1) * buf.shape[2]));
                         matrix.push_back(row);
                     }
                     nested_data.push_back(matrix);
                 }
                 self.add(nested_data);
             })
        .def("get_chunks", [](chunk_processing::Chunk<std::vector<std::vector<double>>>& self) {
            auto chunks = self.get_chunks();
            py::list result;
            for (const auto& chunk : chunks) {
                // Convert each chunk to numpy array
                if (chunk.empty() || chunk[0].empty())
                    continue;

                ssize_t depth = chunk.size();
                ssize_t rows = chunk[0].size();
                ssize_t cols = chunk[0][0].size();

                auto array = py::array_t<double>({depth, rows, cols});
                auto buf = array.request();
                double* ptr = static_cast<double*>(buf.ptr);

                for (size_t i = 0; i < depth; ++i) {
                    for (size_t j = 0; j < rows; ++j) {
                        std::copy(chunk[i][j].begin(), chunk[i][j].end(),
                                  ptr + (i * rows * cols) + (j * cols));
                    }
                }
                result.append(array);
            }
            return result;
        });
    // Neural Chunking
    py::class_<neural_chunking::NeuralChunking<double>>(m, "NeuralChunking")
        .def(py::init<size_t, double>())
        .def("chunk",
             [](neural_chunking::NeuralChunking<double>& self, const std::vector<double>& data) {
                 auto chunks = self.chunk(data);
                 py::list result;
                 for (const auto& chunk : chunks) {
                     result.append(py::array_t<double>(chunk.size(), chunk.data()));
                 }
                 return result;
             })
        .def("train",
             [](neural_chunking::NeuralChunking<double>& self, const std::vector<double>& data) {
                 auto losses = self.train(data);
                 return py::array_t<double>(losses.size(), losses.data());
             });

    // GPU Chunking
#ifdef HAVE_CUDA
    py::class_<gpu_chunking::GPUChunkProcessor<double>>(m, "GPUChunkProcessor")
        .def(py::init<>())
        .def("process_on_gpu", &gpu_chunking::GPUChunkProcessor<double>::process_on_gpu);
#endif
    // Sophisticated Chunking
    py::class_<sophisticated_chunking::WaveletChunking<double>>(m, "WaveletChunking")
        .def(py::init<size_t, double>())
        .def("chunk",
             [](sophisticated_chunking::WaveletChunking<double>& self,
                const std::vector<double>& data) {
                 auto chunks = self.chunk(data);
                 py::list result;
                 for (const auto& chunk : chunks) {
                     result.append(py::array_t<double>(chunk.size(), chunk.data()));
                 }
                 return result;
             })
        .def("set_wavelet_type",
             &sophisticated_chunking::WaveletChunking<double>::set_wavelet_type);

    py::class_<sophisticated_chunking::MutualInformationChunking<double>>(
        m, "MutualInformationChunking")
        .def(py::init<size_t, double>())
        .def("chunk",
             [](sophisticated_chunking::MutualInformationChunking<double>& self,
                const std::vector<double>& data) {
                 auto chunks = self.chunk(data);
                 py::list result;
                 for (const auto& chunk : chunks) {
                     // Convert each chunk to numpy array
                     result.append(py::array_t<double>(chunk.size(), chunk.data()));
                 }
                 return result;
             });

    py::class_<sophisticated_chunking::DTWChunking<double>>(m, "DTWChunking")
        .def(py::init<size_t, double>(), py::arg("window_size") = 10, py::arg("threshold") = 1.0)
        .def("get_distance_metric",
             &sophisticated_chunking::DTWChunking<double>::get_distance_metric)
        .def("set_distance_metric",
             &sophisticated_chunking::DTWChunking<double>::set_distance_metric);

    // Chunk Metrics
    py::class_<chunk_metrics::ChunkQualityAnalyzer<double>>(m, "ChunkQualityAnalyzer")
        .def(py::init<>())
        .def("compute_silhouette_score",
             &chunk_metrics::ChunkQualityAnalyzer<double>::compute_silhouette_score)
        .def("compute_quality_score",
             &chunk_metrics::ChunkQualityAnalyzer<double>::compute_quality_score)
        .def("compute_size_metrics",
             &chunk_metrics::ChunkQualityAnalyzer<double>::compute_size_metrics);
    // Chunk Visualization
    py::class_<chunk_viz::ChunkVisualizer<double>>(m, "ChunkVisualizer")
        .def(py::init<const std::vector<double>&, const std::string&>());

    // Chunk Serialization
    py::class_<chunk_serialization::ChunkSerializer<double>>(m, "ChunkSerializer")
        .def(py::init<>());

    // Database Integration
#ifdef HAVE_POSTGRESQL
    py::class_<chunk_integrations::DatabaseChunkStore>(m, "DatabaseChunkStore")
        .def(
            py::init<std::unique_ptr<chunk_integrations::DatabaseConnection>, const std::string&>())
        .def("store_chunks_postgres",
             &chunk_integrations::DatabaseChunkStore::store_chunks_postgres<double>)
#ifdef HAVE_MONGODB
        .def("store_chunks_mongodb",
             &chunk_integrations::DatabaseChunkStore::store_chunks_mongodb<double>)
#endif
        ;
#endif

    // Message Queue Integration
#if defined(HAVE_KAFKA) || defined(HAVE_RABBITMQ)
    py::class_<chunk_integrations::ChunkMessageQueue>(m, "ChunkMessageQueue")
        .def(py::init<std::unique_ptr<chunk_integrations::MessageQueueConnection>,
                      const std::string&>())
#ifdef HAVE_KAFKA
        .def("publish_chunks_kafka",
             &chunk_integrations::ChunkMessageQueue::publish_chunks_kafka<double>)
#endif
#ifdef HAVE_RABBITMQ
        .def("publish_chunks_rabbitmq",
             &chunk_integrations::ChunkMessageQueue::publish_chunks_rabbitmq<double>)
#endif
        ;
#endif
    // Benchmark bindings
    py::class_<chunk_benchmark::BenchmarkResult>(m, "BenchmarkResult")
        .def_readwrite("execution_time_ms", &chunk_benchmark::BenchmarkResult::execution_time_ms)
        .def_readwrite("memory_usage_bytes", &chunk_benchmark::BenchmarkResult::memory_usage_bytes)
        .def_readwrite("num_chunks", &chunk_benchmark::BenchmarkResult::num_chunks)
        .def_readwrite("strategy_name", &chunk_benchmark::BenchmarkResult::strategy_name);

    py::class_<chunk_benchmark::ChunkBenchmark<double>>(m, "ChunkBenchmark")
        .def(py::init<const std::vector<double>&, size_t>(), py::arg("data"),
             py::arg("num_iterations") = 100);

    // Add exception translations
    py::register_exception<chunk_processing::ChunkingError>(m, "ChunkingError");
    // Strategy bindings
    py::class_<chunk_processing::ChunkStrategy<double>,
               std::shared_ptr<chunk_processing::ChunkStrategy<double>>>(m, "ChunkStrategy");

    py::class_<chunk_processing::NeuralChunkingStrategy<double>,
               chunk_processing::ChunkStrategy<double>,
               std::shared_ptr<chunk_processing::NeuralChunkingStrategy<double>>>(
        m, "NeuralChunkingStrategy")
        .def(py::init<>());

    py::class_<chunk_processing::SimilarityChunkingStrategy<double>,
               chunk_processing::ChunkStrategy<double>,
               std::shared_ptr<chunk_processing::SimilarityChunkingStrategy<double>>>(
        m, "SimilarityChunkingStrategy")
        .def(py::init<double>());
}
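A hypothetical usage sketch of the basic bindings follows. It assumes the extension has been built and is importable as chunking_cpp (the module name registered by PYBIND11_MODULE above) and that the Chunk/Chunk2D constructor argument is the desired chunk size; only methods bound in this file are exercised, and the ValueError on empty input comes from the exception translator registered at the top of the module.

# Hypothetical usage sketch for the basic chunkers (assumes the module is
# importable as chunking_cpp and the constructor argument is a chunk size).
import numpy as np
import chunking_cpp as cc

chunk = cc.Chunk(4)
chunk.add(1.0)                    # "Add a single element" overload
chunk.add([2.0, 3.0, 4.0, 5.0])   # "Add multiple elements" overload

# The empty-vector guard in the binding surfaces as a Python ValueError via
# the registered exception translator.
try:
    chunk.add([])
except ValueError as exc:
    print("rejected:", exc)

# Chunk2D.add expects a C-contiguous 2D float64 array; get_chunks returns a
# list of NumPy arrays, one per chunk.
chunk2d = cc.Chunk2D(2)
chunk2d.add(np.arange(12, dtype=np.float64).reshape(4, 3))
for arr in chunk2d.get_chunks():
    print(arr.shape)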
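A second hypothetical sketch covers the neural and sophisticated chunkers plus the benchmark helper, under the same assumptions. The (size_t, double) constructors are called here with a window size and threshold, mirroring the named DTWChunking arguments; that interpretation is an assumption for the other classes, and methods whose signatures are not visible in this listing are left uncalled.

# Hypothetical usage sketch for the neural / sophisticated chunkers.
import numpy as np
import chunking_cpp as cc

signal = np.sin(np.linspace(0.0, 6.28, 256)).tolist()

neural = cc.NeuralChunking(8, 0.5)   # (size_t, double) constructor
losses = neural.train(signal)        # NumPy array of training losses
chunks = neural.chunk(signal)        # list of NumPy arrays
print(len(chunks), losses.shape)

wavelet = cc.WaveletChunking(8, 0.5)
print([c.size for c in wavelet.chunk(signal)])

# DTWChunking exposes named, defaulted constructor arguments.
dtw = cc.DTWChunking(window_size=10, threshold=1.0)

# Benchmarking over the same data; num_iterations defaults to 100.
bench = cc.ChunkBenchmark(signal, num_iterations=10)

# Quality metrics are bound as well, but their call signatures are not shown
# in this listing, so the analyzer is only constructed here.
analyzer = cc.ChunkQualityAnalyzer()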