Advanced Chunk Processing Library 0.2.0
A comprehensive C++ library for advanced data chunking strategies and processing operations
chunk_bindings.cpp File Reference
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "chunk.hpp"
#include "chunk_benchmark.hpp"
#include "chunk_compression.hpp"
#include "chunk_integrations.hpp"
#include "chunk_metrics.hpp"
#include "chunk_serialization.hpp"
#include "chunk_strategies.hpp"
#include "chunk_strategy_implementations.hpp"
#include "chunk_visualization.hpp"
#include "neural_chunking.hpp"
#include "sophisticated_chunking.hpp"


Functions

 PYBIND11_MODULE (chunking_cpp, m)
 

Function Documentation

◆ PYBIND11_MODULE()

PYBIND11_MODULE(chunking_cpp, m)

Definition at line 22 of file chunk_bindings.cpp.

{
    m.doc() = "Python bindings for the C++ chunking library";

    // Register exception translators
    py::register_exception_translator([](std::exception_ptr p) {
        try {
            if (p)
                std::rethrow_exception(p);
        } catch (const std::invalid_argument& e) {
            PyErr_SetString(PyExc_ValueError, e.what());
        } catch (const std::runtime_error& e) {
            PyErr_SetString(PyExc_RuntimeError, e.what());
        } catch (const std::exception& e) {
            PyErr_SetString(PyExc_RuntimeError, e.what());
        }
    });

    // Basic Chunking
    py::class_<chunk_processing::Chunk<double>>(m, "Chunk")
        .def(py::init<size_t>())
        .def("add",
             static_cast<void (chunk_processing::Chunk<double>::*)(const double&)>(
                 &chunk_processing::Chunk<double>::add),
             "Add a single element")
        .def(
            "add",
            [](chunk_processing::Chunk<double>& self, const std::vector<double>& data) {
                if (data.empty()) {
                    throw std::invalid_argument("Cannot add empty vector");
                }
                self.add(data);
            },
            "Add multiple elements");

    py::class_<chunk_processing::Chunk<std::vector<double>>>(m, "Chunk2D")
        .def(py::init<size_t>())
        .def("add",
             [](chunk_processing::Chunk<std::vector<double>>& self,
                py::array_t<double, py::array::c_style>& data) {
                 auto buf = data.request();
                 if (buf.ndim != 2) {
                     throw std::invalid_argument("Expected 2D array");
                 }

                 std::vector<std::vector<double>> nested_data;
                 nested_data.reserve(buf.shape[0]);

                 for (size_t i = 0; i < buf.shape[0]; ++i) {
                     std::vector<double> row(static_cast<double*>(buf.ptr) + i * buf.shape[1],
                                             static_cast<double*>(buf.ptr) +
                                                 (i + 1) * buf.shape[1]);
                     nested_data.push_back(row);
                 }
                 self.add(nested_data);
             })
        .def("get_chunks", [](chunk_processing::Chunk<std::vector<double>>& self) {
            auto chunks = self.get_chunks();
            py::list result;
            for (const auto& chunk : chunks) {
                // Convert each chunk to a NumPy array
                ssize_t rows = chunk.size();
                ssize_t cols = rows > 0 ? chunk[0].size() : 0;

                auto array = py::array_t<double>({rows, cols});
                auto buf = array.request();
                double* ptr = static_cast<double*>(buf.ptr);

                for (size_t i = 0; i < rows; ++i) {
                    std::copy(chunk[i].begin(), chunk[i].end(), ptr + i * cols);
                }
                result.append(array);
            }
            return result;
        });

    py::class_<chunk_processing::Chunk<std::vector<std::vector<double>>>>(m, "Chunk3D")
        .def(py::init<size_t>())
        .def("add",
             [](chunk_processing::Chunk<std::vector<std::vector<double>>>& self,
                py::array_t<double, py::array::c_style>& data) {
                 auto buf = data.request();
                 if (buf.ndim != 3) {
                     throw std::invalid_argument("Expected 3D array");
                 }

                 std::vector<std::vector<std::vector<double>>> nested_data;
                 nested_data.reserve(buf.shape[0]);

                 double* ptr = static_cast<double*>(buf.ptr);
                 for (size_t i = 0; i < buf.shape[0]; ++i) {
                     std::vector<std::vector<double>> matrix;
                     matrix.reserve(buf.shape[1]);
                     for (size_t j = 0; j < buf.shape[1]; ++j) {
                         std::vector<double> row(
                             ptr + (i * buf.shape[1] * buf.shape[2]) + (j * buf.shape[2]),
                             ptr + (i * buf.shape[1] * buf.shape[2]) + ((j + 1) * buf.shape[2]));
                         matrix.push_back(row);
                     }
                     nested_data.push_back(matrix);
                 }
                 self.add(nested_data);
             })
        .def("get_chunks", [](chunk_processing::Chunk<std::vector<std::vector<double>>>& self) {
            auto chunks = self.get_chunks();
            py::list result;
            for (const auto& chunk : chunks) {
                // Convert each chunk to a NumPy array
                if (chunk.empty() || chunk[0].empty())
                    continue;

                ssize_t depth = chunk.size();
                ssize_t rows = chunk[0].size();
                ssize_t cols = chunk[0][0].size();

                auto array = py::array_t<double>({depth, rows, cols});
                auto buf = array.request();
                double* ptr = static_cast<double*>(buf.ptr);

                for (size_t i = 0; i < depth; ++i) {
                    for (size_t j = 0; j < rows; ++j) {
                        std::copy(chunk[i][j].begin(), chunk[i][j].end(),
                                  ptr + (i * rows * cols) + (j * cols));
                    }
                }
                result.append(array);
            }
            return result;
        });

    // Neural Chunking
    py::class_<neural_chunking::NeuralChunking<double>>(m, "NeuralChunking")
        .def(py::init<size_t, double>())
        .def("chunk",
             [](neural_chunking::NeuralChunking<double>& self, const std::vector<double>& data) {
                 auto chunks = self.chunk(data);
                 py::list result;
                 for (const auto& chunk : chunks) {
                     result.append(py::array_t<double>(chunk.size(), chunk.data()));
                 }
                 return result;
             })
        .def("train",
             [](neural_chunking::NeuralChunking<double>& self, const std::vector<double>& data) {
                 auto losses = self.train(data);
                 return py::array_t<double>(losses.size(), losses.data());
             });

    // GPU Chunking
#ifdef HAVE_CUDA
    py::class_<gpu_chunking::GPUChunkProcessor<double>>(m, "GPUChunkProcessor")
        .def(py::init<>())
        .def("process_on_gpu", &gpu_chunking::GPUChunkProcessor<double>::process_on_gpu);
#endif

    // Sophisticated Chunking
    py::class_<sophisticated_chunking::WaveletChunking<double>>(m, "WaveletChunking")
        .def(py::init<size_t, double>())
        .def("chunk",
             [](sophisticated_chunking::WaveletChunking<double>& self,
                const std::vector<double>& data) {
                 auto chunks = self.chunk(data);
                 py::list result;
                 for (const auto& chunk : chunks) {
                     result.append(py::array_t<double>(chunk.size(), chunk.data()));
                 }
                 return result;
             })
        .def("set_wavelet_type",
             &sophisticated_chunking::WaveletChunking<double>::set_wavelet_type);

    py::class_<sophisticated_chunking::MutualInformationChunking<double>>(
        m, "MutualInformationChunking")
        .def(py::init<size_t, double>())
        .def("chunk",
             [](sophisticated_chunking::MutualInformationChunking<double>& self,
                const std::vector<double>& data) {
                 auto chunks = self.chunk(data);
                 py::list result;
                 for (const auto& chunk : chunks) {
                     // Convert each chunk to a NumPy array
                     result.append(py::array_t<double>(chunk.size(), chunk.data()));
                 }
                 return result;
             });

    py::class_<sophisticated_chunking::DTWChunking<double>>(m, "DTWChunking")
        .def(py::init<size_t, double>(), py::arg("window_size") = 10, py::arg("threshold") = 1.0)
        .def("get_distance_metric",
             &sophisticated_chunking::DTWChunking<double>::get_distance_metric)
        .def("set_distance_metric",
             &sophisticated_chunking::DTWChunking<double>::set_distance_metric);

    // Chunk Metrics
    py::class_<chunk_metrics::ChunkQualityAnalyzer<double>>(m, "ChunkQualityAnalyzer")
        .def(py::init<>())
        .def("compute_silhouette_score",
             &chunk_metrics::ChunkQualityAnalyzer<double>::compute_silhouette_score)
        .def("compute_quality_score",
             &chunk_metrics::ChunkQualityAnalyzer<double>::compute_quality_score)
        .def("compute_size_metrics",
             &chunk_metrics::ChunkQualityAnalyzer<double>::compute_size_metrics);

    // Chunk Visualization
    py::class_<chunk_viz::ChunkVisualizer<double>>(m, "ChunkVisualizer")
        .def(py::init<const std::vector<double>&, const std::string&>());

    // Chunk Serialization
    py::class_<chunk_serialization::ChunkSerializer<double>>(m, "ChunkSerializer")
        .def(py::init<>());

    // Database Integration
#ifdef HAVE_POSTGRESQL
    py::class_<chunk_integrations::DatabaseChunkStore>(m, "DatabaseChunkStore")
        .def(
            py::init<std::unique_ptr<chunk_integrations::DatabaseConnection>, const std::string&>())
        .def("store_chunks_postgres",
             &chunk_integrations::DatabaseChunkStore::store_chunks_postgres<double>)
#ifdef HAVE_MONGODB
        .def("store_chunks_mongodb",
             &chunk_integrations::DatabaseChunkStore::store_chunks_mongodb<double>)
#endif
        ;
#endif

    // Message Queue Integration
#if defined(HAVE_KAFKA) || defined(HAVE_RABBITMQ)
    py::class_<chunk_integrations::ChunkMessageQueue>(m, "ChunkMessageQueue")
        .def(py::init<std::unique_ptr<chunk_integrations::MessageQueueConnection>,
                      const std::string&>())
#ifdef HAVE_KAFKA
        .def("publish_chunks_kafka",
             &chunk_integrations::ChunkMessageQueue::publish_chunks_kafka<double>)
#endif
#ifdef HAVE_RABBITMQ
        .def("publish_chunks_rabbitmq",
             &chunk_integrations::ChunkMessageQueue::publish_chunks_rabbitmq<double>)
#endif
        ;
#endif

    // Benchmark bindings
    py::class_<chunk_benchmark::BenchmarkResult>(m, "BenchmarkResult")
        .def_readwrite("execution_time_ms", &chunk_benchmark::BenchmarkResult::execution_time_ms)
        .def_readwrite("memory_usage_bytes", &chunk_benchmark::BenchmarkResult::memory_usage_bytes)
        .def_readwrite("num_chunks", &chunk_benchmark::BenchmarkResult::num_chunks)
        .def_readwrite("strategy_name", &chunk_benchmark::BenchmarkResult::strategy_name);

    py::class_<chunk_benchmark::ChunkBenchmark<double>>(m, "ChunkBenchmark")
        .def(py::init<const std::vector<double>&, size_t>(), py::arg("data"),
             py::arg("num_iterations") = 100);

    // Add exception translations
    py::register_exception<chunk_processing::ChunkingError>(m, "ChunkingError");

    // Strategy bindings
    py::class_<chunk_processing::ChunkStrategy<double>,
               std::shared_ptr<chunk_processing::ChunkStrategy<double>>>(m, "ChunkStrategy");

    py::class_<chunk_processing::NeuralChunkingStrategy<double>,
               chunk_processing::ChunkStrategy<double>,
               std::shared_ptr<chunk_processing::NeuralChunkingStrategy<double>>>(
        m, "NeuralChunkingStrategy")
        .def(py::init<>());

    py::class_<chunk_processing::SimilarityChunkingStrategy<double>,
               chunk_processing::ChunkStrategy<double>,
               std::shared_ptr<chunk_processing::SimilarityChunkingStrategy<double>>>(
        m, "SimilarityChunkingStrategy")
        .def(py::init<double>());
}
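The module defined above is importable from Python as chunking_cpp. The following is a minimal usage sketch, assuming the extension has been built and is on the Python path; constructor arguments follow the py::init<...> signatures bound above, and the meaning of NeuralChunking's two arguments (window size, threshold) is an assumption not confirmed by this file.

import numpy as np
import chunking_cpp

# Basic 1D chunking: Chunk(chunk_size), per the py::init<size_t> binding.
c = chunking_cpp.Chunk(4)
c.add(1.0)                      # "Add a single element"
c.add([2.0, 3.0, 4.0, 5.0])     # "Add multiple elements"; an empty list raises ValueError

# 2D chunking from a NumPy array: the bound add() requires a 2D C-style float64 array.
c2d = chunking_cpp.Chunk2D(2)
c2d.add(np.random.rand(6, 3))
for block in c2d.get_chunks():  # each chunk is returned as a 2D numpy array
    print(block.shape)

# Neural chunking; argument meanings assumed to be (window_size, threshold).
nc = chunking_cpp.NeuralChunking(8, 0.5)
losses = nc.train(list(np.random.rand(128)))   # numpy array of training losses
chunks = nc.chunk(list(np.random.rand(128)))   # list of 1D numpy arrays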

References chunk_processing::Chunk< T >::add(), chunk_benchmark::BenchmarkResult::execution_time_ms, chunk_processing::Chunk< T >::get_chunks(), chunk_benchmark::BenchmarkResult::memory_usage_bytes, chunk_benchmark::BenchmarkResult::num_chunks, and chunk_benchmark::BenchmarkResult::strategy_name.
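Because the translator above maps std::invalid_argument to ValueError and ChunkingError is exported via py::register_exception, Python callers can handle failures with ordinary exception handling. A second sketch under the same assumptions (built module on the path; WaveletChunking and MutualInformationChunking argument meanings inferred by analogy with DTWChunking's named arguments, not confirmed here):

import numpy as np
import chunking_cpp

data = list(np.sin(np.linspace(0.0, 12.0, 256)))

# Both constructors are bound as py::init<size_t, double>; argument meanings are assumed.
wavelet = chunking_cpp.WaveletChunking(16, 0.3)
mi = chunking_cpp.MutualInformationChunking(8, 0.2)

for strategy in (wavelet, mi):
    chunks = strategy.chunk(data)            # list of 1D numpy arrays
    print(type(strategy).__name__, len(chunks))

# DTWChunking exposes named, defaulted constructor arguments.
dtw = chunking_cpp.DTWChunking(window_size=10, threshold=1.0)

# std::invalid_argument is translated to ValueError by the registered translator.
try:
    chunking_cpp.Chunk(4).add([])
except ValueError as err:
    print("rejected:", err)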