// Assigns the docstring of the module object `m`.
23 m.doc() =
"Python bindings for the C++ chunking library";
// Registers a translator that rethrows the captured C++ exception and sets a
// Python error from it:
//   std::invalid_argument -> ValueError
//   std::runtime_error    -> RuntimeError
//   std::exception        -> RuntimeError (fallback)
// The message passed to Python is e.what() in every case.
// NOTE(review): the std::runtime_error and std::exception handlers set the
// same Python exception type, so the former looks redundant — confirm intent.
// NOTE(review): the `try` opener and the end of the lambda/call are not part
// of this excerpt.
26 py::register_exception_translator([](std::exception_ptr p) {
29 std::rethrow_exception(p);
30 }
catch (
const std::invalid_argument& e) {
31 PyErr_SetString(PyExc_ValueError, e.what());
32 }
catch (
const std::runtime_error& e) {
33 PyErr_SetString(PyExc_RuntimeError, e.what());
34 }
catch (
const std::exception& e) {
35 PyErr_SetString(PyExc_RuntimeError, e.what());
// Exposes chunk_processing::Chunk<double> to Python as "Chunk", constructible
// from a single size_t argument.
// NOTE(review): the method bindings are only partially visible here. The
// fragments show two docstrings ("Add a single element", "Add multiple
// elements") and a std::invalid_argument thrown with "Cannot add empty
// vector" — presumably when the multi-element add receives an empty vector;
// confirm against the full file.
40 py::class_<chunk_processing::Chunk<double>>(m,
"Chunk")
41 .def(py::init<size_t>())
45 "Add a single element")
50 throw std::invalid_argument(
"Cannot add empty vector");
54 "Add multiple elements")
// Exposes chunk_processing::Chunk<std::vector<double>> to Python as
// "Chunk2D", constructible from a single size_t argument.
// NOTE(review): the `.def(...)` names and lambda headers for the two bindings
// below are not part of this excerpt.
59 py::class_<chunk_processing::Chunk<std::vector<double>>>(m,
"Chunk2D")
60 .def(py::init<size_t>())
63 py::array_t<double, py::array::c_style>& data) {
// Obtain the buffer description (pointer and shape) of the incoming array.
64 auto buf = data.request();
// Rejects the input with "Expected 2D array"; the guarding condition is not
// visible here (presumably a check on buf.ndim — confirm).
66 throw std::invalid_argument(
"Expected 2D array");
// Convert the flat buffer into one std::vector<double> per row.
69 std::vector<std::vector<double>> nested_data;
70 nested_data.reserve(buf.shape[0]);
// NOTE(review): `i` is size_t while buf.shape[0] is presumably a signed
// ssize_t (pybind11 buffer_info) — signed/unsigned comparison; confirm.
72 for (
size_t i = 0; i < buf.shape[0]; ++i) {
// Row i covers [ptr + i * shape[1], ptr + (i + 1) * shape[1]).
73 std::vector<double> row(
static_cast<double*
>(buf.ptr) + i * buf.shape[1],
74 static_cast<double*
>(buf.ptr) +
75 (i + 1) * buf.shape[1]);
// NOTE(review): push_back(row) copies the freshly built row.
76 nested_data.push_back(row);
78 self.
add(nested_data);
// Convert each chunk (a sequence of rows) into a rows x cols array.
83 for (
const auto& chunk : chunks) {
85 ssize_t rows = chunk.size();
// Column count is taken from the first row; an empty chunk yields 0 columns.
86 ssize_t cols = rows > 0 ? chunk[0].size() : 0;
88 auto array = py::array_t<double>({rows, cols});
89 auto buf = array.request();
90 double* ptr =
static_cast<double*
>(buf.ptr);
// NOTE(review): `i` is size_t but `rows` is ssize_t (signed/unsigned compare).
// NOTE(review): every chunk[i] is copied in full to offset i * cols; a row
// longer than chunk[0] would write past its slot — confirm rows are uniform.
92 for (
size_t i = 0; i < rows; ++i) {
93 std::copy(chunk[i].begin(), chunk[i].end(), ptr + i * cols);
// Exposes chunk_processing::Chunk<std::vector<std::vector<double>>> to Python
// as "Chunk3D", constructible from a single size_t argument.
// NOTE(review): the `.def(...)` names and lambda headers for the two bindings
// below are not part of this excerpt.
100 py::class_<chunk_processing::Chunk<std::vector<std::vector<double>>>>(m,
"Chunk3D")
101 .def(py::init<size_t>())
104 py::array_t<double, py::array::c_style>& data) {
// Obtain the buffer description (pointer and shape) of the incoming array.
105 auto buf = data.request();
// Rejects the input with "Expected 3D array"; the guarding condition is not
// visible here (presumably a check on buf.ndim — confirm).
107 throw std::invalid_argument(
"Expected 3D array");
// Convert the flat buffer into nested vectors indexed [i][j][k].
110 std::vector<std::vector<std::vector<double>>> nested_data;
111 nested_data.reserve(buf.shape[0]);
113 double* ptr =
static_cast<double*
>(buf.ptr);
// NOTE(review): the size_t indices below are compared with buf.shape[...],
// presumably signed ssize_t (pybind11 buffer_info) — confirm.
114 for (
size_t i = 0; i < buf.shape[0]; ++i) {
115 std::vector<std::vector<double>> matrix;
116 matrix.reserve(buf.shape[1]);
117 for (
size_t j = 0; j < buf.shape[1]; ++j) {
// Row (i, j) starts at ptr + i * shape[1] * shape[2] + j * shape[2] and spans
// shape[2] elements.
118 std::vector<double> row(
119 ptr + (i * buf.shape[1] * buf.shape[2]) + (j * buf.shape[2]),
120 ptr + (i * buf.shape[1] * buf.shape[2]) + ((j + 1) * buf.shape[2]));
// NOTE(review): push_back(row) / push_back(matrix) copy the freshly built
// containers.
121 matrix.push_back(row);
123 nested_data.push_back(matrix);
125 self.
add(nested_data);
// Convert each chunk into a depth x rows x cols array and append it to
// `result`.
130 for (
const auto& chunk : chunks) {
// Guard for chunks with no matrices or an empty first matrix; the statement
// it controls is not visible here. The chunk[0][0] access below relies on it.
132 if (chunk.empty() || chunk[0].empty())
135 ssize_t depth = chunk.size();
136 ssize_t rows = chunk[0].size();
137 ssize_t cols = chunk[0][0].size();
139 auto array = py::array_t<double>({depth, rows, cols});
140 auto buf = array.request();
141 double* ptr =
static_cast<double*
>(buf.ptr);
// NOTE(review): size_t indices are compared with ssize_t bounds.
// NOTE(review): dimensions come from chunk[0] / chunk[0][0]; a longer row or
// larger matrix elsewhere in the chunk would write past its slot — confirm
// the data is uniform.
143 for (
size_t i = 0; i < depth; ++i) {
144 for (
size_t j = 0; j < rows; ++j) {
145 std::copy(chunk[i][j].begin(), chunk[i][j].end(),
146 ptr + (i * rows * cols) + (j * cols));
149 result.append(array);
// Exposes neural_chunking::NeuralChunking<double> to Python as
// "NeuralChunking", constructible from (size_t, double).
// NOTE(review): the `.def(...)` names and lambda headers are not part of this
// excerpt; only fragments of two lambda bodies are visible.
155 py::class_<neural_chunking::NeuralChunking<double>>(m,
"NeuralChunking")
156 .def(py::init<size_t, double>())
// Run chunking on `data` and append each chunk to `result` as a 1-D array
// built from the chunk's size and data pointer.
159 auto chunks = self.chunk(data);
161 for (
const auto& chunk : chunks) {
162 result.append(py::array_t<double>(chunk.size(), chunk.data()));
// Run training on `data` and return the losses as a 1-D array.
180 auto losses = self.train(data);
181 return py::array_t<double>(losses.size(), losses.data());
// Exposes gpu_chunking::GPUChunkProcessor<double> to Python as
// "GPUChunkProcessor" and binds its process_on_gpu member function under the
// same name.
// NOTE(review): the constructor binding, if any, is not part of this excerpt.
186 py::class_<gpu_chunking::GPUChunkProcessor<double>>(m,
"GPUChunkProcessor")
188 .def(
"process_on_gpu", &gpu_chunking::GPUChunkProcessor<double>::process_on_gpu);
// Exposes sophisticated_chunking::WaveletChunking<double> to Python as
// "WaveletChunking", constructible from (size_t, double).
// NOTE(review): the binding name for the lambda below and the target of
// "set_wavelet_type" are not part of this excerpt.
192 py::class_<sophisticated_chunking::WaveletChunking<double>>(m,
"WaveletChunking")
193 .def(py::init<size_t, double>())
196 const std::vector<double>& data) {
// Chunk `data` and append each chunk to `result` as a 1-D array.
197 auto chunks = self.chunk(data);
199 for (
const auto& chunk : chunks) {
200 result.append(py::array_t<double>(chunk.size(), chunk.data()));
209 .def(
"set_wavelet_type",
// Exposes sophisticated_chunking::MutualInformationChunking<double> to Python
// as "MutualInformationChunking", constructible from (size_t, double).
// NOTE(review): the binding name and lambda header for the body below are not
// part of this excerpt.
212 py::class_<sophisticated_chunking::MutualInformationChunking<double>>(
213 m,
"MutualInformationChunking")
214 .def(py::init<size_t, double>())
216 const std::vector<double>& data) {
// Chunk `data` and append each chunk to `result` as a 1-D array.
217 auto chunks = self.chunk(data);
219 for (
const auto& chunk : chunks) {
221 result.append(py::array_t<double>(chunk.size(), chunk.data()));
// Exposes sophisticated_chunking::DTWChunking<double> to Python as
// "DTWChunking". The constructor takes keyword arguments window_size
// (default 10) and threshold (default 1.0).
// NOTE(review): the callables bound to "get_distance_metric" and
// "set_distance_metric" are not part of this excerpt.
226 py::class_<sophisticated_chunking::DTWChunking<double>>(m,
"DTWChunking")
227 .def(py::init<size_t, double>(), py::arg(
"window_size") = 10, py::arg(
"threshold") = 1.0)
233 .def(
"get_distance_metric",
235 .def(
"set_distance_metric",
// Exposes chunk_metrics::ChunkQualityAnalyzer<double> to Python as
// "ChunkQualityAnalyzer" with methods "compute_silhouette_score",
// "compute_quality_score" and "compute_size_metrics".
// NOTE(review): the callables bound to these names (and any constructor
// binding) are not part of this excerpt.
239 py::class_<chunk_metrics::ChunkQualityAnalyzer<double>>(m,
"ChunkQualityAnalyzer")
243 .def(
"compute_silhouette_score",
245 .def(
"compute_quality_score",
247 .def(
"compute_size_metrics",
// Exposes chunk_viz::ChunkVisualizer<double> to Python as "ChunkVisualizer",
// constructible from (const std::vector<double>&, const std::string&).
// NOTE(review): the meaning of the two constructor arguments and any further
// method bindings are not visible in this excerpt.
252 py::class_<chunk_viz::ChunkVisualizer<double>>(m,
"ChunkVisualizer")
253 .def(py::init<
const std::vector<double>&,
const std::string&>())
// Exposes chunk_serialization::ChunkSerializer<double> to Python as
// "ChunkSerializer".
// NOTE(review): constructor and method bindings are not part of this excerpt.
260 py::class_<chunk_serialization::ChunkSerializer<double>>(m,
"ChunkSerializer")
267#ifdef HAVE_POSTGRESQL
// Exposes chunk_integrations::DatabaseChunkStore to Python as
// "DatabaseChunkStore" and binds the <double> instantiations of
// store_chunks_postgres and store_chunks_mongodb under those names.
// NOTE(review): the constructor signature takes a std::unique_ptr argument;
// the pybind11 smart-pointer documentation states that passing unique_ptr as
// a function argument is not supported — confirm this binding builds and is
// callable from Python.
// NOTE(review): lines between the visible fragments (including any further
// preprocessor guards) are not part of this excerpt.
268 py::class_<chunk_integrations::DatabaseChunkStore>(m,
"DatabaseChunkStore")
270 py::init<std::unique_ptr<chunk_integrations::DatabaseConnection>,
const std::string&>())
271 .def(
"store_chunks_postgres",
272 &chunk_integrations::DatabaseChunkStore::store_chunks_postgres<double>)
274 .def(
"store_chunks_mongodb",
275 &chunk_integrations::DatabaseChunkStore::store_chunks_mongodb<double>)
281#if defined(HAVE_KAFKA) || defined(HAVE_RABBITMQ)
// Exposes chunk_integrations::ChunkMessageQueue to Python as
// "ChunkMessageQueue" and binds the <double> instantiations of
// publish_chunks_kafka and publish_chunks_rabbitmq under those names.
// NOTE(review): the constructor signature takes a std::unique_ptr argument;
// the pybind11 smart-pointer documentation states that passing unique_ptr as
// a function argument is not supported — confirm this binding builds and is
// callable from Python.
// NOTE(review): lines between the visible fragments (including any further
// preprocessor guards) are not part of this excerpt.
282 py::class_<chunk_integrations::ChunkMessageQueue>(m,
"ChunkMessageQueue")
283 .def(py::init<std::unique_ptr<chunk_integrations::MessageQueueConnection>,
284 const std::string&>())
286 .def(
"publish_chunks_kafka",
287 &chunk_integrations::ChunkMessageQueue::publish_chunks_kafka<double>)
290 .def(
"publish_chunks_rabbitmq",
291 &chunk_integrations::ChunkMessageQueue::publish_chunks_rabbitmq<double>)
// Exposes chunk_benchmark::BenchmarkResult to Python as "BenchmarkResult".
// NOTE(review): its member/field bindings are not part of this excerpt.
297 py::class_<chunk_benchmark::BenchmarkResult>(m,
"BenchmarkResult")
// Exposes chunk_benchmark::ChunkBenchmark<double> to Python as
// "ChunkBenchmark". The constructor takes keyword arguments `data`
// (const std::vector<double>&) and `num_iterations` (size_t, default 100).
// NOTE(review): further method bindings are not part of this excerpt.
303 py::class_<chunk_benchmark::ChunkBenchmark<double>>(m,
"ChunkBenchmark")
304 .def(py::init<
const std::vector<double>&,
size_t>(), py::arg(
"data"),
305 py::arg(
"num_iterations") = 100)
// Registers chunk_processing::ChunkingError on module `m` as the Python
// exception "ChunkingError".
311 py::register_exception<chunk_processing::ChunkingError>(m,
"ChunkingError");
// Exposes chunk_processing::ChunkStrategy<double> to Python as
// "ChunkStrategy", using std::shared_ptr as the holder type.
// NOTE(review): its method bindings are not part of this excerpt.
314 py::class_<chunk_processing::ChunkStrategy<double>,
315 std::shared_ptr<chunk_processing::ChunkStrategy<double>>>(m,
"ChunkStrategy")
// Exposes chunk_processing::NeuralChunkingStrategy<double> to Python as
// "NeuralChunkingStrategy", using std::shared_ptr as the holder type.
// NOTE(review): a template argument line (presumably the base class) and the
// constructor/method bindings are not part of this excerpt — confirm.
318 py::class_<chunk_processing::NeuralChunkingStrategy<double>,
320 std::shared_ptr<chunk_processing::NeuralChunkingStrategy<double>>>(
321 m,
"NeuralChunkingStrategy")
325 py::class_<chunk_processing::SimilarityChunkingStrategy<double>,
327 std::shared_ptr<chunk_processing::SimilarityChunkingStrategy<double>>>(
328 m,
"SimilarityChunkingStrategy")
329 .def(py::init<double>())