Advanced Chunk Processing Library 0.2.0
A comprehensive C++ library for advanced data chunking strategies and processing operations
Loading...
Searching...
No Matches
sophisticated_chunking::DTWChunking< T > Class Template Reference

Dynamic time warping based chunking for sequence alignment. More...

#include <sophisticated_chunking.hpp>

+ Collaboration diagram for sophisticated_chunking::DTWChunking< T >:

Public Member Functions

 DTWChunking (size_t window_size=10, double dtw_threshold=1.0)
 Constructor for DTW-based chunking.
 
std::vector< std::vector< T > > chunk (const std::vector< T > &data) const
 Chunk data based on DTW analysis.
 
std::string get_distance_metric () const
 Get the distance metric.
 
double get_dtw_threshold () const
 Get the threshold for chunk boundaries.
 
size_t get_window_size () const
 Get the size of the warping window.
 
void set_distance_metric (const std::string &metric)
 Set the distance metric.
 
void set_dtw_threshold (double threshold)
 Set the threshold for chunk boundaries.
 
void set_window_size (size_t size)
 Set the size of the warping window.
 

Private Member Functions

double calculate_distance (double a, double b) const
 
double compute_dtw_core (const std::vector< double > &seq1, const std::vector< double > &seq2) const
 
double compute_dtw_distance (const std::vector< T > &seq1, const std::vector< T > &seq2) const
 
double computeDTWDistance (const std::vector< T > &seq1, const std::vector< T > &seq2) const
 Compute DTW distance between sequences.
 
template<typename U >
std::vector< double > flatten_features (const U &data) const
 

Private Attributes

std::string distance_metric_
 
double dtw_threshold_
 
size_t window_size_
 

Detailed Description

template<typename T>
class sophisticated_chunking::DTWChunking< T >

Dynamic time warping based chunking for sequence alignment.

Template Parameters
T: The type of elements to be chunked

Definition at line 310 of file sophisticated_chunking.hpp.

Constructor & Destructor Documentation

◆ DTWChunking()

template<typename T >
sophisticated_chunking::DTWChunking< T >::DTWChunking ( size_t  window_size = 10,
double  dtw_threshold = 1.0 
)
inline

Constructor for DTW-based chunking.

Parameters
window_size: Size of the warping window
dtw_threshold: Threshold for chunk boundaries

Definition at line 408 of file sophisticated_chunking.hpp.

Member Function Documentation

◆ calculate_distance()

template<typename T >
double sophisticated_chunking::DTWChunking< T >::calculate_distance ( double  a,
double  b 
) const
inline, private

Definition at line 316 of file sophisticated_chunking.hpp.

316 {
317 if (distance_metric_ == "manhattan") {
318 return std::abs(a - b);
319 } else if (distance_metric_ == "cosine") {
320 double dot = a * b;
321 double norm_a = std::abs(a);
322 double norm_b = std::abs(b);
323 if (norm_a == 0 || norm_b == 0)
324 return 0.0;
325 return 1.0 - (dot / (norm_a * norm_b));
326 } else {
327 double diff = a - b;
328 return diff * diff;
329 }
330 }

References sophisticated_chunking::DTWChunking< T >::distance_metric_.

Referenced by sophisticated_chunking::DTWChunking< T >::compute_dtw_core().

◆ chunk()

template<typename T >
std::vector< std::vector< T > > sophisticated_chunking::DTWChunking< T >::chunk ( const std::vector< T > &  data) const
inline

Chunk data based on DTW analysis.

Parameters
data: Input data to be chunked
Returns
Vector of chunks

Definition at line 416 of file sophisticated_chunking.hpp.

416 {
417 if (data.empty()) {
418 return {};
419 }
420
421 std::vector<std::vector<T>> result;
422 std::vector<T> current_chunk;
423
424 for (const auto& value : data) {
426 if (!current_chunk.empty()) {
427 double distance = compute_dtw_distance(value, current_chunk.back());
428 if (distance > dtw_threshold_) {
429 result.push_back(current_chunk);
430 current_chunk.clear();
431 }
432 }
433 } else {
434 // Single-dimension logic
435 if (!current_chunk.empty() &&
436 std::abs(static_cast<double>(value - current_chunk.back())) > dtw_threshold_) {
437 result.push_back(current_chunk);
438 current_chunk.clear();
439 }
440 }
441 current_chunk.push_back(value);
442 }
443
444 if (!current_chunk.empty()) {
445 result.push_back(current_chunk);
446 }
447
448 return result;
449 }
double compute_dtw_distance(const std::vector< T > &seq1, const std::vector< T > &seq2) const

References sophisticated_chunking::DTWChunking< T >::compute_dtw_distance(), and sophisticated_chunking::DTWChunking< T >::dtw_threshold_.

Referenced by demonstrate_dtw_chunking(), TEST_F(), and TEST_F().

◆ compute_dtw_core()

template<typename T >
double sophisticated_chunking::DTWChunking< T >::compute_dtw_core ( const std::vector< double > &  seq1,
const std::vector< double > &  seq2 
) const
inline, private

Definition at line 332 of file sophisticated_chunking.hpp.

333 {
334 const size_t n = seq1.size();
335 const size_t m = seq2.size();
336 std::vector<std::vector<double>> dp(
337 n + 1, std::vector<double>(m + 1, std::numeric_limits<double>::infinity()));
338
339 dp[0][0] = 0.0;
340
341 for (size_t i = 1; i <= n; ++i) {
342 for (size_t j = std::max(1ul, i - window_size_); j <= std::min(m, i + window_size_);
343 ++j) {
344 double cost = calculate_distance(seq1[i - 1], seq2[j - 1]);
345 dp[i][j] = cost + std::min({
346 dp[i - 1][j], // insertion
347 dp[i][j - 1], // deletion
348 dp[i - 1][j - 1] // match
349 });
350 }
351 }
352
353 return dp[n][m];
354 }
double calculate_distance(double a, double b) const

References sophisticated_chunking::DTWChunking< T >::calculate_distance(), and sophisticated_chunking::DTWChunking< T >::window_size_.

Referenced by sophisticated_chunking::DTWChunking< T >::compute_dtw_distance().

◆ compute_dtw_distance()

template<typename T >
double sophisticated_chunking::DTWChunking< T >::compute_dtw_distance ( const std::vector< T > &  seq1,
const std::vector< T > &  seq2 
) const
inline, private

Definition at line 377 of file sophisticated_chunking.hpp.

377 {
380 // For multi-dimensional data, flatten and compare features
381 auto features1 = flatten_features(seq1);
382 auto features2 = flatten_features(seq2);
383 return compute_dtw_core(features1, features2);
384 } else {
385 // For 1D vector data
386 return compute_dtw_core(seq1, seq2);
387 }
388 } else {
389 // For scalar data
390 return std::abs(static_cast<double>(seq1[0] - seq2[0]));
391 }
392 }
std::vector< double > flatten_features(const U &data) const
double compute_dtw_core(const std::vector< double > &seq1, const std::vector< double > &seq2) const

References sophisticated_chunking::DTWChunking< T >::compute_dtw_core(), and sophisticated_chunking::DTWChunking< T >::flatten_features().

Referenced by sophisticated_chunking::DTWChunking< T >::chunk().

◆ computeDTWDistance()

template<typename T >
double sophisticated_chunking::DTWChunking< T >::computeDTWDistance ( const std::vector< T > &  seq1,
const std::vector< T > &  seq2 
) const
private

Compute DTW distance between sequences.

Parameters
seq1: First sequence
seq2: Second sequence
Returns
DTW distance

◆ flatten_features()

template<typename T >
template<typename U >
std::vector< double > sophisticated_chunking::DTWChunking< T >::flatten_features ( const U &  data) const
inline, private

Definition at line 357 of file sophisticated_chunking.hpp.

357 {
360 // Handle 2D arrays
361 std::vector<double> flattened;
362 for (const auto& inner : data) {
363 auto inner_features = flatten_features(inner);
364 flattened.insert(flattened.end(), inner_features.begin(), inner_features.end());
365 }
366 return flattened;
367 } else {
368 // Handle 1D arrays
369 return std::vector<double>(data.begin(), data.end());
370 }
371 } else {
372 // Handle scalar values
373 return {static_cast<double>(data)};
374 }
375 }

References sophisticated_chunking::DTWChunking< T >::flatten_features().

Referenced by sophisticated_chunking::DTWChunking< T >::compute_dtw_distance(), and sophisticated_chunking::DTWChunking< T >::flatten_features().

◆ get_distance_metric()

template<typename T >
std::string sophisticated_chunking::DTWChunking< T >::get_distance_metric ( ) const
inline

Get the distance metric.

Returns
Distance metric

Definition at line 489 of file sophisticated_chunking.hpp.

489 {
490 return distance_metric_;
491 }

References sophisticated_chunking::DTWChunking< T >::distance_metric_.

◆ get_dtw_threshold()

template<typename T >
double sophisticated_chunking::DTWChunking< T >::get_dtw_threshold ( ) const
inline

Get the threshold for chunk boundaries.

Returns
Threshold for chunk boundaries

Definition at line 463 of file sophisticated_chunking.hpp.

463 {
464 return dtw_threshold_;
465 }

References sophisticated_chunking::DTWChunking< T >::dtw_threshold_.

◆ get_window_size()

template<typename T >
size_t sophisticated_chunking::DTWChunking< T >::get_window_size ( ) const
inline

Get the size of the warping window.

Returns
Size of the warping window

Definition at line 455 of file sophisticated_chunking.hpp.

455 {
456 return window_size_;
457 }

References sophisticated_chunking::DTWChunking< T >::window_size_.

◆ set_distance_metric()

template<typename T >
void sophisticated_chunking::DTWChunking< T >::set_distance_metric ( const std::string &  metric)
inline

Set the distance metric.

Parameters
metric: Distance metric; must be "euclidean", "manhattan", or "cosine". Any other value throws std::invalid_argument.

Definition at line 497 of file sophisticated_chunking.hpp.

497 {
498 if (metric != "euclidean" && metric != "manhattan" && metric != "cosine") {
499 throw std::invalid_argument(
500 "Invalid distance metric. Supported metrics: euclidean, manhattan, cosine");
501 }
502 distance_metric_ = metric;
503 }

References sophisticated_chunking::DTWChunking< T >::distance_metric_.

◆ set_dtw_threshold()

template<typename T >
void sophisticated_chunking::DTWChunking< T >::set_dtw_threshold ( double  threshold)
inline

Set the threshold for chunk boundaries.

Parameters
threshold: Threshold for chunk boundaries

Definition at line 481 of file sophisticated_chunking.hpp.

481 {
482 dtw_threshold_ = threshold;
483 }

References sophisticated_chunking::DTWChunking< T >::dtw_threshold_.

◆ set_window_size()

template<typename T >
void sophisticated_chunking::DTWChunking< T >::set_window_size ( size_t  size)
inline

Set the size of the warping window.

Parameters
size: Size of the warping window; must be non-zero, otherwise std::invalid_argument is thrown.

Definition at line 471 of file sophisticated_chunking.hpp.

471 {
472 if (size == 0)
473 throw std::invalid_argument("Window size cannot be zero");
474 window_size_ = size;
475 }

References sophisticated_chunking::DTWChunking< T >::window_size_.

Member Data Documentation

◆ distance_metric_

◆ dtw_threshold_

◆ window_size_


The documentation for this class was generated from the following file: