35 return std::vector<double>();
38 std::vector<double> coefficients;
44 for (
size_t i = 0; i <= data.size() -
window_size_; ++i) {
47 double diff =
static_cast<double>(data[i + j]) -
55 const std::vector<double> h = {-0.1294, 0.2241, 0.8365,
57 for (
size_t i = 0; i <= data.size() -
window_size_; ++i) {
59 for (
size_t j = 0; j < std::min(
window_size_,
size_t(4)); ++j) {
60 if (i + j < data.size()) {
61 sum += h[j] *
static_cast<double>(data[i + j]);
64 coefficients.push_back(std::abs(sum));
67 throw std::invalid_argument(
"Unsupported wavelet type: " +
wavelet_type_);
87 std::vector<std::vector<T>>
chunk(
const std::vector<T>& data)
const {
93 std::vector<std::vector<T>> chunks;
94 std::vector<T> current_chunk;
97 for (
const T& value : data) {
98 current_chunk.push_back(value);
100 if (i < coefficients.size() && coefficients[i] >
threshold_) {
101 if (!current_chunk.empty()) {
102 chunks.push_back(current_chunk);
103 current_chunk.clear();
109 if (!current_chunk.empty()) {
110 chunks.push_back(current_chunk);
138 throw std::invalid_argument(
"Window size cannot be zero");
163 if (type !=
"haar" && type !=
"db1" && type !=
"sym2") {
164 throw std::invalid_argument(
"Invalid wavelet type. Supported types: haar, db1, sym2");
187 const std::vector<T>& segment2)
const {
188 if (segment1.empty() || segment2.empty()) {
193 std::map<T, double> p1, p2;
194 std::map<std::pair<T, T>,
double> p12;
196 for (
const auto& val : segment1) {
197 p1[val] += 1.0 / segment1.size();
200 for (
const auto& val : segment2) {
201 p2[val] += 1.0 / segment2.size();
205 size_t min_size = std::min(segment1.size(), segment2.size());
206 for (
size_t i = 0; i < min_size; ++i) {
207 p12[{segment1[i], segment2[i]}] += 1.0 / min_size;
212 for (
const auto& [val1, prob1] : p1) {
213 for (
const auto& [val2, prob2] : p2) {
214 auto joint_prob = p12[{val1, val2}];
215 if (joint_prob > 0) {
216 mi += joint_prob * std::log2(joint_prob / (prob1 * prob2));
238 std::vector<std::vector<T>>
chunk(
const std::vector<T>& data)
const {
243 std::vector<std::vector<T>> chunks;
244 std::vector<T> current_chunk;
246 for (
size_t i = 0; i < data.size(); ++i) {
247 current_chunk.push_back(data[i]);
250 std::vector<T> next_segment(data.begin() + i + 1,
257 chunks.push_back(current_chunk);
258 current_chunk.clear();
263 if (!current_chunk.empty()) {
264 chunks.push_back(current_chunk);
292 throw std::invalid_argument(
"Context size cannot be zero");
318 return std::abs(a - b);
321 double norm_a = std::abs(a);
322 double norm_b = std::abs(b);
323 if (norm_a == 0 || norm_b == 0)
325 return 1.0 - (dot / (norm_a * norm_b));
333 const std::vector<double>& seq2)
const {
334 const size_t n = seq1.size();
335 const size_t m = seq2.size();
336 std::vector<std::vector<double>> dp(
337 n + 1, std::vector<double>(m + 1, std::numeric_limits<double>::infinity()));
341 for (
size_t i = 1; i <= n; ++i) {
345 dp[i][j] = cost + std::min({
356 template <
typename U>
361 std::vector<double> flattened;
362 for (
const auto& inner : data) {
364 flattened.insert(flattened.end(), inner_features.begin(), inner_features.end());
369 return std::vector<double>(data.begin(), data.end());
373 return {
static_cast<double>(data)};
390 return std::abs(
static_cast<double>(seq1[0] - seq2[0]));
416 std::vector<std::vector<T>>
chunk(
const std::vector<T>& data)
const {
421 std::vector<std::vector<T>> result;
422 std::vector<T> current_chunk;
424 for (
const auto& value : data) {
426 if (!current_chunk.empty()) {
429 result.push_back(current_chunk);
430 current_chunk.clear();
435 if (!current_chunk.empty() &&
436 std::abs(
static_cast<double>(value - current_chunk.back())) >
dtw_threshold_) {
437 result.push_back(current_chunk);
438 current_chunk.clear();
441 current_chunk.push_back(value);
444 if (!current_chunk.empty()) {
445 result.push_back(current_chunk);
473 throw std::invalid_argument(
"Window size cannot be zero");
498 if (metric !=
"euclidean" && metric !=
"manhattan" && metric !=
"cosine") {
499 throw std::invalid_argument(
500 "Invalid distance metric. Supported metrics: euclidean, manhattan, cosine");
Dynamic time warping based chunking for sequence alignment.
std::vector< std::vector< T > > chunk(const std::vector< T > &data) const
Chunk data based on DTW analysis.
void set_distance_metric(const std::string &metric)
Set the distance metric.
double calculate_distance(double a, double b) const
DTWChunking(size_t window_size=10, double dtw_threshold=1.0)
Constructor for DTW-based chunking.
std::vector< double > flatten_features(const U &data) const
std::string distance_metric_
std::string get_distance_metric() const
Get the distance metric.
size_t get_window_size() const
Get the size of the warping window.
void set_window_size(size_t size)
Set the size of the warping window.
double compute_dtw_core(const std::vector< double > &seq1, const std::vector< double > &seq2) const
double computeDTWDistance(const std::vector< T > &seq1, const std::vector< T > &seq2) const
Compute DTW distance between sequences.
double get_dtw_threshold() const
Get the threshold for chunk boundaries.
void set_dtw_threshold(double threshold)
Set the threshold for chunk boundaries.
double compute_dtw_distance(const std::vector< T > &seq1, const std::vector< T > &seq2) const
Wavelet-based chunking strategy using signal processing principles.
double get_threshold() const
Get the coefficient threshold for chunk boundaries.
void set_wavelet_type(const std::string &type)
Set the wavelet type.
void set_window_size(size_t size)
Set the size of the sliding window.
std::vector< double > computeWaveletCoefficients(const std::vector< T > &data) const
Compute discrete wavelet transform coefficients.
std::string wavelet_type_
std::string get_wavelet_type() const
Get the current wavelet type.
void set_threshold(double threshold)
Set the coefficient threshold for chunk boundaries.
WaveletChunking(size_t window_size=8, double threshold=0.5)
Constructor for wavelet-based chunking.
std::vector< std::vector< T > > chunk(const std::vector< T > &data) const
Chunk data based on wavelet transform analysis.
size_t get_window_size() const
Get the size of the sliding window.