Class implementing neural network-based chunking. More...

#include <neural_chunking.hpp>

Collaboration diagram for neural_chunking::NeuralChunking< T >:

Public Member Functions
	NeuralChunking (size_t window_size=8, double threshold=0.5)

std::vector< std::vector< T > >	chunk (const std::vector< T > &data) const

std::string	get_activation () const
	Get the current activation function type.

size_t	get_batch_size () const
	Get the current batch size.

size_t	get_epochs () const
	Get the current number of training epochs.

double	get_learning_rate () const
	Get the current learning rate.

double	get_threshold () const

size_t	get_window_size () const

void	set_activation (const std::string &activation)
	Set the activation function type.

void	set_batch_size (size_t size)
	Set the batch size for training.

void	set_epochs (size_t num_epochs)
	Set the number of training epochs.

void	set_learning_rate (double rate)
	Set the learning rate for neural network training.

void	set_threshold (double threshold)

void	set_window_size (size_t size)

std::vector< double >	train (const std::vector< T > &data)
	Train the neural network on the provided data.

Private Member Functions
double	activation_derivative (double x) const

double	apply_activation (double x) const

template<typename U >
double	compute_feature (const U &arr) const

std::vector< double >	prepare_batch (const std::vector< T > &data, size_t start_idx) const

Private Attributes
std::string	activation_

size_t	batch_size_

size_t	epochs_

double	learning_rate_

double	threshold_

size_t	window_size_

Detailed Description

template<typename T>
class neural_chunking::NeuralChunking< T >

Class implementing neural network-based chunking.

Template Parameters

T	Data type of elements to chunk

Definition at line 81 of file neural_chunking.hpp.

Constructor & Destructor Documentation

◆ NeuralChunking()

template<typename T >

neural_chunking::NeuralChunking< T >::NeuralChunking	(	size_t	window_size = `8`,
		double	threshold = `0.5`
	)

inline

Definition at line 149 of file neural_chunking.hpp.

150 : window_size_(window_size), threshold_(threshold), learning_rate_(0.01), batch_size_(32),

151 activation_("relu"), epochs_(100) {}

neural_chunking::NeuralChunking::learning_rate_

double learning_rate_

Definition neural_chunking.hpp:85

neural_chunking::NeuralChunking::activation_

std::string activation_

Definition neural_chunking.hpp:87

neural_chunking::NeuralChunking::batch_size_

size_t batch_size_

Definition neural_chunking.hpp:86

neural_chunking::NeuralChunking::window_size_

size_t window_size_

Definition neural_chunking.hpp:83

neural_chunking::NeuralChunking::epochs_

size_t epochs_

Definition neural_chunking.hpp:88

neural_chunking::NeuralChunking::threshold_

double threshold_

Definition neural_chunking.hpp:84

Member Function Documentation

◆ activation_derivative()

template<typename T >

double neural_chunking::NeuralChunking< T >::activation_derivative ( double x ) const

inline private

Definition at line 101 of file neural_chunking.hpp.

                                                 {
        // Derivative of the configured activation function evaluated at x.

        // ReLU: slope is 1 for strictly positive x and 0 otherwise (x == 0 included).
        if (activation_ == "relu") {
            if (x > 0) {
                return 1.0;
            }
            return 0.0;
        }

        // Sigmoid: s * (1 - s), where s is the activation value at x.
        if (activation_ == "sigmoid") {
            const double s = apply_activation(x);
            return s * (1 - s);
        }

        // Any other stored name is treated as tanh: 1 - tanh(x)^2.
        const double t = std::tanh(x);
        return 1 - t * t;
    }

◆ apply_activation()

template<typename T >

double neural_chunking::NeuralChunking< T >::apply_activation ( double x ) const

inline private

Definition at line 91 of file neural_chunking.hpp.

                                            {
        // Applies the configured activation function to a single value.

        // ReLU: pass strictly positive inputs through, clamp everything else to 0.
        if (activation_ == "relu") {
            if (x > 0) {
                return x;
            }
            return 0.0;
        }

        // Sigmoid: 1 / (1 + e^-x).
        if (activation_ == "sigmoid") {
            const double denominator = 1.0 + std::exp(-x);
            return 1.0 / denominator;
        }

        // Any other stored name is treated as tanh.
        return std::tanh(x);
    }

◆ chunk()

template<typename T >

std::vector< std::vector< T > > neural_chunking::NeuralChunking< T >::chunk ( const std::vector< T > & data ) const

inline

Definition at line 167 of file neural_chunking.hpp.

                                                                  {
        // Splits `data` into consecutive chunks. A new chunk starts whenever the
        // absolute difference between an element's feature and the feature of the
        // last element of the current chunk exceeds threshold_.
        // Returns an empty result for empty input.
        if (data.empty()) {
            return {};
        }

        // Inputs no longer than one window (size <= window_size_) are returned
        // unchanged as a single chunk.
        if (data.size() <= window_size_) {
            return {data};
        }

        std::vector<std::vector<T>> result;
        std::vector<T> current_chunk;

        for (const auto& value : data) {
            if (!current_chunk.empty()) {
                double jump = 0.0;
                if constexpr (chunk_processing::is_vector<T>::value) {
                    // Vector-valued elements are compared through their scalar feature.
                    jump = std::abs(compute_feature(value) -
                                    compute_feature(current_chunk.back()));
                } else {
                    // Convert to double BEFORE subtracting: subtracting in T wraps
                    // around for unsigned element types (e.g. 3u - 5u), which would
                    // turn a small step into a huge difference.
                    jump = std::abs(static_cast<double>(value) -
                                    static_cast<double>(current_chunk.back()));
                }

                if (jump > threshold_) {
                    // Hand the finished chunk's storage to the result instead of
                    // copying it; the swap leaves current_chunk empty.
                    result.emplace_back();
                    result.back().swap(current_chunk);
                }
            }
            current_chunk.push_back(value);
        }

        // Flush the trailing chunk.
        if (!current_chunk.empty()) {
            result.emplace_back();
            result.back().swap(current_chunk);
        }

        return result;
    }

Referenced by main(), and TEST_F().

◆ compute_feature()

template<typename T >

template<typename U >

double neural_chunking::NeuralChunking< T >::compute_feature ( const U & arr ) const

inline private

Definition at line 129 of file neural_chunking.hpp.

                                               {
        // Reduces a value to a single scalar feature:
        //  - scalar        -> the value converted to double
        //  - 1D container  -> arithmetic mean of its elements
        //  - nested        -> mean of the features of the inner containers
        // An empty container yields 0.0.
        if constexpr (chunk_processing::is_vector<U>::value) {
            // The mean of zero elements would be 0.0 / 0 == NaN, and NaN then
            // propagates into chunk() comparisons and training batches.
            if (arr.size() == 0) {
                return 0.0;
            }

            const double count = static_cast<double>(arr.size());
            if constexpr (chunk_processing::is_vector<typename U::value_type>::value) {
                // Nested containers: average the per-row features.
                double sum = 0.0;
                for (const auto& inner : arr) {
                    sum += compute_feature(inner);
                }
                return sum / count;
            } else {
                // Flat container: plain arithmetic mean, accumulated in double.
                return std::accumulate(arr.begin(), arr.end(), 0.0) / count;
            }
        } else {
            // Scalar values are their own feature.
            return static_cast<double>(arr);
        }
    }

◆ get_activation()

template<typename T >

std::string neural_chunking::NeuralChunking< T >::get_activation ( ) const

inline

Get the current activation function type.

Returns: Current activation function name

Definition at line 260 of file neural_chunking.hpp.

                                     {
        // Returns a copy of the stored activation function name.
        return activation_;
    }

◆ get_batch_size()

template<typename T >

size_t neural_chunking::NeuralChunking< T >::get_batch_size ( ) const

inline

Get the current batch size.

Returns: Current batch size

Definition at line 240 of file neural_chunking.hpp.

                                  {
        // Returns the stored batch size used by prepare_batch() and train().
        return batch_size_;
    }

◆ get_epochs()

template<typename T >

size_t neural_chunking::NeuralChunking< T >::get_epochs ( ) const

inline

Get the current number of training epochs.

Returns: Current number of epochs

Definition at line 279 of file neural_chunking.hpp.

                              {
        // Returns the stored epoch count; train() runs this many epochs.
        return epochs_;
    }

◆ get_learning_rate()

template<typename T >

double neural_chunking::NeuralChunking< T >::get_learning_rate ( ) const

inline

Get the current learning rate.

Returns: Current learning rate

Definition at line 221 of file neural_chunking.hpp.

                                     {
        // Returns the stored learning rate.
        return learning_rate_;
    }

◆ get_threshold()

template<typename T >

double neural_chunking::NeuralChunking< T >::get_threshold ( ) const

inline

Definition at line 163 of file neural_chunking.hpp.

                                 {
        // Returns the stored split threshold that chunk() compares feature differences against.
        return threshold_;
    }

◆ get_window_size()

template<typename T >

size_t neural_chunking::NeuralChunking< T >::get_window_size ( ) const

inline

Definition at line 160 of file neural_chunking.hpp.

                                   {
        // Returns the stored window size.
        return window_size_;
    }

Referenced by main().

◆ prepare_batch()

template<typename T >

std::vector< double > neural_chunking::NeuralChunking< T >::prepare_batch	(	const std::vector< T > &	data,
		size_t	start_idx
	)		const

inline private

Definition at line 114 of file neural_chunking.hpp.

                                                                                      {
        // Builds one batch of scalar features from up to batch_size_ consecutive
        // elements of `data`, beginning at start_idx. The batch is shorter when
        // fewer elements remain and empty when start_idx is at or past the end.
        std::vector<double> batch;

        // Guard first: data.size() - start_idx is unsigned and would wrap around
        // if start_idx were greater than data.size().
        if (start_idx >= data.size()) {
            return batch;
        }

        const size_t count = std::min(batch_size_, data.size() - start_idx);
        batch.reserve(count);

        for (size_t offset = 0; offset < count; ++offset) {
            // compute_feature() converts scalars with static_cast<double> and
            // averages containers, so it covers both element kinds.
            batch.push_back(compute_feature(data[start_idx + offset]));
        }
        return batch;
    }

◆ set_activation()

template<typename T >

void neural_chunking::NeuralChunking< T >::set_activation ( const std::string & activation )

inline

Set the activation function type.

Parameters

activation Activation function name ("relu", "sigmoid", or "tanh")

Definition at line 248 of file neural_chunking.hpp.

                                                     {
        // Only the three names understood by apply_activation() and
        // activation_derivative() are accepted; anything else is rejected
        // before the stored value is touched.
        const bool supported =
            activation == "relu" || activation == "sigmoid" || activation == "tanh";
        if (!supported) {
            throw std::invalid_argument(
                "Invalid activation function. Supported: relu, sigmoid, tanh");
        }

        activation_ = activation;
    }

◆ set_batch_size()

template<typename T >

void neural_chunking::NeuralChunking< T >::set_batch_size ( size_t size )

inline

Set the batch size for training.

Parameters

size	Batch size (must be positive)

Definition at line 229 of file neural_chunking.hpp.

                                     {
        // A zero batch size is rejected (train() divides by batch_size_).
        const bool is_positive = size > 0;
        if (!is_positive) {
            throw std::invalid_argument("Batch size must be positive");
        }

        batch_size_ = size;
    }

◆ set_epochs()

template<typename T >

void neural_chunking::NeuralChunking< T >::set_epochs ( size_t num_epochs )

inline

Set the number of training epochs.

Parameters

num_epochs Number of epochs (must be positive)

Definition at line 268 of file neural_chunking.hpp.

                                       {
        // At least one epoch is required; zero is rejected.
        const bool is_positive = num_epochs > 0;
        if (!is_positive) {
            throw std::invalid_argument("Number of epochs must be positive");
        }

        epochs_ = num_epochs;
    }

◆ set_learning_rate()

template<typename T >

void neural_chunking::NeuralChunking< T >::set_learning_rate ( double rate )

inline

Set the learning rate for neural network training.

Parameters

rate	Learning rate value (must be positive)

Definition at line 210 of file neural_chunking.hpp.

                                        {
        // Accept only strictly positive rates. The check is written as
        // !(rate > 0.0) rather than rate <= 0.0 so that NaN is rejected too:
        // every ordered comparison with NaN is false, so NaN would slip
        // through a `<=` test and be stored.
        if (!(rate > 0.0)) {
            throw std::invalid_argument("Learning rate must be positive");
        }
        learning_rate_ = rate;
    }

◆ set_threshold()

template<typename T >

void neural_chunking::NeuralChunking< T >::set_threshold ( double threshold )

inline

Definition at line 156 of file neural_chunking.hpp.

                                         {
        // Stores the split threshold as given; no range validation is performed.
        // chunk() starts a new chunk when a feature difference is strictly
        // greater than this value.
        threshold_ = threshold;
    }

Referenced by main().

◆ set_window_size()

template<typename T >

void neural_chunking::NeuralChunking< T >::set_window_size ( size_t size )

inline

Definition at line 153 of file neural_chunking.hpp.

                                      {
        // Stores the window size as given; unlike set_batch_size() and
        // set_epochs(), a value of 0 is accepted here.
        // NOTE(review): confirm whether 0 should be rejected for consistency.
        window_size_ = size;
    }

◆ train()

template<typename T >

std::vector< double > neural_chunking::NeuralChunking< T >::train ( const std::vector< T > & data )

inline

Train the neural network on the provided data.

Parameters

data	Training data

Returns: Vector of loss values for each epoch

Definition at line 288 of file neural_chunking.hpp.

                                                      {
        // Runs epochs_ passes over `data` in batches of batch_size_ and returns
        // the mean squared-error loss of each epoch (one entry per epoch).
        // Throws std::invalid_argument when data has fewer than window_size_
        // elements.

        // NOTE(review): the check accepts data.size() == window_size_, while the
        // message says "larger than"; the message is left unchanged for callers
        // that may match on it.
        if (data.size() < window_size_) {
            throw std::invalid_argument("Training data size must be larger than window size");
        }

        // Initialize neural network layers
        Layer<double> input_layer(window_size_, window_size_);
        Layer<double> hidden_layer(window_size_, 1);

        std::vector<double> epoch_losses;
        epoch_losses.reserve(epochs_);

        // Number of batches needed to cover the data (ceiling division); it does
        // not change between epochs, so compute it once.
        const size_t num_batches = (data.size() + batch_size_ - 1) / batch_size_;

        // Training loop
        for (size_t epoch = 0; epoch < epochs_; ++epoch) {
            double epoch_loss = 0.0;
            // Batches that actually contributed a loss term this epoch.
            size_t processed_batches = 0;

            for (size_t batch = 0; batch < num_batches; ++batch) {
                const size_t start_idx = batch * batch_size_;
                auto batch_data = prepare_batch(data, start_idx);
                // A batch shorter than the window cannot be indexed up to
                // window_size_ below; stop this epoch here.
                if (batch_data.size() < window_size_)
                    break;

                // Forward pass
                auto hidden = input_layer.forward(batch_data);
                for (auto& h : hidden)
                    h = apply_activation(h);
                auto output = hidden_layer.forward(hidden);

                // Compute loss: half squared error against the last batch element.
                const double target = batch_data.back();
                const double prediction = output[0];
                const double error = prediction - target;
                epoch_loss += 0.5 * error * error;
                ++processed_batches;

                // Backward pass and update weights (simplified)
                const double delta = error * activation_derivative(prediction);

                // NOTE(review): this adjusts the local `hidden` vector only, and
                // that vector is not read again in this iteration; no Layer
                // weights are modified by the code visible here.
                for (size_t i = 0; i < window_size_; ++i) {
                    hidden[i] -= learning_rate_ * delta * batch_data[i];
                }
            }

            // Average over the batches that were really processed. Dividing by
            // num_batches would under-report the loss whenever the loop stopped
            // early on a short trailing batch. With no processed batch the
            // accumulated loss is 0, which is reported as 0.0.
            epoch_losses.push_back(processed_batches > 0
                                       ? epoch_loss / static_cast<double>(processed_batches)
                                       : 0.0);
        }

        return epoch_losses;
    }

private

Definition at line 83 of file neural_chunking.hpp.

The documentation for this class was generated from the following file:

neural_chunking.hpp

Public Member Functions

Private Member Functions

Private Attributes

Detailed Description

Constructor & Destructor Documentation

◆ NeuralChunking()

Member Function Documentation

◆ activation_derivative()

◆ apply_activation()

◆ chunk()

◆ compute_feature()

◆ get_activation()

◆ get_batch_size()

◆ get_epochs()

◆ get_learning_rate()

◆ get_threshold()

◆ get_window_size()

◆ prepare_batch()

◆ set_activation()

◆ set_batch_size()

◆ set_epochs()

◆ set_learning_rate()

◆ set_threshold()

◆ set_window_size()

◆ train()

Member Data Documentation

◆ activation_

◆ batch_size_

◆ epochs_

◆ learning_rate_

◆ threshold_

◆ window_size_