Advanced Chunk Processing Library 0.2.0
A comprehensive C++ library for advanced data chunking strategies and processing operations
gpu_chunking::GPUChunking< T > Class Template Reference

#include <gpu_chunking.hpp>

Public Member Functions

 GPUChunking (int window_sz=32, float thresh=0.1f)
 
 ~GPUChunking ()
 
std::vector< std::vector< T > > chunk (const std::vector< T > &data)
 
float get_threshold () const
 
int get_window_size () const
 
void set_threshold (float thresh)
 
void set_window_size (int size)
 

Static Public Member Functions

static std::string get_gpu_info ()
 
static bool is_gpu_available ()
 

Private Member Functions

template<typename U >
U * allocate_device_memory (size_t size)
 
template<typename U >
void copy_from_device (U *h_ptr, const U *d_ptr, size_t size)
 
template<typename U >
void copy_to_device (U *d_ptr, const U *h_ptr, size_t size)
 

Private Attributes

cudaStream_t stream
 
float threshold
 
int window_size
 

Detailed Description

template<typename T>
class gpu_chunking::GPUChunking< T >

GPU-accelerated chunker that computes chunk boundaries on a CUDA device. Input data is copied to the GPU, a boundary-detection kernel is launched on an internal CUDA stream using the configured window_size and threshold, and the reported boundaries are then used on the host to split the input into chunks.

Definition at line 64 of file gpu_chunking.hpp.
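
A minimal usage sketch (illustrative only; the sample data and variable names are not part of the library):

#include "gpu_chunking.hpp"

#include <iostream>
#include <vector>

int main() {
    // Skip GPU work entirely on machines without a CUDA device.
    if (!gpu_chunking::GPUChunking<int>::is_gpu_available()) {
        std::cerr << "No CUDA-capable GPU found\n";
        return 1;
    }

    // 32-element window and 0.1 boundary threshold (the defaults).
    gpu_chunking::GPUChunking<int> chunker(32, 0.1f);

    std::vector<int> data = {7, 3, 9, 1, 4, 8, 2, 6, 5, 0};
    std::vector<std::vector<int>> chunks = chunker.chunk(data);

    std::cout << "Produced " << chunks.size() << " chunks\n";
    return 0;
}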

Constructor & Destructor Documentation

◆ GPUChunking()

template<typename T >
gpu_chunking::GPUChunking< T >::GPUChunking ( int window_sz = 32, float thresh = 0.1f )
inline

Definition at line 91 of file gpu_chunking.hpp.

92 : window_size(window_sz), threshold(thresh) {
93 CUDA_CHECK(cudaStreamCreate(&stream));
94 }

References CUDA_CHECK, and gpu_chunking::GPUChunking< T >::stream.
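
CUDA_CHECK is a helper macro defined elsewhere in gpu_chunking.hpp; its body is not reproduced on this page. A typical error-checking wrapper of this shape might look like the following sketch (an assumption about the macro, not the library's actual definition):

// Hypothetical sketch of a CUDA error-checking macro; the real CUDA_CHECK
// is defined in gpu_chunking.hpp and may differ. Requires <stdexcept>,
// <string>, and the CUDA runtime headers.
#define CUDA_CHECK(call)                                               \
    do {                                                               \
        cudaError_t err_ = (call);                                     \
        if (err_ != cudaSuccess) {                                     \
            throw std::runtime_error(std::string("CUDA error: ") +     \
                                     cudaGetErrorString(err_));        \
        }                                                              \
    } while (0)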

◆ ~GPUChunking()

template<typename T >
gpu_chunking::GPUChunking< T >::~GPUChunking ( )
inline

Definition at line 96 of file gpu_chunking.hpp.

96 {
97 cudaStreamDestroy(stream);
98 }

References gpu_chunking::GPUChunking< T >::stream.

Member Function Documentation

◆ allocate_device_memory()

template<typename T >
template<typename U >
U * gpu_chunking::GPUChunking< T >::allocate_device_memory ( size_t  size)
inline private

Definition at line 72 of file gpu_chunking.hpp.

72 {
73 U* d_ptr;
74 CUDA_CHECK(cudaMalloc(&d_ptr, size * sizeof(U)));
75 return d_ptr;
76 }

References CUDA_CHECK.
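
Note that the returned pointer is a raw device allocation: ownership passes to the caller, which must release it with cudaFree(), as chunk() does for d_data and d_boundaries.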

◆ chunk()

template<typename T >
std::vector< std::vector< T > > gpu_chunking::GPUChunking< T >::chunk ( const std::vector< T > &  data)
inline

Definition at line 100 of file gpu_chunking.hpp.

100 {
101 if (data.empty())
102 return {};
103
104 // Allocate device memory
105 int* d_data = allocate_device_memory<int>(data.size());
106 int* d_boundaries = allocate_device_memory<int>(data.size());
107
108 // Copy input data to GPU
109 copy_to_device(d_data, data.data(), data.size());
110
111 // Configure kernel launch parameters
112 const int BLOCK_SIZE = 256;
113 int num_blocks = (data.size() + BLOCK_SIZE - 1) / BLOCK_SIZE;
114
115 // Launch kernel
116 chunk_kernel<<<num_blocks, BLOCK_SIZE, 0, stream>>>(d_data, d_boundaries, data.size(),
117 window_size, threshold);
118
119 // Check for kernel errors
120 CUDA_CHECK(cudaGetLastError());
121
122 // Copy boundaries back to host
123 std::vector<int> boundaries(data.size());
124 copy_from_device(boundaries.data(), d_boundaries, data.size());
125
126 // Synchronize stream
127 CUDA_CHECK(cudaStreamSynchronize(stream));
128
129 // Free device memory
130 CUDA_CHECK(cudaFree(d_data));
131 CUDA_CHECK(cudaFree(d_boundaries));
132
133 // Create chunks based on boundaries
134 std::vector<std::vector<T>> chunks;
135 std::vector<T> current_chunk;
136
137 for (size_t i = 0; i < data.size(); ++i) {
138 current_chunk.push_back(data[i]);
139 if (boundaries[i] || i == data.size() - 1) {
140 if (!current_chunk.empty()) {
141 chunks.push_back(std::move(current_chunk));
142 current_chunk = std::vector<T>();
143 }
144 }
145 }
146
147 return chunks;
148 }

References gpu_chunking::GPUChunking< T >::copy_from_device(), gpu_chunking::GPUChunking< T >::copy_to_device(), CUDA_CHECK, gpu_chunking::GPUChunking< T >::stream, gpu_chunking::GPUChunking< T >::threshold, and gpu_chunking::GPUChunking< T >::window_size.
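
An illustrative call (input values hypothetical); chunk() copies the input to the device, launches the boundary kernel on the internal stream, synchronizes, and then splits the input on the host wherever a boundary was flagged:

#include <cassert>
#include <numeric>

std::vector<int> data(1 << 20);
std::iota(data.begin(), data.end(), 0);     // example input: 0, 1, 2, ...

gpu_chunking::GPUChunking<int> chunker;
std::vector<std::vector<int>> chunks = chunker.chunk(data);

// Every element of the input appears in exactly one chunk,
// and chunks preserve the original order.
std::size_t total = 0;
for (const auto& c : chunks)
    total += c.size();
assert(total == data.size());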

◆ copy_from_device()

template<typename T >
template<typename U >
void gpu_chunking::GPUChunking< T >::copy_from_device ( U * h_ptr, const U * d_ptr, size_t size )
inline private

Definition at line 86 of file gpu_chunking.hpp.

86 {
87 CUDA_CHECK(cudaMemcpyAsync(h_ptr, d_ptr, size * sizeof(U), cudaMemcpyDeviceToHost, stream));
88 }

References CUDA_CHECK, and gpu_chunking::GPUChunking< T >::stream.

Referenced by gpu_chunking::GPUChunking< T >::chunk().

◆ copy_to_device()

template<typename T >
template<typename U >
void gpu_chunking::GPUChunking< T >::copy_to_device ( U * d_ptr, const U * h_ptr, size_t size )
inline private

Definition at line 80 of file gpu_chunking.hpp.

80 {
81 CUDA_CHECK(cudaMemcpyAsync(d_ptr, h_ptr, size * sizeof(U), cudaMemcpyHostToDevice, stream));
82 }

References CUDA_CHECK, and gpu_chunking::GPUChunking< T >::stream.

Referenced by gpu_chunking::GPUChunking< T >::chunk().
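
Both copy helpers enqueue asynchronous transfers (cudaMemcpyAsync) on the object's CUDA stream rather than blocking. Host buffers involved in a transfer must therefore stay valid, and copied-back results must not be read, until the stream is synchronized; chunk() calls cudaStreamSynchronize(stream) before using the boundary data for exactly this reason.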

◆ get_gpu_info()

template<typename T >
static std::string gpu_chunking::GPUChunking< T >::get_gpu_info ( )
inline static

Definition at line 180 of file gpu_chunking.hpp.

180 {
181 if (!is_gpu_available()) {
182 return "No CUDA-capable GPU found";
183 }
184
185 cudaDeviceProp prop;
186 CUDA_CHECK(cudaGetDeviceProperties(&prop, 0));
187
188 return std::string("GPU Device: ") + prop.name +
189 "\nCompute capability: " + std::to_string(prop.major) + "." +
190 std::to_string(prop.minor);
191 }

References CUDA_CHECK, and gpu_chunking::GPUChunking< T >::is_gpu_available().
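
A startup diagnostic sketch (illustrative):

// Log which device will be used before any chunking work starts.
std::cout << gpu_chunking::GPUChunking<float>::get_gpu_info() << std::endl;

The returned string always describes device 0, since that is the device queried by cudaGetDeviceProperties().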

◆ get_threshold()

template<typename T >
float gpu_chunking::GPUChunking< T >::get_threshold ( ) const
inline

Definition at line 168 of file gpu_chunking.hpp.

168 {
169 return threshold;
170 }

References gpu_chunking::GPUChunking< T >::threshold.

◆ get_window_size()

template<typename T >
int gpu_chunking::GPUChunking< T >::get_window_size ( ) const
inline

Definition at line 165 of file gpu_chunking.hpp.

165 {
166 return window_size;
167 }

References gpu_chunking::GPUChunking< T >::window_size.

◆ is_gpu_available()

template<typename T >
static bool gpu_chunking::GPUChunking< T >::is_gpu_available ( )
inline static

Definition at line 173 of file gpu_chunking.hpp.

173 {
174 int device_count;
175 cudaError_t error = cudaGetDeviceCount(&device_count);
176 return (error == cudaSuccess) && (device_count > 0);
177 }

Referenced by gpu_chunking::GPUChunking< T >::get_gpu_info().
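
Because the check only queries the CUDA device count, it is cheap enough to use as a guard before constructing a chunker; cpu_chunk below is a hypothetical host-side fallback, not part of the library:

std::vector<std::vector<int>> chunks;
if (gpu_chunking::GPUChunking<int>::is_gpu_available()) {
    gpu_chunking::GPUChunking<int> gpu_chunker;
    chunks = gpu_chunker.chunk(data);
} else {
    chunks = cpu_chunk(data);   // hypothetical fallback for CPU-only hosts
}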

◆ set_threshold()

template<typename T >
void gpu_chunking::GPUChunking< T >::set_threshold ( float  thresh)
inline

Definition at line 158 of file gpu_chunking.hpp.

158 {
159 if (thresh <= 0.0f || thresh >= 1.0f) {
160 throw std::invalid_argument("Threshold must be between 0 and 1");
161 }
162 threshold = thresh;
163 }

References gpu_chunking::GPUChunking< T >::threshold.

◆ set_window_size()

template<typename T >
void gpu_chunking::GPUChunking< T >::set_window_size ( int  size)
inline

Definition at line 151 of file gpu_chunking.hpp.

151 {
152 if (size <= 0) {
153 throw std::invalid_argument("Window size must be positive");
154 }
155 window_size = size;
156 }

References gpu_chunking::GPUChunking< T >::window_size.
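
Both setters validate their argument and throw std::invalid_argument on out-of-range values, so calls with untrusted configuration should be wrapped; the values below are illustrative:

gpu_chunking::GPUChunking<int> chunker;
try {
    chunker.set_window_size(64);     // must be positive
    chunker.set_threshold(0.25f);    // must lie strictly between 0 and 1
} catch (const std::invalid_argument& e) {
    std::cerr << "Invalid chunking parameter: " << e.what() << '\n';
}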

Member Data Documentation

◆ stream

cudaStream_t gpu_chunking::GPUChunking< T >::stream
private

◆ threshold

float gpu_chunking::GPUChunking< T >::threshold
private

◆ window_size

int gpu_chunking::GPUChunking< T >::window_size
private


The documentation for this class was generated from the following file:

gpu_chunking.hpp