100 std::vector<std::vector<T>>
chunk(
const std::vector<T>& data) {
105 int* d_data = allocate_device_memory<int>(data.size());
106 int* d_boundaries = allocate_device_memory<int>(data.size());
112 const int BLOCK_SIZE = 256;
113 int num_blocks = (data.size() + BLOCK_SIZE - 1) / BLOCK_SIZE;
116 chunk_kernel<<<num_blocks, BLOCK_SIZE, 0, stream>>>(d_data, d_boundaries, data.size(),
123 std::vector<int> boundaries(data.size());
134 std::vector<std::vector<T>> chunks;
135 std::vector<T> current_chunk;
137 for (
size_t i = 0; i < data.size(); ++i) {
138 current_chunk.push_back(data[i]);
139 if (boundaries[i] || i == data.size() - 1) {
140 if (!current_chunk.empty()) {
141 chunks.push_back(std::move(current_chunk));
142 current_chunk = std::vector<T>();