c++ - In CUDA / Thrust, how can I access a vector element's neighbor during a for-each operation? -
I am trying to do a scientific simulation using the Thrust library in CUDA, but I got stuck on the following operation, which is essentially a for-each loop:
device_vector<float> In(n); for-each In(x) in In: Out(x) = some_calculation(In(x-1), In(x), In(x+1)); end
I have looked on stackoverflow.com and found some similar questions: similar question 1
But it seems that using a transform iterator is only possible when the some_calculation function takes 2 parameters, because a transform iterator passes 2 parameters at most.
then, question 2: similar questions 2
The discussion there ended without a conclusion.
I believe this is a simple problem, because it is a natural requirement for parallel calculation. Can anyone tell me how to do this?
Fancy iterators are the key to this sort of operation, which isn't all that intuitive in Thrust. You can use the zip_iterator
to create tuples of values which can then be iterated over, so for a typical f(x[i-1], x[i], x[i+1])
type function, you get something like this:
#include <iostream> #include <cmath> #include <thrust/iterator/zip_iterator.h> #include <thrust/tuple.h> #include <thrust/transform.h> struct divided_diff { float dx; divided_diff(float _dx) : dx(_dx) {}; float operator()(const thrust::tuple<float, float, float> &in) const { float y0 = in.get<0>(); float y1 = in.get<1>(); float y2 = in.get<2>(); return (y0 - 2.f * y1 + y2) / (dx * dx); } }; int main() { const int n = 10; const float dx = 0.1f; float x[n], y[n], dydx[n]; (int = 0; < n; ++i) { x[i] = dx * float(i); y[i] = std::sin(x[i]); dydx[i] = 0.f; } auto begin = thrust::make_zip_iterator(thrust::make_tuple(&y[0], &y[1], &y[2])); auto end = thrust::make_zip_iterator(thrust::make_tuple(&y[n-2], &y[n-1], &y[n])); divided_diff f(dx); thrust::transform(begin, end, &dydx[1], f); (int = 0; < n; ++i) { std::cout << << " " << dydx[i] << std::endl; } return 0; }
Here the functor processes one tuple at a time, where each tuple contains the three inputs taken from three different starting points in the same array or iterative sequence.
Edit: Apparently converting the host version of this code to use device constructs was proving challenging for the original poster, so here is a version which executes everything on the device using thrust::device_vector
as the base container:
#include <iostream> #include <cmath> #include <thrust/tuple.h> #include <thrust/transform.h> #include <thrust/iterator/zip_iterator.h> #include <thrust/device_vector.h> #include <thrust/sequence.h> struct divided_diff { float dx; divided_diff(float _dx) : dx(_dx) {}; __device__ float operator()(const thrust::tuple<float, float, float> &in) { float y0 = in.get<0>(); float y1 = in.get<1>(); float y2 = in.get<2>(); return (y0 - 2.f*y1 + y2) / (dx * dx); } }; struct mysinf { __device__ float operator()(const float &x) { return __sinf(x); } }; int main() { const int n = 10; const float dx = 0.1f; thrust::device_vector<float> x(n), y(n), dydx(n-2); thrust::sequence(x.begin(), x.end(), 0.f, dx); thrust::transform(x.begin(), x.end(), y.begin(), mysinf()); auto start = thrust::make_zip_iterator(thrust::make_tuple(y.begin(), y.begin()+1, y.begin()+2)); auto finish = thrust::make_zip_iterator(thrust::make_tuple(y.end()-2, y.end()-1, y.end())); divided_diff f(dx); thrust::transform( start, finish, dydx.begin(), f); thrust::device_vector<float>::iterator = dydx.begin(); for(; != dydx.end(); ++it) { float val = *it; std::cout << val << std::endl; } return 0; }
Comments
Post a Comment