c++ - In CUDA / Thrust, how can I access a vector element's neighbor during a for-each operation? -


I am trying to do a scientific simulation using the Thrust library in CUDA, but I got stuck on the following operation, which is basically a for-each loop:

device_vector<float> in(n);  for-each in(x) in in       out(x) = some_calculation(in(x-1),in(x),in(x+1)); end 

I have looked on stackoverflow.com and found some similar questions: similar questions 1

But it seems that using a transform iterator is only possible when the some_calculation function takes 2 parameters, because the transform iterator passes 2 parameters at most.

then, question 2: similar questions 2

the discussion ended without conclusion.

I believe this is a simple problem, because it is a natural requirement for parallel calculation. Can anyone tell me how to do it?

Fancy iterators are the key to this sort of operation, which isn't all that intuitive in Thrust. You can use the zip_iterator to create tuples of values which can then be iterated over, so for a typical f(x[i-1], x[i], x[i+1]) type function, you get something like this:

#include <iostream> #include <cmath> #include <thrust/iterator/zip_iterator.h> #include <thrust/tuple.h> #include <thrust/transform.h>  struct divided_diff {     float dx;     divided_diff(float _dx) : dx(_dx) {};      float operator()(const thrust::tuple<float, float, float> &in) const {         float y0 = in.get<0>();         float y1 = in.get<1>();         float y2 = in.get<2>();          return (y0 - 2.f * y1 + y2) / (dx * dx);     } };  int main() {     const int n = 10;     const float dx = 0.1f;     float x[n], y[n], dydx[n];      (int = 0; < n; ++i) {         x[i] = dx * float(i);         y[i] = std::sin(x[i]);         dydx[i] = 0.f;     }      auto begin = thrust::make_zip_iterator(thrust::make_tuple(&y[0], &y[1], &y[2]));     auto end = thrust::make_zip_iterator(thrust::make_tuple(&y[n-2], &y[n-1], &y[n]));      divided_diff f(dx);     thrust::transform(begin, end, &dydx[1], f);      (int = 0; < n; ++i) {         std::cout << << " " << dydx[i] << std::endl;     }      return 0; } 

Here the functor processes one tuple at a time, where the tuple contains the three inputs taken from three different starting points in the same array or iterative sequence.


Edit: Apparently converting the host version of this code to use device constructs is proving challenging for the original poster, so here is a version which executes everything on the device, using thrust::device_vector as the base container:

#include <cmath>
#include <iostream>

#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/sequence.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>

// Three-point stencil functor: given the tuple (y[i-1], y[i], y[i+1]) it
// returns (y[i-1] - 2*y[i] + y[i+1]) / dx^2, i.e. the second-order central
// difference of y on a uniform grid with spacing dx.
struct divided_diff {
    float dx;  // grid spacing
    divided_diff(float _dx) : dx(_dx) {}

    __device__
    float operator()(const thrust::tuple<float, float, float> &in) const {
        // thrust::get<N>() is the free-function tuple accessor.
        const float y0 = thrust::get<0>(in);
        const float y1 = thrust::get<1>(in);
        const float y2 = thrust::get<2>(in);

        return (y0 - 2.f * y1 + y2) / (dx * dx);
    }
};

// Element-wise sine evaluated in device code.
struct mysinf {
    __device__
    float operator()(const float &x) const {
        // __sinf is a CUDA device intrinsic, so this functor can only be
        // called from device code.
        return __sinf(x);
    }
};

int main() {
    const int n = 10;
    const float dx = 0.1f;
    // Only the n-2 interior points have a complete stencil, so the output
    // vector is two elements shorter than the input.
    thrust::device_vector<float> x(n), y(n), dydx(n - 2);

    // x = 0, dx, 2*dx, ...   and   y = sin(x)
    thrust::sequence(x.begin(), x.end(), 0.f, dx);
    thrust::transform(x.begin(), x.end(), y.begin(), mysinf());

    // Zip three views of y, offset by 0, 1 and 2 elements, so that each
    // dereference yields the tuple (y[i-1], y[i], y[i+1]); the range holds
    // n-2 tuples.
    auto start = thrust::make_zip_iterator(
        thrust::make_tuple(y.begin(), y.begin() + 1, y.begin() + 2));
    auto finish = thrust::make_zip_iterator(
        thrust::make_tuple(y.end() - 2, y.end() - 1, y.end()));

    divided_diff f(dx);
    thrust::transform(start, finish, dydx.begin(), f);

    // Read each result back through the device iterator and print it.
    for (thrust::device_vector<float>::iterator it = dydx.begin(); it != dydx.end(); ++it) {
        float val = *it;
        std::cout << val << std::endl;
    }

    return 0;
}

Comments

Popular posts from this blog

apache - PHP Soap issue while content length is larger -

asynchronous - Python asyncio task got bad yield -

javascript - Complete OpenIDConnect auth when requesting via Ajax -