I wrote the following code to train myself to use RcppParallel. It is just a toy example.
// [[Rcpp::depends(RcppParallel)]]
#include <Rcpp.h>
#include <RcppParallel.h>
#include <iostream>
using namespace Rcpp;
using namespace RcppParallel;
// Reduction worker meant to be driven by parallelReduce: for every index k of
// the range it receives, it adds input(k) + i + j to cell (i, j) of a
// dim x dim accumulator matrix.
// NOTE(review): input and output are Rcpp types (NumericVector / NumericMatrix),
// and the splitting constructor below allocates a new NumericMatrix. If that
// constructor or operator() runs on a worker thread, R-managed memory is being
// touched off the main R thread -- presumably the cause of the intermittent
// "stack imbalance" warning; confirm against the RcppParallel thread-safety notes.
struct Lapin : public Worker {
// input pars
// vector being reduced over; only read inside this struct
const NumericVector input;
// number of rows and of columns of the accumulator
const size_t dim;
// outputs a matrix
// accumulator owned by this worker instance
NumericMatrix output;
// two constructors
// main constructor: stores the input and allocates a dim x dim output
// (dim arrives as int and is converted to the size_t member;
// presumably the new matrix starts zero-filled -- the accumulation relies on it)
Lapin(const NumericVector input, const int dim) : input(input), dim(dim), output(NumericMatrix(dim,dim)) {}
// splitting constructor (tagged with Split): same input and dim as jeannot,
// but a newly allocated dim x dim output of its own
Lapin(const Lapin & jeannot, Split) : input(jeannot.input), dim(jeannot.dim), output(NumericMatrix(dim,dim)) {}
// the working operator
// processes the half-open index range [begin, end) of input
void operator()(size_t begin, size_t end) {
for(size_t k = begin; k < end; k++) {
for(size_t i = 0; i < dim; i++) {
for(size_t j = 0; j < dim; j++) {
// each cell receives the current input value plus its row and column index
output(i,j) += input(k)+i+j;
}
}
}
}
// the join
// folds the accumulator of another worker (peter) into this one
void join(const Lapin & peter) {
output += peter.output;
}
};
// Entry point exposed to R: builds a Lapin worker from A and dim, runs it over
// every index of A with parallelReduce, and returns the worker's output matrix.
// [[Rcpp::export]]
NumericMatrix f(NumericVector A, size_t dim) {
    // Initial worker; parallelReduce derives further workers from it as needed.
    Lapin reducer(A, dim);

    // Reduce over the index range [0, length of A).
    parallelReduce(0, A.length(), reducer);

    return reducer.output;
}
Here is what happens in R, after sourceCpp-ing it:
> f(rep(1,1100), 5)
[,1] [,2] [,3] [,4] [,5]
[1,] 1100 2200 3300 4400 5500
[2,] 2200 3300 4400 5500 6600
[3,] 3300 4400 5500 6600 7700
[4,] 4400 5500 6600 7700 8800
[5,] 5500 6600 7700 8800 9900
> sourceCpp("parallel-matrix-reduce.cpp")
> f(rep(1,1100), 5)
Warning: stack imbalance in '.Call', 6 then 11
[,1] [,2] [,3] [,4] [,5]
[1,] 1100 2200 3300 4400 5500
[2,] 2200 3300 4400 5500 6600
[3,] 3300 4400 5500 6600 7700
[4,] 4400 5500 6600 7700 8800
[5,] 5500 6600 7700 8800 9900
Note that the behavior is erratic: sometimes I get no warning at all, sometimes it appears on the first run... I guess my session info can be useful here:
> sessionInfo()
R version 3.1.2 (2014-10-31)
Platform: x86_64-redhat-linux-gnu (64-bit)
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] Rcpp_0.11.3
loaded via a namespace (and not attached):
[1] RcppParallel_4.3.3 tools_3.1.2
I thank you all in advance for your answers and comments.
EDIT: As Dirk explains below, this is due to the use of R types in the Worker, which confuses the garbage collector. I resolved the issue by using Armadillo matrices instead (I was a bit confused by RMatrix). Here is the corrected code:
// [[Rcpp::depends(RcppParallel)]]
// [[Rcpp::depends(RcppArmadillo)]]
#include <RcppArmadillo.h>
#include <RcppParallel.h>
#include <iostream>
using namespace Rcpp;
using namespace RcppParallel;
struct Lapin : public Worker {
// input pars
const arma::vec input;
const size_t dim;
// outputs a matrix
arma::mat output;
// two constructors
Lapin(const arma::vec input, const int dim) : input(input), dim(dim), output(arma::mat(dim,dim)) {
output.zeros();
}
Lapin(const Lapin & jeannot, Split) : input(jeannot.input), dim(jeannot.dim), output(arma::mat(dim,dim)) {
output.zeros();
}
// the working operator
void operator()(size_t begin, size_t end) {
for(size_t k = begin; k < end; k++) {
for(size_t i = 0; i < dim; i++) {
for(size_t j = 0; j < dim; j++) {
output(i,j) += input(k)+i+j;
}
}
}
}
// the join
void join(const Lapin & peter) {
output += peter.output;
}
};
// Entry point exposed to R: builds a Lapin worker from A and dim, runs it over
// every index of A with parallelReduce, and returns the accumulated matrix.
// [[Rcpp::export]]
arma::mat f(arma::vec & A, size_t dim) {
    // Initial worker; it stays alive until the reduction has finished.
    Lapin reducer(A, dim);

    // Reduce over the index range [0, number of elements of A).
    parallelReduce(0, A.size(), reducer);

    return reducer.output;
}
See Question & Answers for more details:
os