I have a ton of code using the base::order() command and I am really too lazy to code around that in rcpp. Since Rcpp only supports sort, but not order, I spent 2 minutes creating this function:
// [[Rcpp::export]]
Rcpp::NumericVector order_cpp(Rcpp::NumericVector invec){
int leng = invec.size();
NumericVector y = clone(invec);
for(int i=0; i<leng; ++i){
y[sum(invec<invec[i])] = i+1;
}
return(y);
}
It somehow works. If the vectors are containing unique numbers, I get the same result as order(). If they are not unique, results are different, but not wrong (no unique solution really).
Using it:
c=sample(1:1000,500)
all.equal(order(c),order_cpp(c))
microbenchmark(order(c),order_cpp(c))
Unit: microseconds
expr min lq median uq max neval
order(c) 33.507 36.223 38.035 41.356 78.785 100
order_cpp(c) 2372.889 2427.071 2466.312 2501.932 2746.586 100
Ouch!
I need an efficient algorithm.
Ok, so I dug up a bubblesort implementation and adapted it:
// [[Rcpp::export]]
Rcpp::NumericVector bubble_order_cpp2(Rcpp::NumericVector vec){
double tmp = 0;
int n = vec.size();
Rcpp::NumericVector outvec = clone(vec);
for (int i = 0; i <n; ++i){
outvec[i]=static_cast<double>(i)+1.0;
}
int no_swaps;
int passes;
passes = 0;
while(true) {
no_swaps = 0;
for (int i = 0; i < n - 1 - passes; ++i) {
if(vec[i] > vec[i+1]) {
no_swaps++;
tmp = vec[i];
vec[i] = vec[i+1];
vec[i+1] = tmp;
tmp = outvec[i];
outvec[i] = outvec[i+1];
outvec[i+1] = tmp;
};
};
if(no_swaps == 0) break;
passes++;
};
return(outvec);
}
Well, it's better - but not great:
microbenchmark(order(c),order_cpp(c),bubble_order_cpp2(c),sort(c),c[order(c)])
Unit: microseconds
expr min lq median uq max neval
order(c) 33.809 38.034 40.1475 43.3170 72.144 100
order_cpp(c) 2339.080 2435.675 2478.5385 2526.8350 3535.637 100
bubble_order_cpp2(c) 219.752 231.977 234.5430 241.1840 322.383 100
sort(c) 59.467 64.749 68.2205 75.4645 148.815 100
c[order(c)] 38.336 41.204 44.3735 48.1460 93.878 100
Another finding: It's faster to order than to sort.
Well, then for shorter vectors:
c=sample(1:100)
microbenchmark(order(c),order_cpp(c),bubble_order_cpp2(c),sort(c),c[order(c)])
Unit: microseconds
expr min lq median uq max neval
order(c) 10.566 11.4710 12.8300 14.1880 63.089 100
order_cpp(c) 95.689 100.8200 102.7825 107.3105 198.018 100
bubble_order_cpp2(c) 9.962 11.1700 12.0750 13.2830 64.598 100
sort(c) 39.242 41.5065 42.5620 46.3355 155.758 100
c[order(c)] 11.773 12.6790 13.5840 15.9990 82.710 100
Oh well, I have overlooked an RcppArmadillo function:
// [[Rcpp::export]]
Rcpp::NumericVector ordera(arma::vec x) {
return(Rcpp::as<Rcpp::NumericVector>(Rcpp::wrap(arma::sort_index( x )+1)) );
}
microbenchmark(order(c),order_(c),ordera(c))
Unit: microseconds
expr min lq median uq max neval
order(c) 9.660 11.169 11.773 12.377 46.185 100
order_(c) 4.529 5.133 5.736 6.038 34.413 100
ordera(c) 4.227 4.830 5.434 6.038 60.976 100
See Question&Answers more detail:
os