As @CarlWitthoft already mentioned you have to rethink your data structure because of many duplicated data.
Here you find a simple vectorized approach:
## create all possible ranges of months
ranges <- mapply(function(mi, ma) {seq(from=mi, to=ma, by="month")}, mi=extremes$min, ma=extremes$max)
## how many months per ID?
n <- unlist(lapply(ranges, length))
## create new data.frame
output <- data.frame(X_BusinessIDDescription=rep(extremes$X_BusinessIDDescription, n),
min=rep(extremes$min, n),
max=rep(extremes$max, n),
month=as.Date(unlist(ranges), origin="1970-01-01"), stringsAsFactors=FALSE)
Comparison to your approach:
extremes <- data.frame(X_BusinessIDDescription=c("ID105", "ID206", "ID204", "ID785", "ID125", "ID107"),
min=as.Date(c("2007-12-01", "2007-12-01", "2007-12-01", "2008-07-01", "2007-11-01", "2007-11-01")),
max=as.Date(c("2008-06-01", "2009-07-01", "2008-02-01", "2010-08-01", "2008-07-01", "2011-06-01")),
month=as.Date(c("2007-12-01", "2007-12-01", "2007-12-01", "2008-07-01", "2007-11-01", "2007-11-01")),
stringsAsFactors=FALSE)
approachWhile <- function(extremes) {
output <- data.frame(X_BusinessIDDescription=NA, min=as.Date("1970-01-01"), max=as.Date("1970-01-01"), month=as.Date("1970-01-01"), stringsAsFactors=FALSE)
IDcounter <- 1
IDmax <- nrow(extremes)
linecounter <- 1
while (IDcounter <= IDmax){
start <- extremes$min[IDcounter]
end <- extremes$max[IDcounter] # add three months
while(start <= end){
output[linecounter,] <- extremes[IDcounter,]
output$month[linecounter] <- start
linecounter <- linecounter+1
start <- seq(start, by ="month", length=2)[2]
}
IDcounter <- IDcounter + 1
}
return(output)
}
approachMapply <- function(extremes) {
ranges <- mapply(function(mi, ma) {seq(from=mi, to=ma, by="month")}, mi=extremes$min, ma=extremes$max)
n <- unlist(lapply(ranges, length))
output <- data.frame(X_BusinessIDDescription=rep(extremes$X_BusinessIDDescription, n),
min=rep(extremes$min, n),
max=rep(extremes$max, n),
month=as.Date(unlist(ranges), origin="1970-01-01"), stringsAsFactors=FALSE)
return(output)
}
identical(approachWhile(extremes), approachMapply(extremes)) ## TRUE
library("rbenchmark")
benchmark(approachWhile(extremes), approachMapply(extremes), order="relative")
# test replications elapsed relative user.self sys.self
#2 approachMapply(extremes) 100 0.176 1.00 0.172 0.000
#1 approachWhile(extremes) 100 6.102 34.67 6.077 0.008