First, assuming the data are sorted by Shopper and then by Day, both in ascending order, you can add a column indicating the purchase number with:
# Number each shopper's purchases 1, 2, ... in row order. ave() writes every
# group's result back to that group's own rows, so the counter stays aligned
# with the data even when a shopper's rows are not contiguous. Rows must still
# be in ascending Day order within each shopper.
df$Purchase <- ave(seq_along(df$Shopper), df$Shopper, FUN = seq_along)
df
# Day Shopper Choice Purchase
#1 1 A apple 1
#2 2 A apple 2
#3 1 B Banana 1
#4 1 C apple 1
#5 2 C Banana 2
#6 3 C apple 3
#7 1 D berry 1
#8 2 D berry 2
Then reshape the data-frame to "wide" format with
# Pivot the long purchase log to one row per shopper: the Choice column is
# spread into Choice.1, Choice.2, ... by purchase number, with NA where a
# shopper made fewer purchases.
df.w <- reshape(
  df[, c("Shopper", "Choice", "Purchase")],
  direction = "wide",
  idvar = "Shopper",
  timevar = "Purchase",
  v.names = "Choice"
)
df.w
# Shopper Choice.1 Choice.2 Choice.3
#1 A apple apple <NA>
#3 B Banana <NA> <NA>
#4 C apple Banana apple
#7 D berry berry <NA>
Finally, calculate the repurchase matrix of the first two purchases with:
# Joint relative frequencies of (first choice, second choice). table() leaves
# out shoppers whose second purchase is NA, so the proportions are taken over
# shoppers with at least two purchases.
prop.table(table(First = df.w$Choice.1, Second = df.w$Choice.2))
# Second
#First apple Banana berry
# apple 0.3333333 0.3333333 0.0000000
# Banana 0.0000000 0.0000000 0.0000000
# berry 0.0000000 0.0000000 0.3333333
To calculate the repurchase matrix of all purchases, start with the repurchase matrices of every pair of consecutive purchases:
# Columns 2, 3, ... of df.w hold each shopper's 1st, 2nd, ... choice.
choice.cols <- seq_len(ncol(df.w))[-1]
# A transition needs two purchases; fail clearly instead of letting a
# descending seq() index past the last column.
stopifnot(length(choice.cols) >= 2)
# Give every table the same categories. With character columns table() would
# otherwise size each table by the values present in that particular column,
# and tables of different shapes cannot be stacked into one array.
choice.levels <- Reduce(union, lapply(df.w[choice.cols],
                                      function(x) levels(as.factor(x))))
# One First-by-Second contingency table per pair of consecutive purchases.
repurchase <- lapply(choice.cols[-length(choice.cols)], function(i) {
  table(First = factor(df.w[[i]], levels = choice.levels),
        Second = factor(df.w[[i + 1]], levels = choice.levels))
})
# Stack the tables along a third dimension (one slice per transition).
repurchase <- simplify2array(repurchase)
repurchase
#, , 1
#
# Second
#First apple Banana berry
# apple 1 1 0
# Banana 0 0 0
# berry 0 0 1
#
#, , 2
#
# Second
#First apple Banana berry
# apple 0 0 0
# Banana 1 0 0
# berry 0 0 0
then add all matrices to get the "total" repurchase matrix
# Sum over the third dimension (the transitions), keeping First x Second.
apply(X = repurchase, MARGIN = c(1, 2), FUN = sum)
# Second
#First apple Banana berry
# apple 1 1 0
# Banana 1 0 0
# berry 0 0 1
(absolute frequencies)
# Same totals as above, rescaled so that all cells together sum to 1.
prop.table(apply(X = repurchase, MARGIN = c(1, 2), FUN = sum))
# Second
#First apple Banana berry
# apple 0.25 0.25 0.00
# Banana 0.25 0.00 0.00
# berry 0.00 0.00 0.25
(relative frequencies)