reshape2
is a package which allows an powerful array of data transformations, through its two-part melt/cast approach. However, like all tools it embeds assumptions which limit the cases it can handle.
What data reshaping problem can reshape2
not handle in its current form?
The ideal answer will include:
- A description of the type of use cases where this data shape is typically found
- Sample data
- Code to accomplish the transformation (ideally using as much of the transformation with
reshape2
as possible)
Example
"Wide" data is common in panel applications.
melt.wide <- function(data, id.vars, new.names, sep=".", variable.name="variable", ... ) {
# Guess number of variables currently wide
colnames(data) <- sub( paste0(sep,"$"), "", colnames(data) )
wide.vars <- colnames(data)[grep( sep, colnames(data) )]
n.wide <- str_count( wide.vars, sep )
stopifnot(length(new.names)==unique(n.wide))
# Melt
data.melt <- melt(data,id.vars=id.vars,measure.vars=wide.vars,...)
new <- stack.list(str_split(data.melt$variable,sep))
colnames(new) <- c(variable.name,new.names)
data.melt <- subset(data.melt,select=c(-variable))
cbind(data.melt,new)
}
choice.vars <- colnames(res)[grep("_",colnames(res))]
melt.wide( subset(res,select=c("WorkerId",choice.vars)), id.vars="WorkerId", new.names=c("set","option"), sep="_")
The new function returns a melted object that can then be *cast
.
Where the data is:
so <- structure(list(WorkerId = c(12L, 13L, 27L, 25L, 30L, 8L), pio_1_1 = structure(c(2L,
1L, 2L, 1L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"),
pio_1_2 = structure(c(1L, 2L, 2L, 2L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), pio_1_3 = structure(c(1L, 1L,
1L, 1L, 2L, 1L), .Label = c("No", "Yes"), class = "factor"),
pio_1_4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "No", class = "factor"),
pio_2_1 = structure(c(1L, 2L, 2L, 1L, 1L, 2L), .Label = c("No",
"Yes"), class = "factor"), pio_2_2 = structure(c(1L, 1L,
1L, 2L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
pio_2_3 = structure(c(2L, 2L, 2L, 2L, 2L, 1L), .Label = c("No",
"Yes"), class = "factor"), pio_2_4 = structure(c(1L, 1L,
1L, 1L, 1L, 1L), .Label = "No", class = "factor"), pio_3_1 = structure(c(2L,
2L, 2L, 2L, 2L, 1L), .Label = c("No", "Yes"), class = "factor"),
pio_3_2 = structure(c(2L, 1L, 1L, 1L, 2L, 1L), .Label = c("No",
"Yes"), class = "factor"), pio_3_3 = structure(c(2L, 1L,
2L, 1L, 1L, 2L), .Label = c("No", "Yes"), class = "factor"),
pio_3_4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "No", class = "factor"),
pio_4_1 = structure(c(2L, 1L, 2L, 2L, 1L, 2L), .Label = c("No",
"Yes"), class = "factor"), pio_4_2 = structure(c(2L, 2L,
2L, 1L, 2L, 1L), .Label = c("No", "Yes"), class = "factor"),
pio_4_3 = structure(c(1L, 2L, 1L, 1L, 2L, 2L), .Label = c("No",
"Yes"), class = "factor"), pio_4_4 = structure(c(1L, 1L,
1L, 1L, 1L, 1L), .Label = "No", class = "factor"), caremgmt_1_1 = structure(c(2L,
2L, 1L, 2L, 1L, 2L), .Label = c("No", "Yes"), class = "factor"),
caremgmt_1_2 = structure(c(1L, 2L, 2L, 2L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), caremgmt_1_3 = structure(c(1L,
1L, 1L, 1L, 2L, 1L), .Label = c("No", "Yes"), class = "factor"),
caremgmt_1_4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "No", class = "factor"),
caremgmt_2_1 = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("No",
"Yes"), class = "factor"), caremgmt_2_2 = structure(c(1L,
2L, 1L, 2L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
caremgmt_2_3 = structure(c(2L, 1L, 2L, 1L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), caremgmt_2_4 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "No", class = "factor"), caremgmt_3_1 = structure(c(2L,
1L, 2L, 1L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"),
caremgmt_3_2 = structure(c(2L, 1L, 2L, 2L, 2L, 1L), .Label = c("No",
"Yes"), class = "factor"), caremgmt_3_3 = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"),
caremgmt_3_4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "No", class = "factor"),
caremgmt_4_1 = structure(c(1L, 1L, 2L, 1L, 2L, 1L), .Label = c("No",
"Yes"), class = "factor"), caremgmt_4_2 = structure(c(2L,
2L, 2L, 2L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
caremgmt_4_3 = structure(c(1L, 1L, 1L, 1L, 1L, 2L), .Label = c("No",
"Yes"), class = "factor"), caremgmt_4_4 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "No", class = "factor"), prev_1_1 = structure(c(1L,
1L, 2L, 1L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"),
prev_1_2 = structure(c(1L, 2L, 1L, 2L, 1L, 1L), .Label = c("No",
"Yes"), class = "factor"), prev_1_3 = structure(c(2L, 1L,
1L, 2L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"),
prev_1_4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "No", class = "factor"),
prev_2_1 = structure(c(1L, 1L, 2L, 1L, 2L, 2L), .Label = c("No",
"Yes"), class = "factor"), prev_2_2 = structure(c(2L, 2L,
1L, 2L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"),
prev_2_3 = structure(c(1L, 2L, 1L, 1L, 2L, 2L), .Label = c("No",
"Yes"), class = "factor"), prev_2_4 = structure(c(1L, 1L,
1L, 1L, 1L, 1L), .Label = "No", class = "factor"), prev_3_1 = structure(c(1L,
2L, 1L, 1L, 2L, 1L), .Label = c("No", "Yes"), class = "factor"),
prev_3_2 = structure(c(1L, 1L, 2L, 1L, 2L, 2L), .Label = c("No",
"Yes"), class = "factor"), prev_3_3 = structure(c(2L, 2L,
1L, 2L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"),
prev_3_4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "No", class = "factor"),
prev_4_1 = structure(c(1L, 2L, 2L, 1L, 2L, 2L), .Label = c("No",
"Yes"), class = "factor"), prev_4_2 = structure(c(1L, 1L,
2L, 1L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"),
prev_4_3 = structure(c(1L, 1L, 1L, 2L, 2L, 1L), .Label = c("No",
"Yes"), class = "factor"), prev_4_4 = structure(c(1L, 1L,
1L, 1L, 1L, 1L), .Label = "No", class = "factor"), price_1_1 = structure(c(30L,
12L, 1L, 16L, 28L, 17L), .Label = c("$2,500", "$2,504", "$2,507",
"$2,509", "$2,512", "$2,513", "$2,515", "$2,526", "$2,547",
"$2,548", "$2,578", "$2,588", "$2,594", "$2,605", "$2,607",
"$2,617", "$2,618", "$2,622", "$2,635", "$2,649", "$2,670",
"$2,672", "$2,679", "$2,681", "$2,698", "$2,704", "$2,721",
"$2,782", "$2,851", "$2,884", "$2,919", "$2,925", "$2,935",
"$3,022"), class = "factor"), price_1_2 = structure(c(1L,
19L, 5L, 17L, 7L, 1L), .Label = c("$2,500", "$2,501", "$2,502",
"$2,504", "$2,513", "$2,515", "$2,517", "$2,532", "$2,535",
"$2,558", "$2,564", "$2,571", "$2,575", "$2,578", "$2,608",
"$2,633", "$2,634", "$2,675", "$2,678", "$2,687", "$2,730",
"$2,806", "$2,827", "$2,848", "$2,891", "$2,901", "$2,923",
"$2,933", "$2,937", "$2,958", "$2,987"), class = "factor"),
price_1_3 = structure(c(11L, 1L, 1L, 8L, 19L, 14L), .Label = c("$2,500",
"$2,504", "$2,507", "$2,513", "$2,516", "$2,518", "$2,564",
"$2,579", "$2,580", "$2,583", "$2,584", "$2,592", "$2,604",
"$2,608", "$2,639", "$2,643", "$2,646", "$2,665", "$2,667",
"$2,695", "$2,698", "$2,709", "$2,710", "$2,713", "$2,714",
"$2,750", "$2,757", "$2,876", "$2,978", "$2,984", "$3,024",
"$3,059"), class = "factor"), price_1_4 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "$2,500", class = "factor"),
price_2_1 = structure(c(27L, 32L, 19L, 22L, 4L, 26L), .Label = c("$2,500",
"$2,504", "$2,505", "$2,510", "$2,511", "$2,512", "$2,515",
"$2,517", "$2,518", "$2,529", "$2,533", "$2,537", "$2,551",
"$2,553", "$2,574", "$2,593", "$2,600", "$2,605", "$2,608",
"$2,612", "$2,613", "$2,618", "$2,639", "$2,657", "$2,714",
"$2,730", "$2,747", "$2,764", "$2,771", "$2,773", "$2,813",
"$2,859", "$2,901", "$3,019", "$3,037"), class = "factor"),
price_2_2 = structure(c(12L, 2L, 1L, 27L, 1L, 7L), .Label = c("$2,500",
"$2,502", "$2,510", "$2,514", "$2,515", "$2,516", "$2,517",
"$2,518", "$2,520", "$2,521", "$2,523", "$2,536", "$2,544",
"$2,575", "$2,583", "$2,592", "$2,602", "$2,624", "$2,644",
"$2,652", "$2,662", "$2,677", "$2,720", "$2,761", "$2,765",
"$2,770", "$2,772", "$2,835", "$2,873", "$2,911", "$2,950",
"$2,962"), class = "factor"), price_2_3 = structure(c(32L,
1L, 8L, 33L, 29L, 11L), .Label = c("$2,500", "$2,506", "$2,507",
"$2,510", "$2,511", "$2,512", "$2,515", "$2,517", "$2,527",
"$2,528", "$2,540", "$2,554", "$2,562", "$2,565", "$2,568",
"$2,581", "$2,597", "$2,611", "$2,616", "$2,631", "$2,652",
"$2,663", "$2,671", "$2,672", "$2,685", "$2,727", "$2,731",
"$2,742", "$2,771", "$2,778", "$2,781", "$2,970", "$2,984",
"$2,986", "$3,030"), class = "factor"), price_2_4 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "$2,500", class = "factor"),
price_3_1 = structure(c(24L, 1L, 28L, 7L, 18L, 21L), .Label = c("$2,500",
"$2,501", "$2,503", "$2,505", "$2,509", "$2,512", "$2,535",
"$2,537", "$2,542", "$2,553", "$2,556", "$2,560", "$2,561",
"$2,574", "$2,584", "$2,618", "$2,624", "$2,629", "$2,637",
"$2,664", "$2,761", "$2,840", "$2,875", "$2,883", "$2,891",
"$2,933", "$2,953", "$2,978", "$3,039", "$3,043", "$3,067"
), class = "factor"), price_3_2 = structure(c(3L, 1L, 5L,
19L, 25L, 9L), .Label = c("$2,500", "$2,501", "$2,503", "$2,504",
"$2,512", "$2,517", "$2,540", "$2,543", "$2,546", "$2,560",
"$2,567", "$2,573", "$2,586", "$2,592", "$2,594", "$2,603",
"$2,604", "$2,606", "$2,628", "$2,633", "$2,635", "$2,693",
"$2,696", "$2,714", "$2,734", "$2,739", "$2,770", "$2,791",
"$2,797", "$2,936", "$2,967", "$3,021", "$3,024"), class = "factor"),
price_3_3 = structure(c(26L, 7L, 5L, 32L, 10L, 24L), .Label = c("$2,500",
"$2,501", "$2,502", "$2,505", "$2,506", "$2,507", "$2,508",
"$2,509", "$2,512", "$2,515", "$2,519", "$2,547", "$2,556",
"$2,574", "$2,587", "$2,592", "$2,608", "$2,616", "$2,621",
"$2,635", "$2,638", "$2,667", "$2,671", "$2,688", "$2,694",
"$2,700", "$2,717", "$2,759", "$2,809", "$2,864", "$2,891",
"$2,912", "$3,011", "$3,012"), class = "factor"), price_3_4 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "$2,500", class = "factor"),
price_4_1 = structure(c(29L, 13L, 16L, 24L, 33L, 19L), .Label = c("$2,500",
"$2,505", "$2,506", "$2,508", "$2,511", "$2,525", "$2,549",
"$2,562", "$2,577", "$2,582", "$2,586", "$2,591", "$2,621",
"$2,636", "$2,654", "$2,670", "$2,722", "$2,726", "$2,733",
"$2,744", "$2,745", "$2,755", "$2,768", "$2,805", "$2,817",
"$2,827", "$2,835", "$2,888", "$2,925", "$2,959", "$3,001",
"$3,027", "$3,061", "$3,071"), class = "factor"), price_4_2 = structure(c(33L,
31L, 21L, 16L, 25L, 13L), .Label = c("$2,500", "$2,502",
"$2,503", "$2,505", "$2,506", "$2,511", "$2,513", "$2,516",
"$2,529", "$2,539", "$2,547", "$2,554", "$2,557", "$2,562",
"$2,567", "$2,579", "$2,581", "$2,583", "$2,585", "$2,591",
"$2,612", "$2,629", "$2,640", "$2,670", "$2,695", "$2,726",
"$2,737", "$2,788", "$2,790", "$2,798", "$2,852", "$3,031",
"$3,063"), class = "factor"), price_4_3 = structure(c(4L,
30L, 4L, 19L, 1L, 27L), .Label = c("$2,500", "$2,504", "$2,507",
"$2,509", "$2,511", "$2,512", "$2,514", "$2,516", "$2,543",
"$2,552", "$2,562", "$2,575",