Here's one pipe that samples the new players and their scores into a separate frame, which you can then append
back onto the original data with `bind_rows()`.
# Build one synthetic "new player" row per Season/Episode/Round group by
# drawing every non-grouping column independently from the rows of that group
# whose q1_a is not missing.
set.seed(2021) # fixed seed so the sampled rows are reproducible
newplayers <- dat %>%
  # Rows with a missing q1_a are excluded from the sampling pool.
  filter(!is.na(q1_a)) %>%
  group_by(Season, Episode, Round) %>%
  summarize(
    # Pick one element by position instead of calling sample(x, size = 1):
    # for a length-one numeric x >= 1, sample() draws from 1:x rather than
    # returning x, which would invent values for any group with a single row.
    # For groups with two or more rows this is the same draw sample() makes.
    across(everything(), ~ .x[sample.int(length(.x), size = 1L)]),
    .groups = "drop"
  ) %>%
  # The sampled Player / Player_type values are not meaningful for the new
  # row, so they are overwritten with NA.
  mutate(Player = NA_integer_, Player_type = NA_integer_)
newplayers
# # A tibble: 8 x 11
# Season Episode Round Player Player_type Crowd_size q1_a q2_a q3_a q4_a q5_a
# <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <lgl>
# 1 2020 1 1 NA NA 3 0 0 1 1 NA
# 2 2020 1 2 NA NA 3 1 1 0 0 NA
# 3 2020 1 3 NA NA 3 0 1 1 0 NA
# 4 2020 1 4 NA NA 3 0 0 1 1 NA
# 5 2020 1 5 NA NA 2 1 1 1 0 NA
# 6 2020 1 6 NA NA 2 0 0 0 0 NA
# 7 2020 1 7 NA NA 2 0 0 1 1 NA
# 8 2020 2 1 NA NA 3 0 1 0 0 NA
# Append the sampled rows to the original data and preview the first rows.
# Within each Season/Episode/Round the is.na(Player) key sorts FALSE before
# TRUE, so the new (NA) player row comes after the existing players.
dat %>%
  bind_rows(newplayers) %>%
  arrange(Season, Episode, Round, is.na(Player), Player) %>%
  head()
# Season Episode Round Player Player_type Crowd_size q1_a q2_a q3_a q4_a q5_a
# 1 2020 1 1 1 1 3 0 1 0 0 NA
# 2 2020 1 1 2 1 3 0 1 1 1 NA
# 3 2020 1 1 3 1 3 0 0 0 1 NA
# 4 2020 1 1 NA NA 3 0 0 1 1 NA
# 5 2020 1 2 1 1 3 1 1 0 1 NA
# 6 2020 1 2 2 1 3 1 0 1 0 NA
I didn't know what values to put into the `Player*` columns (`Player` and `Player_type`), so I chose `NA`.
Data
# Sample data (originally produced by dput(dat)): 24 rows, i.e. three players
# for each of eight Season/Episode/Round combinations.
dat <- data.frame(
  Season = rep(2020L, 24L),
  Episode = rep(c(1L, 2L), times = c(21L, 3L)),
  Round = rep(c(1:7, 1L), each = 3L),
  Player = rep(1:3, times = 8L),
  Player_type = rep(1L, 24L),
  Crowd_size = rep(c(3L, 2L, 3L), times = c(12L, 9L, 3L)),
  # Answer columns: one line per Season/Episode/Round group (players 1-3).
  q1_a = c(
    0L, 0L, 0L,
    1L, 1L, 1L,
    0L, 0L, 0L,
    0L, 0L, 0L,
    1L, 1L, NA,
    0L, 0L, NA,
    0L, 1L, NA,
    1L, 0L, 0L
  ),
  q2_a = c(
    1L, 1L, 0L,
    1L, 0L, 1L,
    1L, 1L, 0L,
    0L, 0L, 0L,
    1L, 1L, NA,
    0L, 0L, NA,
    1L, 0L, NA,
    1L, 0L, 1L
  ),
  q3_a = c(
    0L, 1L, 0L,
    0L, 1L, 1L,
    0L, 1L, 1L,
    1L, 1L, 1L,
    0L, 1L, NA,
    0L, 0L, NA,
    1L, 0L, NA,
    0L, 0L, 1L
  ),
  q4_a = c(
    0L, 1L, 1L,
    1L, 0L, 0L,
    0L, 1L, 1L,
    1L, 1L, 1L,
    0L, 0L, NA,
    0L, 0L, NA,
    1L, 1L, NA,
    0L, 1L, 0L
  ),
  # q5_a is entirely missing, so it stays a logical NA column.
  q5_a = rep(NA, 24L)
)