Commit 3777b9e0 authored by linushof
Browse files

Simplify steps of data tidying and transformation

parent b928077b
# Packages ----
# Load the tidyverse through pacman; read_csv() and the col_*() helpers
# below are called without a namespace prefix.
pacman::p_load(tidyverse)

# Simulation data ----
# Column specification shared by both simulation files: the six listed
# columns are parsed as factors, every remaining column as double.
cols <- list(
  .default = col_double(),
  boundary = col_factor(),
  gamble = col_factor(),
  agent = col_factor(),
  rare = col_factor(),
  attended = col_factor(),
  choice = col_factor()
)

# Read the piecewise and the comprehensive simulation files.
sim_piecewise <- read_csv(
  "data/simulation/sim_piecewise.csv",
  col_types = cols
)
sim_comprehensive <- read_csv(
  "data/simulation/sim_comprehensive.csv",
  col_types = cols
)
# piecewise strategy

## load simulation data
# Column specification for the piecewise simulation file: the listed
# identifier columns are parsed as factors, `round` and every column not
# named here as double.
cols_piecewise <- list(
  .default = col_double(),
  boundary = col_factor(),
  gamble = col_factor(),
  agent = col_factor(),
  rare = col_factor(),
  round = col_double(),
  attended = col_factor(),
  choice = col_factor()
)

sim_piecewise <- read_csv(
  "./R/data/simulation/sim_piecewise.csv",
  col_types = cols_piecewise
)
## transform data to obtain trial summaries
## tidy data
# Derive the piecewise choice data: compute per-group sampling summaries on
# the raw sample rows, keep only the rows where `choice` is not NA, add the
# strategy label and transformed parameters, and write the result to disk.
# NOTE(review): two group_by()/mutate() stages follow each other below, and
# the second one re-assigns n_sample, a_ev_exp and b_ev_exp computed by the
# first (with a grouping that omits sigma). This looks like two revisions of
# the pipeline shown together -- confirm which stage is intended.
choices_piecewise <- sim_piecewise %>%
group_by(s, sigma, boundary, a, gamble, agent) %>% # group by unique sample processes
mutate(n_sample = n(), # number of single samples drawn
a_ev_exp = mean(A, na.rm = TRUE), # experienced expected value
b_ev_exp = mean(B, na.rm = TRUE)) %>%
# regroup without sigma; the summaries below are computed per this grouping
group_by(s, boundary, a, gamble, agent) %>% # group by trials
mutate(n_sample = n(), # total number of single samples
n_a = n_sample - sum(is.na(A)), # number of single samples drawn from risky option
a_p2_exp = round(sum(if_else(A == a_o2, 1, 0), na.rm = TRUE)/n_a, 2), # experienced probability of higher risky outcome
a_p1_exp = round(1 - a_p2_exp, 2), # experienced probability of lower risky outcome
a_ev_exp = round(mean(A, na.rm = TRUE), 2), # experienced mean A
b_ev_exp = round(mean(B, na.rm = TRUE), 2)) %>% # experienced mean B
# drop the grouping so that the grouping column s can be modified below
ungroup() %>%
filter(!is.na(choice)) %>% # remove single samples / leave choices only
# NOTE(review): `round` in n_switch is used as a data column, presumably the
# `round` column declared in cols_piecewise -- confirm.
mutate(strategy = "piecewise",
s = 1-(s+.5), # transform parameter s (interpret as switching probability)
diff = wdiff,
n_switch = (round*2)-1) %>% # number of switches between prospects
# drop the listed sample-level columns (wdiff was copied into diff above)
select(!c(attended, A, B, A_rmean, B_rmean, rdiff, A_win, B_win, wdiff)) %>%
# keep and order the output columns
select(strategy, s:gamble, rare, a_p1:ev_ratio, agent, n_sample, n_switch, A_sum, B_sum, diff, a_ev_exp, b_ev_exp, choice)
# save the piecewise choice data
write_csv(choices_piecewise, "./R/data/choices/choices_piecewise.csv")
filter(!is.na(choice)) %>% # discard single samples
# comprehensive strategy
# NOTE: The data sets are very large, so each one should be tidied
# separately before the data sets are bound together.

## tidy data
# Column specification for the comprehensive simulation files: the six
# listed columns are parsed as factors, every remaining column as double.
cols_comprehensive <- list(
  .default = col_double(),
  boundary = col_factor(),
  gamble = col_factor(),
  agent = col_factor(),
  rare = col_factor(),
  attended = col_factor(),
  choice = col_factor()
)
mutate(strategy = "piecewise",
s = 1-(s+.5)) %>% # to interpret parameter s as switching probability
select(strategy, s:gamble, rare, a_p1:ev_ratio, agent, n_sample, n_a, a_p1_exp, a_p2_exp, a_ev_exp, b_ev_exp, choice, A_sum, B_sum, diff)
write_csv(choices_piecewise, "data/choices/choices_piecewise.csv")
## data set 1
# Read the initial comprehensive simulation file.
sim_comprehensive_1 <- read_csv(
  "./R/data/simulation/sim_comprehensive_init.csv",
  col_types = cols_comprehensive
)

# Summarise every sampling process and keep only the rows holding a choice.
choices_comprehensive_1 <- sim_comprehensive_1 %>%
  # one group per unique sampling process
  group_by(s, sigma, boundary, a, gamble, agent) %>%
  mutate(
    # number of single samples drawn
    n_sample = n(),
    # 1 if the attended prospect differs from the previous row, 0 if it is
    # the same, NA where there is no previous row in the group
    switch = case_when(
      attended != lag(attended) ~ 1,
      attended == lag(attended) ~ 0
    ),
    # number of switches between prospects
    n_switch = sum(switch, na.rm = TRUE),
    # experienced expected values, rounded to two decimals
    a_ev_exp = round(mean(A, na.rm = TRUE), 2),
    b_ev_exp = round(mean(B, na.rm = TRUE), 2)
  ) %>%
  # remove single samples / leave choices only
  filter(!is.na(choice)) %>%
  select(!c(attended, A, B, switch)) %>%
  ungroup() %>%
  mutate(
    strategy = "comprehensive",
    # transform parameter s (interpret as switching probability)
    s = 1 - (s + .5)
  ) %>%
  select(
    strategy, s:gamble, rare, a_p1:ev_ratio, agent,
    n_sample, n_switch, A_sum, B_sum, diff,
    a_ev_exp, b_ev_exp, choice
  )
# comprehensive strategy

## data set 2
# Read the first extension of the comprehensive simulation data.
sim_comprehensive_2 <- read_csv(
  "./R/data/simulation/sim_comprehensive_ext1.csv",
  col_types = cols_comprehensive
)
## transform data to obtain trial summaries
choices_comprehensive_2 <- sim_comprehensive_2 %>%
group_by(s, sigma, boundary, a, gamble, agent) %>% # group by unique sample processes
mutate(n_sample = n(), # number of single samples drawn
switch = case_when(attended != lag(attended) ~ 1, # indicate whether switch occured
attended == lag(attended) ~ 0),
n_switch = sum(switch, na.rm = TRUE), # number of switches between prospects
a_ev_exp = round(mean(A, na.rm = TRUE), 2), # experienced expected value
choices_comprehensive <- sim_comprehensive %>%
group_by(s, sigma, boundary, a, gamble, agent) %>%
mutate(n_sample = n(),
n_a = n_sample - sum(is.na(A)),
a_p2_exp = round(sum(if_else(A == a_o2, 1, 0), na.rm = TRUE)/n_a, 2),
a_p1_exp = round(1 - a_p2_exp, 2),
a_ev_exp = round(mean(A, na.rm = TRUE), 2),
b_ev_exp = round(mean(B, na.rm = TRUE), 2)) %>%
filter(!is.na(choice)) %>% # remove single samples / leave choices only
select(!c(attended, A, B, switch)) %>%
ungroup() %>%
mutate(strategy = "comprehensive",
s = 1-(s+.5)) %>% # transform parameter s (interpret as switching probability)
select(strategy, s:gamble, rare, a_p1:ev_ratio, agent, n_sample, n_switch, A_sum, B_sum, diff, a_ev_exp, b_ev_exp, choice)
filter(!is.na(choice)) %>%
## data set 3
# Read the second extension of the comprehensive simulation data.
sim_comprehensive_3 <- read_csv(
  "./R/data/simulation/sim_comprehensive_ext2.csv",
  col_types = cols_comprehensive
)

## tidy data
# Summarise every sampling process and keep only the rows holding a choice.
choices_comprehensive_3 <- sim_comprehensive_3 %>%
  # one group per unique sampling process
  group_by(s, sigma, boundary, a, gamble, agent) %>%
  mutate(
    # number of single samples drawn
    n_sample = n(),
    # 1 if the attended prospect differs from the previous row, 0 if it is
    # the same, NA where there is no previous row in the group
    switch = case_when(
      attended != lag(attended) ~ 1,
      attended == lag(attended) ~ 0
    ),
    # number of switches between prospects
    n_switch = sum(switch, na.rm = TRUE),
    # experienced expected values, rounded to two decimals
    a_ev_exp = round(mean(A, na.rm = TRUE), 2),
    b_ev_exp = round(mean(B, na.rm = TRUE), 2)
  ) %>%
  # remove single samples / leave choices only
  filter(!is.na(choice)) %>%
  select(!c(attended, A, B, switch)) %>%
  ungroup() %>%
  mutate(
    strategy = "comprehensive",
    # transform parameter s (interpret as switching probability)
    s = 1 - (s + .5)
  ) %>%
  select(
    strategy, s:gamble, rare, a_p1:ev_ratio, agent,
    n_sample, n_switch, A_sum, B_sum, diff,
    a_ev_exp, b_ev_exp, choice
  )
## bind data sets
# Stack the three tidied comprehensive data sets row-wise and save the result.
choices_comprehensive <- bind_rows(
  choices_comprehensive_1,
  choices_comprehensive_2,
  choices_comprehensive_3
)
write_csv(
  choices_comprehensive,
  "./R/data/choices/choices_comprehensive.csv"
)
s = 1-(s+.5)) %>%
select(strategy, s:gamble, rare, a_p1:ev_ratio, agent, n_sample, n_a, a_p1_exp, a_p2_exp, a_ev_exp, b_ev_exp, choice, A_sum, B_sum, diff)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment