fsmi-data.Rmd

Just the code… (unevaluated)
This code is no longer consistent with Qualtrics’ column specifications and needs to be checked.
library(scorequaltrics)
library(dplyr)
library(tidyr)
# Load the Qualtrics API credentials and list the available surveys.
# NOTE(review): the credentials are stored in `loaded`, but the later
# get_survey_data() calls pass an object named `credentials` that is not
# defined in the visible code -- confirm where it is created.
loaded <- scorequaltrics::creds_from_file(qualtrics_api_token_file)
survey_names_all <- scorequaltrics::get_surveys()

# Keep only the surveys whose name mentions TDS1, TDS2, TDS3 or YADS.
survey_names_splt <- dplyr::filter(
  survey_names_all,
  grepl('.*(TDS1|TDS2|TDS3|YADS).*', SurveyName)
)

# Anonymize it, and make it part of the package while we’re at it.
# Item-name patterns for the questionnaires of interest ----
# Each SES / YRBS entry is anchored with `$` so that, for example, `SES_1`
# cannot match `SES_15`.
SES_questions <- paste0(
  'SES_',
  c(5, 9, 11, '12_1_TEXT', '13_1_TEXT', 15, 16),
  '$'
)
YRBS_questions <- paste0(
  'YRBS_',
  c(1:4, 9:19, 31:37),
  '$'
)

# One alternation: the scale prefixes first, then every anchored SES item,
# then every anchored YRBS item.
item_name_regex <- paste0(
  '(fsmi|K_SRQ_|SAQ_|dominance_prestige|UPPSP_|',
  paste(c(SES_questions, YRBS_questions), collapse = '|'),
  ')'
)
# YADS-V, version 1 ----
# Surveys whose name ends in "YADS-V".
yadsv_v1_surveys <- dplyr::filter(
  survey_names_splt,
  grepl('YADS-V$', SurveyName)
)

# Lookup between Qualtrics response IDs and participant IDs (SID).
# Where the recode file has an `id` for a response, that `id` replaces the
# SID recorded in the survey.
yadsv_v1_qid_sid <- scorequaltrics::get_survey_data(
  yadsv_v1_surveys,
  credentials,
  pid_col = 'SID'
) %>%
  dplyr::filter(grepl('ResponseId', item)) %>%
  dplyr::left_join(
    readr::read_csv(id_recode_filename),
    by = c('value' = 'response_id')
  ) %>%
  tidyr::spread(item, value) %>%
  dplyr::mutate(SID = ifelse(is.na(id), SID, id)) %>%
  dplyr::select(-survey_name)

# Long-format item responses keyed by ResponseId, restricted to the items of
# interest and then re-keyed to SID through the lookup above.
yadsv_v1_fmsi_long <- scorequaltrics::get_survey_data(
  yadsv_v1_surveys,
  credentials,
  pid_col = 'ResponseId'
) %>%
  dplyr::filter(grepl(item_name_regex, item)) %>%
  dplyr::left_join(yadsv_v1_qid_sid, by = 'ResponseId') %>%
  dplyr::select(-ResponseId, -id)
# YADS-V, version 2 ----
# Surveys whose name ends in "YADS-V v2.0".
yadsv_v2_surveys <- dplyr::filter(
  survey_names_splt,
  grepl('YADS-V v2.0$', SurveyName)
)

# Lookup between response IDs and SIDs; rows with SID 999 are discarded.
# A recoded `id` from the recode file takes precedence over the survey SID.
yads_v2_qid_sid <- scorequaltrics::get_survey_data(
  yadsv_v2_surveys,
  credentials,
  pid_col = 'SID'
) %>%
  dplyr::filter(grepl('ResponseId', item) & SID != 999) %>%
  dplyr::left_join(
    readr::read_csv(id_recode_filename),
    by = c('value' = 'response_id')
  ) %>%
  tidyr::spread(item, value) %>%
  dplyr::mutate(SID = ifelse(is.na(id), SID, id)) %>%
  dplyr::select(-survey_name)

# Long-format item responses for the items of interest, re-keyed to SID.
yadsv_v2_fmsi_long <- scorequaltrics::get_survey_data(
  yadsv_v2_surveys,
  credentials,
  pid_col = 'ResponseId'
) %>%
  dplyr::filter(grepl(item_name_regex, item)) %>%
  dplyr::left_join(yads_v2_qid_sid, by = 'ResponseId') %>%
  dplyr::select(-ResponseId, -id)
# YADS-V, version 2, online ----
# Surveys whose name ends in "YADS-V v2.0 - Online".
yadsv_v2o_surveys <- dplyr::filter(
  survey_names_splt,
  grepl('YADS-V v2.0 - Online$', SurveyName)
)

# Lookup between `qid` and SID. This survey stores the participant ID in
# `participantid`, so it is renamed to SID first; rows with SID 99999 are
# discarded before the recoded `id` is applied.
yadsv_v2o_qid_sid <- scorequaltrics::get_survey_data(
  yadsv_v2o_surveys,
  credentials,
  pid_col = 'participantid'
) %>%
  dplyr::rename(SID = participantid) %>%
  dplyr::filter(grepl('qid', item)) %>%
  dplyr::left_join(
    readr::read_csv(id_recode_filename),
    by = c('value' = 'response_id')
  ) %>%
  dplyr::filter(SID != 99999) %>%
  dplyr::mutate(SID = ifelse(is.na(id), SID, id)) %>%
  tidyr::spread(item, value) %>%
  dplyr::select(-survey_name)

# Long-format item responses. Besides the shared item pattern, the bare
# SES_12 / SES_13 items are also kept for this survey.
yadsv_v2o_fsmi_long <- scorequaltrics::get_survey_data(
  yadsv_v2o_surveys,
  credentials,
  pid_col = 'qid'
) %>%
  dplyr::filter(
    grepl(item_name_regex, item) | grepl('(SES_12$|SES_13$)', item)
  ) %>%
  dplyr::left_join(yadsv_v2o_qid_sid, by = 'qid') %>%
  dplyr::select(-qid, -id)
# TDS2 ----
# Surveys named "TDS2 Session 3 - Child" (at the end of the name) or
# containing "TDS2 Session 1".
tds2_surveys <- dplyr::filter(
  survey_names_splt,
  grepl('(TDS2 Session 3 - Child$|TDS2 Session 1)', SurveyName)
)

# Lookup between `qid` and SID. Rows with SID 999 and a hand-picked set of
# response IDs are removed before the recoded `id` is applied.
tds2_qid_sid <- scorequaltrics::get_survey_data(
  tds2_surveys,
  credentials,
  pid_col = 'SID'
) %>%
  dplyr::filter(
    grepl('qid', item),
    SID != 999,
    !value %in% c('R_eb4zIauAyliE9mZ',
                  'R_bBeojkejt41450N',
                  'R_2rkmtZI2W9Khoto',
                  'R_b1xlYl5pNEoie1H'), #participant has odd pattern of responses for session 1
    value != 'R_vSIlF9BkXhrLxZv', #participant restarted questionnaire
    value != 'R_1QbE1GP5I0CGwSa' #participant restarted questionnaire
  ) %>%
  dplyr::left_join(
    readr::read_csv(id_recode_filename),
    by = c('value' = 'response_id')
  ) %>%
  tidyr::spread(item, value) %>%
  dplyr::mutate(SID = ifelse(is.na(id), SID, id)) %>%
  dplyr::select(-survey_name)

# Long-format item responses. An item is kept when it matches the shared
# pattern, unless it is an SES item that comes from a "Session 1" survey.
tds2_fmsi_long <- scorequaltrics::get_survey_data(
  tds2_surveys,
  credentials,
  pid_col = 'qid'
) %>%
  dplyr::filter(
    grepl(item_name_regex, item) &
      (!grepl('SES', item) | !grepl('Session 1', survey_name))
  ) %>%
  dplyr::left_join(tds2_qid_sid, by = 'qid') %>%
  dplyr::select(-qid, -id)
# TDS1 ----
# Surveys named "TDS1, Session 3 - Child" (at the end of the name) or
# containing "TDS1 Session 1 - Post" / "TDS1 Session 1 - Pre".
tds1_surveys <- dplyr::filter(
  survey_names_splt,
  grepl(
    '(TDS1, Session 3 - Child$|TDS1 Session 1 - Post|TDS1 Session 1 - Pre)',
    SurveyName
  )
)

# Lookup between `qid` and SID. Rows with SID 999 or an empty SID are removed
# before the recoded `id` is applied.
tds1_qid_sid <- scorequaltrics::get_survey_data(
  tds1_surveys,
  credentials,
  pid_col = 'SID'
) %>%
  dplyr::filter(grepl('qid', item) & SID != 999 & SID != '') %>%
  dplyr::left_join(
    readr::read_csv(id_recode_filename),
    by = c('value' = 'response_id')
  ) %>%
  tidyr::spread(item, value) %>%
  dplyr::mutate(SID = ifelse(is.na(id), SID, id)) %>%
  dplyr::select(-survey_name)

# Long-format item responses. An item is kept when it matches the shared
# pattern, unless it is an SES item that comes from a "Session 1" survey.
tds1_fmsi_long <- scorequaltrics::get_survey_data(
  tds1_surveys,
  credentials,
  pid_col = 'qid'
) %>%
  dplyr::filter(
    grepl(item_name_regex, item) &
      (!grepl('SES', item) | !grepl('Session 1', survey_name))
  ) %>%
  dplyr::left_join(tds1_qid_sid, by = 'qid') %>%
  dplyr::select(-qid, -id)
# Combine and de-duplicate ----
# Stack the long-format responses within each study family.
yadsv_fsmi_long <- dplyr::bind_rows(
  yadsv_v1_fmsi_long,
  yadsv_v2_fmsi_long,
  yadsv_v2o_fsmi_long
)
tds_fsmi_long <- dplyr::bind_rows(
  tds1_fmsi_long,
  tds2_fmsi_long
)

# Pattern an SID must match to be kept.
# NOTE(review): the pattern is not anchored at the start, so every alternative
# is already covered by the first one (`[1234]\d\d` anywhere in the SID), and
# that alternative is listed twice. Confirm whether a fully anchored match was
# intended.
SID_filter <- '([1234]\\d\\d|[34]\\d{4}$|[1234]\\d\\d|99386)'

# Keep rows with an acceptable SID, then let clean_dupes() handle repeated
# responses per SID.
yadsv_fsmi_long_nodupes <- scorequaltrics::clean_dupes(
  dplyr::filter(yadsv_fsmi_long, grepl(SID_filter, SID)),
  pid_col = 'SID'
)
tds_fsmi_long_nodupes <- scorequaltrics::clean_dupes(
  dplyr::filter(tds_fsmi_long, grepl(SID_filter, SID)),
  pid_col = 'SID'
)

#Check that dropped values weren't ambiguous
# Duplicate-resolution review tables ----
# For each study family, print the rows flagged `dropped` that still carry at
# least one old value, with the old values of each SID/item group collapsed
# into one space-separated string.
# NOTE(review): `dropped` and `old.value` are presumably columns added by
# scorequaltrics::clean_dupes() -- confirm against that function.
#
# `collapse` must be spelled out in full: paste() takes it after `...`, so an
# abbreviated name is treated as one more string to paste rather than as the
# collapse separator.
yadsv_fsmi_long_nodupes %>%
  dplyr::filter(dropped) %>%
  dplyr::group_by(SID, item) %>%
  dplyr::filter(length(unlist(old.value)) > 0) %>%
  dplyr::mutate(old.value = paste(unlist(old.value), collapse = ' ')) %>%
  knitr::kable(caption = "Questionnaire dupes")

tds_fsmi_long_nodupes %>%
  dplyr::filter(dropped) %>%
  dplyr::group_by(SID, item) %>%
  dplyr::filter(length(unlist(old.value)) > 0) %>%
  dplyr::mutate(old.value = paste(unlist(old.value), collapse = ' ')) %>%
  knitr::kable(caption = "Questionnaire dupes")

# Column types for the anonymization key file read with readr::read_csv().
anon_id_cols <- readr::cols(
  id = readr::col_character(),
  sample = readr::col_character(),
  anon_id = readr::col_integer(),
  anon_sample = readr::col_character(),
  exclude = readr::col_integer()
)
# De-identify the YADS-V data ----
# First pass: full join against the YADS rows of the anonymization key so that
# unmatched rows on either side are visible to the checks below.
yadsv_fsmi_long_deid <- dplyr::full_join(
  yadsv_fsmi_long_nodupes,
  dplyr::filter(
    readr::read_csv(anon_id_filename, col_types = anon_id_cols),
    sample %in% c('yads', 'yads_online')
  ),
  by = c('SID' = 'id')
)

# Stop if (1) any non-excluded row with an SID has no anonymous ID, or
# (2) any key row with an anonymous ID in these samples has no SID.
# NOTE(review): with by = c('SID' = 'id') the joined key is stored in `SID`,
# so confirm that check (2) can actually detect key-only participants.
if (
  nrow(dplyr::filter(
    yadsv_fsmi_long_deid,
    is.na(anon_id), !is.na(SID), is.na(exclude)
  )) != 0 ||
    nrow(dplyr::filter(
      yadsv_fsmi_long_deid,
      !is.na(anon_id), is.na(SID), anon_sample %in% c('yads', 'yads_online')
    )) != 0
) {
  stop("Some participants not accounted for, or not anonymized")
} else {
  # Second pass: left join so only survey rows remain, drop rows flagged in
  # `exclude`, and replace SID / sample with their anonymized counterparts.
  yadsv_fsmi_long_deid <- dplyr::left_join(
    yadsv_fsmi_long_nodupes,
    dplyr::filter(
      readr::read_csv(anon_id_filename, col_types = anon_id_cols),
      sample %in% c('yads', 'yads_online')
    ),
    by = c('SID' = 'id')
  ) %>%
    dplyr::ungroup() %>%
    dplyr::filter(is.na(exclude)) %>%
    dplyr::select(-SID, -exclude, -sample) %>%
    dplyr::rename(SID = anon_id, sample = anon_sample)
}
# De-identify the TDS data ----
# First pass: full join against the TDS rows of the anonymization key so that
# unmatched rows on either side are visible to the checks below.
tds_fsmi_long_deid <- dplyr::full_join(
  tds_fsmi_long_nodupes,
  dplyr::filter(
    readr::read_csv(anon_id_filename, col_types = anon_id_cols),
    sample %in% c('TDS1', 'TDS2', 'TDS3')
  ),
  by = c('SID' = 'id')
)

# Stop if (1) any non-excluded row with an SID has no anonymous ID, or
# (2) any key row with an anonymous ID in these samples has no SID.
# NOTE(review): with by = c('SID' = 'id') the joined key is stored in `SID`,
# so confirm that check (2) can actually detect key-only participants.
if (
  nrow(dplyr::filter(
    tds_fsmi_long_deid,
    is.na(anon_id), !is.na(SID), is.na(exclude)
  )) != 0 ||
    nrow(dplyr::filter(
      tds_fsmi_long_deid,
      !is.na(anon_id), is.na(SID), anon_sample %in% c('TDS1', 'TDS2', 'TDS3')
    )) != 0
) {
  stop("Some participants not accounted for, or not anonymized")
} else {
  # Second pass: left join so only survey rows remain, drop rows flagged in
  # `exclude`, and replace SID / sample with their anonymized counterparts.
  tds_fsmi_long_deid <- dplyr::left_join(
    tds_fsmi_long_nodupes,
    dplyr::filter(
      readr::read_csv(anon_id_filename, col_types = anon_id_cols),
      sample %in% c('TDS1', 'TDS2', 'TDS3')
    ),
    by = c('SID' = 'id')
  ) %>%
    dplyr::ungroup() %>%
    dplyr::filter(is.na(exclude)) %>%
    dplyr::select(-SID, -exclude, -sample) %>%
    dplyr::rename(SID = anon_id, sample = anon_sample)
}
# Final de-identified data set ----
# Stack the TDS and YADS-V data, then rename the free-text SES items
# `SES_12_1_TEXT` / `SES_13_1_TEXT` to plain `SES_12` / `SES_13`; every other
# item name is left as it is.
splt_fsmi_long_deid <- dplyr::mutate(
  dplyr::bind_rows(tds_fsmi_long_deid, yadsv_fsmi_long_deid),
  item = ifelse(
    !grepl('^SES_1[23]_1_TEXT$', item),
    item,
    sub('^(SES_1[23])_1_TEXT$', '\\1', item)
  )
)