age-and-puberty-data.RmdThis vignette is here just to show you the code used to agglomerate and save the splt_dev_and_demog.rda file that comes with this package.
library(dplyr)
library(tidyr)
credentials <- scorequaltrics::creds_from_file(qualtrics_api_token_file)
survey_names_all <- scorequaltrics::get_surveys(credentials)
survey_names_splt <- dplyr::filter(survey_names_all, grepl('.*(TDS1|TDS2|TDS3|YADS).*', SurveyName))# We only need PDS data
tds2_wave2_surveys <- survey_names_splt %>%
filter(grepl('TDS2 Session 3 - Child$', SurveyName))
tds2_wave2_qid_sid <- scorequaltrics::get_survey_data(tds2_wave2_surveys,
credentials,
pid_col = 'SID') %>%
dplyr::filter(grepl('qid', item), SID != 406) %>% #temporarily remove 406
tidyr::spread(item, value) %>%
dplyr::left_join(readr::read_csv(id_recode_filename), by = c('qid' = 'response_id')) %>%
dplyr::mutate(SID = ifelse(!is.na(id), id, SID)) %>%
dplyr::select(-survey_name)
tds2_wave2_long <- scorequaltrics::get_survey_data(tds2_wave2_surveys,
credentials,
pid_col = 'qid') %>%
dplyr::filter(grepl('PDS', item)) %>%
dplyr::left_join(tds2_wave2_qid_sid, by = 'qid') %>%
dplyr::select(-qid, -id)
tds2_wave2_pds_rubrics <- data.frame(file = dir(file.path(tds2_wave2_rubric_dir),
pattern = 'PDS_scoring_rubric_TDS2_S3.csv',
full.names = TRUE))
tds2_wave2_scoring_data_long <- scorequaltrics::get_rubrics(tds2_wave2_pds_rubrics,
type = 'scoring')
#Clean and de-dupe parent and child data
tds2_wave2_long_nodupes <- tds2_wave2_long %>%
scorequaltrics::get_items_in_rubric(tds2_wave2_scoring_data_long) %>%
filter(grepl('[14]\\d\\d', SID)) %>%
scorequaltrics::clean_dupes(pid_col = 'SID')#Check that dropped values weren't ambiguous
tds2_wave2_long_nodupes %>%
filter(dropped) %>%
group_by(SID, item) %>%
filter(!all(length(unlist(old.value)) < 1)) %>%
mutate(old.value = paste(old.value, collaps = ' ')) %>%
knitr::kable(caption = "Questionnaire dupes")tds2_wave2_scored_pds <- scorequaltrics::score_questionnaire(tds2_wave2_long_nodupes,
tds2_wave2_scoring_data_long) %>%
dplyr::filter(scored_scale %in% pds_scales)
tds2_wave2_scored_pdss <- scorequaltrics::score_pdss(tds2_wave2_long_nodupes,
gender_mix = pdss_gender_mix,
gendercode = pdss_gender_code)
tds2_wave2_scored_puberty <- bind_rows(tds2_wave2_scored_pds, tds2_wave2_scored_pdss)# We only need PDS data
tds1_wave2_surveys <- survey_names_splt %>%
filter(grepl('TDS1, Session 3 - Child', SurveyName))
tds1_wave2_qid_sid <- scorequaltrics::get_survey_data(tds1_wave2_surveys,
credentials,
pid_col = 'SID') %>%
dplyr::filter(grepl('qid', item)) %>%
dplyr::left_join(readr::read_csv(id_recode_filename), by = c('value' = 'response_id')) %>%
dplyr::filter(grepl('3\\d\\d', SID)) %>%
tidyr::spread(item, value) %>%
dplyr::mutate(SID = ifelse(!is.na(id), id, SID)) %>%
dplyr::select(-survey_name)
tds1_wave2_long <- scorequaltrics::get_survey_data(tds1_wave2_surveys,
credentials,
pid_col = 'qid') %>%
dplyr::filter(grepl('PDS', item)) %>%
dplyr::left_join(tds1_wave2_qid_sid, by = 'qid') %>%
dplyr::select(-qid, -id)
tds1_wave2_pds_rubrics <- data.frame(file = dir(file.path(tds1_wave2_rubric_dir),
pattern = 'PDS_scoring_rubric_TDS1_S3_POST.csv',
full.names = TRUE))
tds1_wave2_scoring_data_long <- scorequaltrics::get_rubrics(tds1_wave2_pds_rubrics,
type = 'scoring')
#Clean and de-dupe parent and child data
tds1_wave2_long_nodupes <- tds1_wave2_long %>%
scorequaltrics::get_items_in_rubric(tds1_wave2_scoring_data_long) %>%
filter(grepl('3\\d\\d', SID)) %>%
scorequaltrics::clean_dupes(pid_col = 'SID')#Check that dropped values weren't ambiguous
tds1_wave2_long_nodupes %>%
filter(dropped) %>%
group_by(SID, item) %>%
filter(!all(length(unlist(old.value)) < 1)) %>%
mutate(old.value = paste(old.value, collaps = ' ')) %>%
knitr::kable(caption = "Questionnaire dupes")tds1_wave2_scored_pds <- scorequaltrics::score_questionnaire(tds1_wave2_long_nodupes,
tds1_wave2_scoring_data_long) %>%
dplyr::filter(scored_scale %in% pds_scales)
tds1_wave2_scored_pdss <- scorequaltrics::score_pdss(tds1_wave2_long_nodupes,
gender_mix = pdss_gender_mix,
gendercode = pdss_gender_code)
tds1_wave2_scored_puberty <- dplyr::bind_rows(tds1_wave2_scored_pds, tds1_wave2_scored_pdss)
yadsv_v1_surveys <- survey_names_splt %>%
filter(grepl('YADS-V$', SurveyName))
yadsv_v1_qid_sid <- scorequaltrics::get_survey_data(yadsv_v1_surveys,
credentials,
pid_col = 'SID') %>%
dplyr::filter(grepl('qid', item)) %>%
dplyr::left_join(readr::read_csv(id_recode_filename), by = c('value' = 'response_id')) %>%
tidyr::spread(item, value) %>%
dplyr::mutate(SID = ifelse(!is.na(id), id, SID)) %>%
dplyr::select(-survey_name)
yadsv_v1_long <- scorequaltrics::get_survey_data(yadsv_v1_surveys,
credentials,
pid_col = 'qid') %>%
dplyr::filter(
grepl('(PDS|PAL_2_Age|PAL_2_Race|Q176|StartTime|Q173_1_TEXT|StartDate)',
item)) %>%
dplyr::left_join(yadsv_v1_qid_sid, by = 'qid') %>%
dplyr::select(-qid, -id)
yadsv_v1_pds_rubrics <- data.frame(file = dir(file.path(tds2_wave2_rubric_dir),
pattern = 'PDS_scoring_rubric_TDS2_S3.csv',
full.names = TRUE))
yadsv_v1_scoring_data_long <- scorequaltrics::get_rubrics(yadsv_v1_pds_rubrics,
type = 'scoring')
#Clean and de-dupe parent and child data
yadsv_v1_long_nodupes <- yadsv_v1_long %>%
filter(grepl('[1234]\\d\\d', SID)) %>%
scorequaltrics::get_items_in_rubric(yadsv_v1_scoring_data_long) %>%
scorequaltrics::clean_dupes(pid_col = 'SID')
#demographics only
yadsv_v1_pal_2_demo <- yadsv_v1_long %>%
dplyr::filter(grepl('[1234]\\d\\d', SID),
grepl('(PAL_2_Age|PAL_2_Race|Q173_1_TEXT|StartDate)', item)) %>%
tidyr::spread(item, value) %>%
dplyr::rename(age = PAL_2_Age, first_name = Q173_1_TEXT, occur_date = StartDate) %>%
dplyr::mutate(occur_date = as.character(lubridate::date(occur_date)))
sona_kapp_demo_cols <- readr::cols(
first_name = readr::col_character(),
occur_date = readr::col_character(),
study = readr::col_character(),
age = readr::col_integer(),
gender = readr::col_character(),
ethnicity = readr::col_character())
sona_kapp_demo <- dplyr::filter(
readr::read_csv(file.path(data_dir, 'sona_data.csv'),
col_types = sona_kapp_demo_cols),
study == 'kapp')
yadsv_v1_pal_2_demo <- dplyr::select(
dplyr::left_join(
yadsv_v1_pal_2_demo,
sona_kapp_demo,
by = c('first_name', 'occur_date')),
-first_name, -occur_date, -study)
#inf relations only
yadsv_v1_inf_rel <- yadsv_v1_long %>%
dplyr::filter(grepl('[1234]\\d\\d', SID),
grepl('Q176', item))#Check that dropped values weren't ambiguous
yadsv_v1_long_nodupes %>%
filter(dropped) %>%
group_by(SID, item) %>%
filter(!all(length(unlist(old.value)) < 1)) %>%
mutate(old.value = paste(old.value, collaps = ' ')) %>%
knitr::kable(caption = "Questionnaire dupes")yadsv_v1_scored_pds <- scorequaltrics::score_questionnaire(yadsv_v1_long_nodupes,
yadsv_v1_scoring_data_long) %>%
dplyr::filter(scored_scale %in% pds_scales)
yadsv_v1_scored_pdss <- scorequaltrics::score_pdss(yadsv_v1_long_nodupes,
gender_mix = pdss_gender_mix,
gendercode = pdss_gender_code)
yadsv_v1_scored_puberty <- dplyr::bind_rows(yadsv_v1_scored_pds, yadsv_v1_scored_pdss)yadsv_v2_surveys <- survey_names_splt %>%
filter(grepl('YADS-V v2.0$', SurveyName))
yadsv_v2_qid_sid <- scorequaltrics::get_survey_data(yadsv_v2_surveys,
credentials,
pid_col = 'SID') %>%
dplyr::filter(grepl('qid', item)) %>%
dplyr::left_join(readr::read_csv(id_recode_filename), by = c('value' = 'response_id')) %>%
dplyr::filter(SID != 999) %>%
dplyr::mutate(SID = ifelse(!is.na(id), id, SID)) %>%
tidyr::spread(item, value) %>%
dplyr::select(-survey_name)
yadsv_v2_long <- scorequaltrics::get_survey_data(yadsv_v2_surveys,
credentials,
pid_col = 'qid') %>%
dplyr::filter(grepl('(PDS|Q327|Q329|inf_rel|px_info_1_TEXT|StartDate)', item)) %>%
dplyr::left_join(yadsv_v2_qid_sid, by = 'qid') %>%
dplyr::select(-qid, -id)
yadsv_v2_pds_rubrics <- data.frame(file = dir(file.path(tds2_wave2_rubric_dir),
pattern = 'PDS_scoring_rubric_TDS2_S3.csv',
full.names = TRUE))
yadsv_v2_scoring_data_long <- scorequaltrics::get_rubrics(yadsv_v2_pds_rubrics,
type = 'scoring')
SID_filter <- '([1234]\\d\\d|99386)'
#Clean and de-dupe parent and child data
yadsv_v2_long_nodupes <- yadsv_v2_long %>%
filter(grepl(SID_filter, SID)) %>%
scorequaltrics::get_items_in_rubric(yadsv_v2_scoring_data_long) %>%
scorequaltrics::clean_dupes(pid_col = 'SID')
#demographics only
yadsv_v2_pal_2_demo <- yadsv_v2_long %>%
dplyr::filter(grepl(SID_filter, SID),
grepl('(Q327|Q329|px_info_1_TEXT|StartDate)', item)) %>%
tidyr::spread(item, value) %>%
dplyr::rename('age' = Q327,
'PAL_2_Race' = Q329,
'PAL_2_Race_TEXT' = Q329_TEXT,
first_name = px_info_1_TEXT, occur_date = StartDate) %>%
dplyr::mutate(occur_date = as.character(lubridate::date(occur_date)))
yadsv_v2_pal_2_demo <- dplyr::select(
dplyr::left_join(
yadsv_v2_pal_2_demo,
sona_kapp_demo,
by = c('first_name', 'occur_date')),
-first_name, -occur_date, -study)
#inf relations only
yadsv_v2_inf_rel <- yadsv_v2_long %>%
dplyr::filter(grepl(SID_filter, SID),
grepl('inf_rel', item))#Check that dropped values weren't ambiguous
yadsv_v2_long_nodupes %>%
filter(dropped) %>%
group_by(SID, item) %>%
filter(!all(length(unlist(old.value)) < 1)) %>%
mutate(old.value = paste(old.value, collaps = ' ')) %>%
knitr::kable(caption = "Questionnaire dupes")yadsv_v2_scored_pds <- scorequaltrics::score_questionnaire(yadsv_v2_long_nodupes,
yadsv_v2_scoring_data_long) %>%
dplyr::filter(scored_scale %in% pds_scales)
yadsv_v2_scored_pdss <- scorequaltrics::score_pdss(yadsv_v2_long_nodupes,
gender_mix = pdss_gender_mix,
gendercode = pdss_gender_code)
yadsv_v2_scored_puberty <- dplyr::bind_rows(yadsv_v2_scored_pds, yadsv_v2_scored_pdss)yadsv_v2o_surveys <- survey_names_splt %>%
filter(grepl('YADS-V v2.0 - Online$', SurveyName))
yadsv_v2o_qid_sid <- scorequaltrics::get_survey_data(yadsv_v2o_surveys,
credentials,
pid_col = 'participantid') %>%
dplyr::rename(SID = participantid) %>%
dplyr::filter(grepl('qid', item)) %>%
dplyr::left_join(readr::read_csv(id_recode_filename), by = c('value' = 'response_id')) %>%
dplyr::filter(SID != 99999) %>%
dplyr::mutate(SID = ifelse(!is.na(id), id, SID)) %>%
tidyr::spread(item, value) %>%
dplyr::select(-survey_name)
yadsv_v2o_long <- scorequaltrics::get_survey_data(yadsv_v2o_surveys,
credentials,
pid_col = 'qid') %>%
dplyr::filter(grepl('(PDS|Q327|Q329|inf_rel)', item)) %>%
dplyr::left_join(yadsv_v2o_qid_sid, by = 'qid') %>%
dplyr::select(-qid, -id)
yadsv_v2o_pds_rubrics <- data.frame(file = dir(file.path(tds2_wave2_rubric_dir),
pattern = 'PDS_scoring_rubric_TDS2_S3.csv',
full.names = TRUE))
yadsv_v2o_scoring_data_long <- scorequaltrics::get_rubrics(yadsv_v2o_pds_rubrics,
type = 'scoring')
SID_filter <- '([34]\\d{4}$)'
yadsv_v2o_long_nodupes <- yadsv_v2o_long %>%
filter(grepl(SID_filter, SID)) %>%
scorequaltrics::get_items_in_rubric(yadsv_v2o_scoring_data_long) %>%
scorequaltrics::clean_dupes(pid_col = 'SID')
#demographics only
yadsv_v2o_pal_2_demo <- yadsv_v2o_long %>%
dplyr::filter(grepl(SID_filter, SID),
grepl('(Q327|Q329)', item)) %>%
tidyr::spread(item, value) %>%
dplyr::rename('PAL_2_Age' = Q327,
'PAL_2_Race' = Q329,
'PAL_2_Race_TEXT' = Q329_TEXT)
#we can fill in missing ages with ages given on the consent form
yadsv_v2o_surveys_cons <- survey_names_splt %>%
filter(grepl('YADS-V Consent - Online', SurveyName))
yadsv_v2o_long_cons_age <- scorequaltrics::get_survey_data(yadsv_v2o_surveys_cons,
credentials,
pid_col = 'participantid') %>%
dplyr::filter(grepl('check_age', item), !is.na(as.numeric(value))) %>%
dplyr::distinct(participantid, item, value) %>%
dplyr::group_by(participantid) %>%
dplyr::mutate(n = n()) %>%
dplyr::filter(n == 1) %>%
dplyr::select(-n) %>%
tidyr::spread(item, value)
yadsv_v2o_pal_2_demo_cons_age <- dplyr::left_join(yadsv_v2o_pal_2_demo,
yadsv_v2o_long_cons_age,
by = c('SID' = 'participantid')) %>%
dplyr::mutate(age = ifelse(!is.na(as.numeric(check_age)),
check_age,
PAL_2_Age))
#finally, we can use general survey data
sona_bartok_demo <- dplyr::filter(
readr::read_csv(file.path(data_dir, 'sona_data.csv'),
col_types = sona_kapp_demo_cols),
study == 'bartok')
sona_bartok_roster <- readr::read_csv(
file.path(data_dir, 'bartok_fname_ids.csv'),
col_types = readr::cols(first_name = readr::col_character(),
survey_id = readr::col_character(),
occur_date = readr::col_character()))
sona_bartok_demo <- left_join(sona_bartok_demo, sona_bartok_roster)
yadsv_v2o_pal_2_demo_cons_age <- dplyr::select(
dplyr::left_join(
yadsv_v2o_pal_2_demo_cons_age,
sona_bartok_demo,
by = c('SID' = 'survey_id')),
-first_name, -occur_date, -study)
#inf relations only
yadsv_v2o_inf_rel <- yadsv_v2o_long %>%
dplyr::filter(grepl(SID_filter, SID),
grepl('inf_rel', item))#Check that dropped values weren't ambiguous
yadsv_v2o_long_nodupes %>%
filter(dropped) %>%
group_by(SID, item) %>%
filter(!all(length(unlist(old.value)) < 1)) %>%
mutate(old.value = paste(old.value, collaps = ' ')) %>%
knitr::kable(caption = "Questionnaire dupes")yadsv_v2o_scored_pds <- scorequaltrics::score_questionnaire(yadsv_v2o_long_nodupes,
yadsv_v2o_scoring_data_long) %>%
dplyr::filter(scored_scale %in% pds_scales)
yadsv_v2o_scored_pdss <- scorequaltrics::score_pdss(yadsv_v2o_long_nodupes,
gender_mix = pdss_gender_mix,
gendercode = pdss_gender_code)
yadsv_v2o_scored_puberty <- dplyr::bind_rows(yadsv_v2o_scored_pds, yadsv_v2o_scored_pdss)demo_col_types <- readr::cols(
sid = readr::col_character(),
s2_date = readr::col_date(format = ""),
s2_age = readr::col_double(),
s3_date = readr::col_date(format = ""),
s3_age = readr::col_double(),
gender = readr::col_integer(),
ethnicity = readr::col_character(),
hispanic_yn = readr::col_integer(),
fsiq2 = readr::col_integer()
)
anon_id_cols <- readr::cols(
id = readr::col_character(),
sample = readr::col_character(),
anon_id = readr::col_integer(),
anon_sample = readr::col_character(),
exclude = readr::col_integer()
)
pal_2_race_eth_mapping <- c(
'1' = 'European American/White',
'2' = 'Native American/American Indian/Alaskan Native',
'3' = 'African American/Black',
'4' = 'Hispanic/Latino',
'5' = 'Asian American',
'6' = 'Pacific Islander/Hawaiian',
'7' = 'Other'
)
tds_wave2_scored_puberty <- dplyr::bind_rows(tds1_wave2_scored_puberty, tds2_wave2_scored_puberty) %>%
dplyr::select(-n_items, -n_missing, -method) %>%
tidyr::unite(scale, scale_name, scored_scale) %>%
tidyr::spread(scale, score) %>%
dplyr::rowwise() %>%
dplyr::mutate_all(funs(ifelse(. == "NaN", NA, .))) %>%
dplyr::mutate(
PDS_mean_score = ifelse(!is.na(PDS_female_mean_score) && is.na(PDS_male_mean_score),
PDS_female_mean_score,
ifelse(is.na(PDS_female_mean_score) && !is.na(PDS_male_mean_score),
PDS_male_mean_score,
NA))) %>%
dplyr::select(-PDS_female_mean_score, -PDS_male_mean_score)
tds_wave2_dev_demo <- dplyr::full_join(tds_wave2_scored_puberty,
readr::read_csv(redcap_demographics_file,
col_types = demo_col_types),
by = c('SID' = 'sid'))
tds_wave2_dev_demo_deid <- dplyr::full_join(
tds_wave2_dev_demo,
dplyr::filter(
readr::read_csv(anon_id_filename, col_types = anon_id_cols),
!sample %in% c('yads', 'yads_online')),
by = c('SID' = 'id'))
#Check to make sure everyone has an anon_id, and if they do, fully anonymize the data
if(!all(
dim(filter(tds_wave2_dev_demo_deid, is.na(anon_id), !is.na(s3_date)))[1] == 0,
dim(filter(tds_wave2_dev_demo_deid,
!is.na(anon_id), is.na(s3_date), is.na(exclude),
anon_sample %in% c('TDS1', 'TDS2')))[1] == 0)) {
stop("Some participants not accounted for, or not anonymized")
} else {
tds_wave2_dev_demo_deid <- dplyr::left_join(
tds_wave2_dev_demo,
dplyr::filter(
readr::read_csv(anon_id_filename, col_types = anon_id_cols),
!sample %in% c('yads', 'yads_online')),
by = c('SID' = 'id')) %>%
dplyr::filter(is.na(exclude), !is.na(s3_date)) %>%
dplyr::mutate(age = round(s3_age + runif(n(), -.1, .1), 2)) %>%
dplyr::select(-SID, -exclude, -sample, -s2_date, -s3_date, -s2_age, -s3_age) %>%
dplyr::rename(SID = anon_id, sample = anon_sample)
}yads_scored_puberty <- dplyr::bind_rows(
yadsv_v1_scored_puberty, yadsv_v2_scored_puberty, yadsv_v2o_scored_puberty) %>%
dplyr::select(-n_items, -n_missing, -method) %>%
tidyr::unite(scale, scale_name, scored_scale) %>%
tidyr::spread(scale, score) %>%
dplyr::rowwise() %>%
dplyr::mutate_all(funs(ifelse(. == "NaN", NA, .))) %>%
dplyr::mutate(
PDS_mean_score = ifelse(!is.na(PDS_female_mean_score) && is.na(PDS_male_mean_score),
PDS_female_mean_score,
ifelse(is.na(PDS_female_mean_score) && !is.na(PDS_male_mean_score),
PDS_male_mean_score,
NA))) %>%
dplyr::select(-PDS_female_mean_score, -PDS_male_mean_score)
yads_dev_demo <- dplyr::full_join(yads_scored_puberty,
dplyr::select(
dplyr::bind_rows(yadsv_v1_pal_2_demo,
yadsv_v2_pal_2_demo,
yadsv_v2o_pal_2_demo_cons_age),
-PAL_2_Age, -check_age, -survey_name),
by = 'SID')
yads_dev_demo <- dplyr::mutate(
yads_dev_demo,
age = ifelse(is.na(age) || age == '',
as.character(sona_age),
age),
PDSS_gender = ifelse(is.na(PDSS_gender) || PDSS_gender == '',
c('Male' = '0', 'Female' = '1')[sona_gender],
PDSS_gender),
ethnicity = ifelse(PAL_2_Race == 7,
PAL_2_Race_TEXT,
pal_2_race_eth_mapping[PAL_2_Race]),
ethnicity = ifelse(is.na(ethnicity) || ethnicity == '',
sona_ethnicity,
ethnicity)) %>%
dplyr::select(-sona_age, -sona_gender, -sona_ethnicity)
yads_dev_demo_deid <- dplyr::full_join(
yads_dev_demo,
dplyr::filter(
readr::read_csv(anon_id_filename, col_types = anon_id_cols),
sample %in% c('yads', 'yads_online')),
by = c('SID' = 'id'))
if(!all(
dim(filter(yads_dev_demo_deid, is.na(anon_id), !is.na(SID), is.na(exclude)))[1] == 0,
dim(filter(yads_dev_demo_deid,
!is.na(anon_id), is.na(SID), is.na(exclude), anon_sample %in% c('yads', 'yads_online')))[1] == 0)) {
stop("Some participants not accounted for, or not anonymized")
} else {
yads_dev_demo_deid <- dplyr::left_join(
yads_dev_demo,
dplyr::filter(
readr::read_csv(anon_id_filename, col_types = anon_id_cols),
sample %in% c('yads', 'yads_online')),
by = c('SID' = 'id')) %>%
dplyr::filter(is.na(exclude)) %>%
dplyr::select(-SID, -exclude, -sample, -PAL_2_Race, -PAL_2_Race_TEXT) %>%
dplyr::rename(SID = anon_id, sample = anon_sample) %>%
dplyr::mutate(gender = as.numeric(PDSS_gender), age = as.numeric(age))
}
splt_dev_and_demog <- dplyr::bind_rows(tds_wave2_dev_demo_deid, yads_dev_demo_deid) %>%
dplyr::left_join(readr::read_csv(ethnicity_recode)) %>%
dplyr::select(-ethnicity) %>%
dplyr::rename(ethnicity = ethnicity_recode)clean_infrel_types <- function(inf_string){
temp <- tolower(inf_string)
# Remove everything that is not a number or letter (may want to keep more
# stuff in your actual analyses).
temp <- stringr::str_replace_all(temp,"-", " ")
temp <- stringr::str_replace_all(temp,"[^a-zA-Z'/\\s]", "")
temp <- stringr::str_replace_all(temp,"[\\s]+", " ")
temp <- stringr::str_replace_all(temp,"(^ | $)", "")
temp <- stringr::str_replace_all(temp,"(co) ", "\\1")
temp <- stringr::str_replace_all(temp,"(boy|girl|best)f", "\\1 f")
temp <- stringr::str_replace_all(temp,"(rommate|roomate)", "roommate")
temp <- stringr::str_replace_all(temp,"(cousic)", "cousin")
temp <- stringr::str_replace_all(temp,"grand(ma|pa|father|mother)", "grandparent")
temp <- stringr::str_replace_all(temp,"step *(father|mother|dad|mom)", "step parent")
temp <- stringr::str_replace_all(temp,"(partener)", "partner")
# Shrink down to just one white space
return(temp)
}
all_relationships <- sort(unique(clean_infrel_types(
c(yadsv_v1_inf_rel$value,
yadsv_v2_inf_rel$value,
yadsv_v2o_inf_rel$value))))
cat(paste(all_relationships, collapse = '\n'))