process_country <- function(cfg, country_key) {
# Load
if (!file.exists(cfg$file)) {
cat(paste0("**", cfg$label, ":** File not found: ", cfg$file, "\n\n"))
return(NULL)
}
raw <- qualtRics::read_survey(cfg$file) |> clean_names()
# Rename key variables (handle different QID numbers per country)
age_col <- cfg$age_qid
if (age_col %in% names(raw)) raw <- raw |> rename(age = !!sym(age_col))
if ("duration_in_seconds" %in% names(raw)) {
raw <- raw |> rename(duration_seconds = duration_in_seconds)
}
# Create sex_id from multi-select columns (varies by survey version)
# Germany new: q62_1 through q62_8
# Austria new: sexuality_id_1 through sexuality_id_8
# Old surveys: single qid column
sex_prefix <- if ("q62_1" %in% names(raw)) "q62" else if ("sexuality_id_1" %in% names(raw)) "sexuality_id" else NULL
if (!is.null(sex_prefix)) {
col1 <- paste0(sex_prefix, "_1")
col2 <- paste0(sex_prefix, "_2")
col3 <- paste0(sex_prefix, "_3")
col4 <- paste0(sex_prefix, "_4")
raw <- raw |> mutate(
sex_id = case_when(
!is.na(.data[[col1]]) ~ "Heterosexuell",
!is.na(.data[[col2]]) ~ "Schwul/lesbisch",
!is.na(.data[[col3]]) ~ "Bisexuell",
!is.na(.data[[col4]]) ~ "Pansexuell",
TRUE ~ NA_character_
)
)
} else {
sex_id_col <- cfg$sex_id_qid
if (sex_id_col %in% names(raw)) raw <- raw |> rename(sex_id = !!sym(sex_id_col))
if (!"sex_id" %in% names(raw)) raw$sex_id <- NA_character_
}
# Handle relationship_type multi-select (new surveys have relationship_type_1 through _5)
if ("relationship_type_1" %in% names(raw) && !"relationship_type" %in% names(raw)) {
raw <- raw |> mutate(
relationship_type = case_when(
!is.na(relationship_type_1) ~ relationship_type_1,
!is.na(relationship_type_2) ~ relationship_type_2,
!is.na(relationship_type_3) ~ relationship_type_3,
!is.na(relationship_type_4) ~ relationship_type_4,
!is.na(relationship_type_5) ~ relationship_type_5,
TRUE ~ NA_character_
)
)
}
# Map sex_pref: handle both text labels and numeric
if (is.character(raw$sex_pref)) {
raw <- raw |> mutate(
sex_pref = case_when(
str_detect(sex_pref, "Nur M.nner") ~ 1,
str_detect(sex_pref, "berwiegend M.nner") ~ 2,
str_detect(sex_pref, "berwiegend Frauen") ~ 3,
str_detect(sex_pref, "Nur Frauen") ~ 4,
str_detect(sex_pref, "gleicherma") ~ 5,
TRUE ~ NA_real_
)
)
}
# Filter
n_raw <- nrow(raw)
# Handle both logical and numeric finished
if (is.logical(raw$finished)) {
raw <- raw |> filter(finished == TRUE)
} else {
raw <- raw |> filter(finished == 1)
}
n_finished <- nrow(raw)
# Handle both text and numeric consent
if (is.character(raw$particip_agree)) {
raw <- raw |> filter(str_detect(particip_agree, "stimme zu|Ich stimme"))
} else {
raw <- raw |> filter(particip_agree == 1)
}
n_consented <- nrow(raw)
# --- Reshape attributes ---
attributes_long <- raw |>
select(response_id, starts_with("rd_")) |>
mutate(across(starts_with("rd_"), as.character)) |>
pivot_longer(cols = -response_id, names_to = "key", values_to = "value") |>
separate(key, into = c("prefix", "round", "attribute"), sep = "_", extra = "merge") |>
select(-prefix) |>
mutate(round = as.character(round)) |>
pivot_wider(names_from = attribute, values_from = value)
# --- Extract decisions ---
# QID numbers differ between Germany and Austria surveys
# Germany: men=350-368, women=310-328
# Austria: men=348-366, women=308-326
if ("qid350" %in% names(raw)) {
men_qids <- paste0("qid", seq(350, 368, by = 2))
women_qids <- paste0("qid", seq(310, 328, by = 2))
} else {
men_qids <- paste0("qid", seq(348, 366, by = 2))
women_qids <- paste0("qid", seq(308, 326, by = 2))
}
decision_map <- tibble(
qid = c(men_qids, women_qids),
arm_round = c(1:10, 1:10),
arm = c(rep("mw", 10), rep("ww", 10))
)
available_qids <- intersect(decision_map$qid, names(raw))
decision_map_avail <- decision_map |> filter(qid %in% available_qids)
decisions_raw <- raw |>
select(response_id, sex_pref, all_of(decision_map_avail$qid)) |>
pivot_longer(cols = -c(response_id, sex_pref), names_to = "qid", values_to = "choice_text") |>
left_join(decision_map_avail, by = "qid") |>
filter(!is.na(choice_text) & choice_text != "")
# Map text or numeric choices
decisions <- decisions_raw |>
mutate(
choice = case_when(
choice_text %in% c("Cancel", "1") ~ 1,
choice_text %in% c("star", "2") ~ 2,
choice_text %in% c("Heart", "4") ~ 4,
TRUE ~ suppressWarnings(as.numeric(choice_text))
),
round = case_when(
sex_pref != 5 ~ as.character(arm_round),
sex_pref == 5 & arm == "mw" ~ as.character(2 * arm_round - 1),
sex_pref == 5 & arm == "ww" ~ as.character(2 * arm_round)
)
) |>
filter(!is.na(choice)) |>
mutate(
selected = ifelse(choice != 1, 1, 0),
swipe = case_when(choice == 1 ~ "Reject", choice == 2 ~ "Super Like", choice == 4 ~ "Like")
)
# --- Merge ---
conjoint <- decisions |>
select(response_id, round, choice, selected, swipe, arm) |>
inner_join(attributes_long, by = c("response_id", "round"))
respondent_cols <- c("response_id", "gender", "sex_pref", "education_level",
"occupation", "age", "religion", "religiosity",
"party_id", "party_id_squeeze", "party_id_strength", "ideology_lr",
"political_interest", "tinder", "tinder_2",
"relationship_type", "relationship_now", "sex_id",
"duration_seconds", "start_date", "end_date", "ip_address")
available_resp <- intersect(respondent_cols, names(raw))
# Add bundesland/region if present
if ("bundesland" %in% names(raw)) available_resp <- c(available_resp, "bundesland")
respondent_vars <- raw |> select(all_of(available_resp))
# Rename to avoid collision
if ("gender" %in% names(respondent_vars)) {
respondent_vars <- respondent_vars |> rename(resp_gender_raw = gender)
}
if ("age" %in% names(respondent_vars)) {
respondent_vars <- respondent_vars |> rename(resp_age_raw = age)
}
if ("relationship_type" %in% names(respondent_vars)) {
respondent_vars <- respondent_vars |> rename(resp_relationship_type = relationship_type)
}
conjoint <- conjoint |> left_join(respondent_vars, by = "response_id")
# Drop missing engine data
conjoint <- conjoint |>
filter(!is.na(party_label), party_label != "",
!is.na(person), person != "")
# --- Recode party ---
conjoint$PARTY <- NA_character_
for (party_name in names(cfg$party_detect)) {
pattern <- cfg$party_detect[[party_name]]
if (party_name == cfg$far_right) {
# Far-right: match only if no "Niemals" present
idx <- which(is.na(conjoint$PARTY) &
str_detect(conjoint$party_label, pattern) &
!str_detect(conjoint$party_label, "Niemals|niemals"))
} else {
idx <- which(is.na(conjoint$PARTY) &
str_detect(conjoint$party_label, pattern))
}
if (length(idx) > 0) conjoint$PARTY[idx] <- party_name
}
conjoint <- conjoint |> mutate(
REJECTION = case_when(
str_detect(party_label, "Niemals|niemals") ~ "Anti-far-right signal",
PARTY == cfg$far_right ~ paste0(cfg$far_right, " (no signal possible)"),
TRUE ~ "No signal"
),
REJECTION_BINARY = ifelse(str_detect(party_label, "Niemals|niemals"), 1, 0),
REJECTION_CLEAN = factor(
ifelse(REJECTION_BINARY == 1, "With rejection signal", "Without rejection signal"),
levels = c("Without rejection signal", "With rejection signal")
)
)
# --- Recode other attributes ---
conjoint <- conjoint |> mutate(
base_person = str_extract(person, "^[WM]\\d+"),
variant_num = as.numeric(str_extract(person, "\\d+$")),
FLAG = case_when(
nationality == "" | is.na(nationality) ~ "No flag",
str_detect(nationality, "german-european|austrian-european") ~ "National + EU flag",
str_detect(nationality, "^german$|^austrian$") ~ "National flag",
str_detect(nationality, "^european$") ~ "EU flag",
TRUE ~ nationality
),
BI = factor(ifelse(bisexual == TRUE | bisexual == "true" | bisexual == "TRUE",
"Bisexual", "Heterosexual"),
levels = c("Heterosexual", "Bisexual")),
TRANS = factor(ifelse(trans == TRUE | trans == "true" | trans == "TRUE",
"Trans", "Cis"),
levels = c("Cis", "Trans")),
VEGAN = factor(ifelse(vegan != "" & !is.na(vegan) & vegan != "FALSE" & vegan != FALSE,
"Vegan", "Not vegan"),
levels = c("Not vegan", "Vegan")),
CONGRUENCE = factor(
ifelse(gender_congruence == "yes", "Gender congruent", "Non-congruent"),
levels = c("Non-congruent", "Gender congruent")
),
EMOTION = factor(
ifelse(!is.na(emotion) & emotion != "",
ifelse(str_detect(emotion, "smiling|Smiling"), "Smiling", "Neutral"),
ifelse(variant_num %% 2 == 1, "Smiling", "Neutral")),
levels = c("Neutral", "Smiling")
),
DISTANCE = factor(case_when(
str_detect(distance, "<1|weniger") ~ "<1km",
str_detect(distance, "1-5|1 bis 5") ~ "1-5km",
str_detect(distance, "5-10|5 bis 10") ~ "5-10km",
str_detect(distance, ">10|mehr als 10") ~ ">10km",
TRUE ~ NA_character_
), levels = c("<1km", "1-5km", "5-10km", ">10km")),
ETHNICITY = factor(
ifelse(!is.na(ethnicity) & ethnicity != "", ethnicity, ethnicity_map[base_person]),
levels = c("German", "Turkish", "Eastern European", "Other")
),
SOLIDARITY = case_when(
international_solidarity == "" | is.na(international_solidarity) ~ "None",
str_detect(international_solidarity, "FUCK TRUMP") ~ "Anti-Trump",
str_detect(international_solidarity, "ProTRUMP") ~ "Pro-Trump",
str_detect(international_solidarity, "ProPutin") ~ "Pro-Putin",
str_detect(international_solidarity, "Ukraine") ~ "Pro-Ukraine",
TRUE ~ international_solidarity
),
RELATIONSHIP_SEEK = case_when(
str_detect(relationship_type, "feste Partnerschaft") ~ "Committed",
str_detect(relationship_type, "offene Beziehung") ~ "Open",
str_detect(relationship_type, "unverbindliche") ~ "Casual",
str_detect(relationship_type, "offen fur alles|offen für alles") ~ "Open to anything",
str_detect(relationship_type, "Freundschaft") ~ "FWB",
TRUE ~ "Unknown"
),
CULTURAL_TYPE = factor(
ifelse(cultural_identity %in% progressive_statements, "Progressive", "Conservative"),
levels = c("Conservative", "Progressive")
),
PROFILE_GENDER = factor(
ifelse(str_detect(tolower(gender), "woman|frau"), "Woman", "Man"),
levels = c("Woman", "Man")
)
)
# Recode SES
conjoint <- conjoint |> mutate(
JOB_SLIM = case_when(
job %in% c("Rechtsanwaltschaft/Jura", "Steuerberatung / Wirtschaftsprufung",
"Leitende Tatigkeit im Personalwesen", "Arztlich-medizinisches Personal",
"Bauingenieurwesen", "Webdesign", "Architekturburo",
"Wissenschaftliche Tatigkeit an einer Universitat",
"Softwareentwicklung", "Finanzanalyse",
"Hoherer offentlicher Dienst", "Leitung Unternehmensberatung",
"Steuerberatung / Wirtschaftsprüfung",
"Leitende Tätigkeit im Personalwesen", "Ärztlich-medizinisches Personal",
"Architekturbüro", "Wissenschaftliche Tätigkeit an einer Universität",
"Höherer öffentlicher Dienst") ~ "Upper professional",
job %in% c("Pflegefachkraft", "Rettungsdienst", "IT-Fachkraft",
"Lehrkraft an einer weiterfuhrenden Schule",
"Lehrkraft an einer berufsbildenden Schule",
"Personliche Assistenz", "Lehrkraft an einer Grundschule",
"Lehrkraft an einer Volksschule",
"Laborfachkraft", "Polizei", "Grafikdesign",
"Offentlicher Dienst", "Finanzberatung",
"Filialleitung im Einzelhandel", "Fitnesscoach",
"Lehrkraft an einer weiterführenden Schule",
"Persönliche Assistenz", "Öffentlicher Dienst") ~ "Lower professional",
job %in% c("Anstellung im Tatowierstudio", "Zahnmedizinische Assistenz",
"Fachkraft fur Elektrotechnik", "Buroassistenz",
"Pharmazeutische Assistenz", "Unterrichtsassistenz (padagogisch)",
"Maschinen- und Fahrzeugtechnik", "Abteilungsleitung im Einzelhandel",
"Assistenz im Personalwesen", "Immobilienvermittlung",
"Styling im Modebereich",
"Anstellung im Tätowierstudio", "Fachkraft für Elektrotechnik",
"Büroassistenz", "Unterrichtsassistenz (pädagogisch)") ~ "Skilled manual/service",
job %in% c("Fachkraft im Einzelhandel", "Kassenmitarbeit im Einzelhandel",
"Anstellung im Bauwesen", "Lkw-Fahrdienst", "Kurierdienst",
"Reinigungspersonal", "Barservice", "Service im Restaurant",
"Callcenter-Tatigkeit", "Arbeit im Lagerbereich",
"Warenverraumung", "Taxifahrdienst",
"Callcenter-Tätigkeit", "Warenverräumung") ~ "Unskilled/routine",
TRUE ~ "Unclassified"
)
)
# --- Recode respondent variables ---
conjoint <- conjoint |> mutate(
resp_gender = factor(case_when(
str_detect(resp_gender_raw, "Mann|mann") ~ "Man",
str_detect(resp_gender_raw, "Frau|frau") ~ "Woman",
str_detect(resp_gender_raw, "bin.r") ~ "Non-binary",
resp_gender_raw == "1" ~ "Man",
resp_gender_raw == "2" ~ "Woman",
TRUE ~ NA_character_
), levels = c("Man", "Woman", "Non-binary")),
resp_age = factor(case_when(
str_detect(resp_age_raw, "18.29") ~ "18-29",
str_detect(resp_age_raw, "30.39") ~ "30-39",
str_detect(resp_age_raw, "40.49") ~ "40-49",
str_detect(resp_age_raw, "50.59") ~ "50-59",
str_detect(resp_age_raw, "60.74") ~ "60-74",
resp_age_raw == "1" ~ "18-29",
resp_age_raw == "2" ~ "30-39",
resp_age_raw == "3" ~ "40-49",
resp_age_raw == "4" ~ "50-59",
resp_age_raw == "5" ~ "60-74",
TRUE ~ NA_character_
), levels = c("18-29", "30-39", "40-49", "50-59", "60-74")),
ideology_numeric = as.numeric(str_extract(as.character(ideology_lr), "^\\d+")),
derecha = factor(case_when(
ideology_numeric > 5 ~ "Right-wing",
ideology_numeric < 5 ~ "Left-wing",
TRUE ~ NA_character_
), levels = c("Left-wing", "Right-wing")),
resp_farright = factor(ifelse(
str_detect(party_id, cfg$far_right) | str_detect(coalesce(party_id_squeeze, ""), cfg$far_right),
paste0(cfg$far_right, " supporter"),
paste0("Not ", cfg$far_right, " supporter")
)),
resp_lgbtq = factor(case_when(
is.na(sex_id) | sex_id == "" ~ NA_character_,
sex_id %in% c("1", "Heterosexuell") ~ "Heterosexual",
sex_id %in% c("Unsicher", "Keine Angabe") ~ NA_character_,
TRUE ~ "LGBTQ+"
), levels = c("Heterosexual", "LGBTQ+"))
)
# Recode education for quota check
conjoint <- conjoint |> mutate(
resp_edu_level = case_when(
as.character(education_level) %in% cfg$education_map[["Low"]] ~ "Low",
as.character(education_level) %in% cfg$education_map[["Medium"]] ~ "Medium",
as.character(education_level) %in% cfg$education_map[["High"]] ~ "High",
# Numeric fallback
education_level == "1" ~ "Low",
education_level == "2" ~ "Medium",
education_level %in% c("3", "4", "5") ~ "High",
TRUE ~ NA_character_
)
)
# Factor conversions
conjoint <- conjoint |> mutate(
PARTY = factor(PARTY, levels = names(cfg$party_detect)),
FLAG = factor(FLAG, levels = c("No flag", "National flag", "National + EU flag", "EU flag")),
SOLIDARITY = factor(SOLIDARITY, levels = c("None", "Anti-Trump", "Pro-Trump", "Pro-Putin", "Pro-Ukraine")),
REJECTION = factor(REJECTION),
RELATIONSHIP_SEEK = factor(RELATIONSHIP_SEEK),
country = cfg$label
)
# Respondent-level data
respondents <- conjoint |> distinct(response_id, .keep_all = TRUE)
list(
raw = raw,
conjoint = conjoint,
respondents = respondents,
n_raw = n_raw,
n_finished = n_finished,
n_consented = n_consented,
cfg = cfg
)
}
# Process both countries
data <- list()
for (ck in names(countries)) {
result <- tryCatch(
process_country(countries[[ck]], ck),
error = function(e) {
cat(paste0("**Error processing ", countries[[ck]]$label, ":** ", e$message, "\n\n"))
NULL
}
)
if (!is.null(result)) data[[ck]] <- result
}
# Summary
for (ck in names(data)) {
d <- data[[ck]]
cat(paste0("**", d$cfg$label, ":** ",
nrow(d$respondents), " respondents, ",
nrow(d$conjoint), " profile evaluations\n\n"))
}