## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----echo=TRUE---------------------------------------------------------------- set.seed(1976) library(survey) # Creating dataset with price acceptance and biased gender variable input_data <- data.frame(tch = round(rnorm(n = 250, mean = 8, sd = 0.5), digits = 2), ch = round(rnorm(n = 250, mean = 12, sd = 0.5), digits = 2), ex = round(rnorm(n = 250, mean = 13, sd = 0.5), digits = 2), tex = round(rnorm(n = 250, mean = 15, sd = 0.5), digits = 2), gender = sample(x = c("male", "female"), size = 250, replace = TRUE, prob = c(2/3, 1/3))) # for women: increasing the price acceptance by +50% input_data$tch[input_data$gender == "female"] <- input_data$tch[input_data$gender == "female"] * 1.5 input_data$ch[input_data$gender == "female"] <- input_data$ch[input_data$gender == "female"] * 1.5 input_data$ex[input_data$gender == "female"] <- input_data$ex[input_data$gender == "female"] * 1.5 input_data$tex[input_data$gender == "female"] <- input_data$tex[input_data$gender == "female"] * 1.5 # for survey design object: occurrence of each gender in the target population # it's only one figure in this example because we assume that gender should be perfectly balanced. # if this is not balanced in the population, we would need a vector with the number of occurrences in the population. # the sum of the strata size across all strata gives the total population size # (here: two strata with 5k each = 10k total population) input_data$gender_pop <- 5000 # creating the survey design object for post-stratification based on gender # we assume that the selection of respondents within each gender is biased and... # only the gender balance in the sample is problematic input_design <- survey::svydesign(ids = ~ 1, # no clusters probs = NULL, # hence no cluster sampling probabilities, strata = input_data$gender, # stratified by gender fpc = input_data$gender_pop, # strata size in the population data = input_data) # data object used as input ## ----echo=TRUE---------------------------------------------------------------- library(pricesensitivitymeter) ## ----label='c1', echo=TRUE, cache=TRUE---------------------------------------- output_weighted_psm <- psm_analysis_weighted(toocheap = "tch", cheap = "ch", expensive = "ex", tooexpensive = "tex", design = input_design) summary(output_weighted_psm) ## ----echo=TRUE---------------------------------------------------------------- set.seed(20) library(survey) ## ----label='c5', echo=TRUE, cache=TRUE---------------------------------------- # Creating dataset with price acceptance and unbiased gender variable input_data_2 <- data.frame(tch = round(rnorm(n = 250, mean = 4, sd = 0.5), digits = 2), ch = round(rnorm(n = 250, mean = 8, sd = 0.5), digits = 2), ex = round(rnorm(n = 250, mean = 12, sd = 0.5), digits = 2), tex = round(rnorm(n = 250, mean = 16, sd = 0.5), digits = 2), gender = sample(x = c("male", "female"), size = 250, replace = TRUE, prob = c(0.5, 0.5))) # for women: increasing the price acceptance by +50% input_data_2$tch[input_data_2$gender == "female"] <- input_data_2$tch[input_data_2$gender == "female"] * 1.5 input_data_2$ch[input_data_2$gender == "female"] <- input_data_2$ch[input_data_2$gender == "female"] * 1.5 input_data_2$ex[input_data_2$gender == "female"] <- input_data_2$ex[input_data_2$gender == "female"] * 1.5 input_data_2$tex[input_data_2$gender == "female"] <- input_data_2$tex[input_data_2$gender == "female"] * 1.5 # now let's create a sample design object (using the survey package) # ... assuming that gender is balanced equally in the population of 10000 # for survey design object: occurrence of each gender in the target population # would usually be information from sampling frame, differs here only for demonstration purposes # here: scaling up based on actual sample information (hypothetical population of 250 * 4 = 10k) input_data_2$gender_pop <- NA input_data_2$gender_pop[input_data_2$gender == "female"] <- sum(input_data_2$gender == "female") * 40 input_data_2$gender_pop[input_data_2$gender == "male"] <- sum(input_data_2$gender == "male") * 40 # creating the survey design object for post-stratification based on gender input_design_2 <- survey::svydesign(ids = ~ 1, # no clusters probs = NULL, # hence no cluster sampling probabilities, strata = input_data_2$gender, # stratified by gender fpc = input_data_2$gender_pop, # strata size in the population data = input_data_2) # data object used as input # quick check: there is only one weight for all our strata # if we would have different weights per gender, we would see two unique values here unique(weights(input_design_2, type = "analysis")) ## ----label='c2', echo=TRUE, cache=TRUE---------------------------------------- # running both weighted and unweighted analysis on the same data check_weighted_1 <- psm_analysis_weighted(toocheap = "tch", cheap = "ch", expensive = "ex", tooexpensive = "tex", design = input_design_2) check_unweighted_1 <- psm_analysis(toocheap = "tch", cheap = "ch", expensive = "ex", tooexpensive = "tex", data = input_data_2) # results should be identical summary(check_weighted_1) summary(check_unweighted_1) ## ----------------------------------------------------------------------------- input_data_3 <- input_data_2 manipulated_men <- sample(which(input_data_2$gender == "male"), 10) input_data_3$ch[manipulated_men] <- input_data_3$tex[manipulated_men] ## ----label='c3', cache=TRUE--------------------------------------------------- # creating the survey design object for post-stratification based on gender input_design_3 <- survey::svydesign(ids = ~ 1, # no clusters probs = NULL, # hence no cluster sampling probabilities, strata = input_data_3$gender, # stratified by gender fpc = input_data_3$gender_pop, # strata size in the population data = input_data_3) # data object used as input check_weighted_2 <- psm_analysis_weighted(toocheap = "tch", cheap = "ch", expensive = "ex", tooexpensive = "tex", design = input_design_3) check_unweighted_2 <- psm_analysis(toocheap = "tch", cheap = "ch", expensive = "ex", tooexpensive = "tex", data = input_data_3) # results should be different now summary(check_weighted_2) summary(check_unweighted_2) ## ----label='c4', cache=TRUE--------------------------------------------------- # setting up data with NAs in "too cheap" variable input_data_2 <- input_data input_data_2$tch <- NA # create new sample design input_design_2 <- survey::svydesign(ids = ~ 1, # no clusters probs = NULL, # hence no cluster samling probabilities, strata = input_data_2$gender, # stratified by gender fpc = input_data_2$gender_pop, # strata size in the population data = input_data_2) # data object used as input test_2 <- psm_analysis_weighted(toocheap = "tch", cheap = "ch", expensive = "ex", tooexpensive = "tex", design = input_design_2) summary(test_2) ## ----------------------------------------------------------------------------- # setting up dataset with purchase intent information input_data_3 <- input_data input_data_3$pi_ch <- sample(x = c(1:5), size = nrow(input_data_3), replace = TRUE, prob = c(0.1, 0.1, 0.2, 0.3, 0.3)) input_data_3$pi_ex <- sample(x = c(1:5), size = nrow(input_data_3), replace = TRUE, prob = c(0.3, 0.3, 0.2, 0.1, 0.1)) # re-creating the survey design object input_design_3 <- survey::svydesign(ids = ~ 1, probs = NULL, strata = input_data_3$gender, fpc = input_data_3$gender_pop, data = input_data_3) # running the weighted Price Sensitivity Meter analysis test_3 <- psm_analysis_weighted(toocheap = "tch", cheap = "ch", expensive = "ex", tooexpensive = "tex", design = input_design_3, pi_cheap = "pi_ch", pi_expensive = "pi_ex", pi_scale = 5:1, pi_calibrated = c(0.7, 0.5, 0.3, 0.1, 0)) summary(test_3)