## Example script for the clinCompare package, organised as knitr chunks.
## Each `## ----name----` header marks one chunk; chunks flagged `eval=FALSE`
## are kept as commented-out code and are not executed.
##
## Every data.frame() call passes `stringsAsFactors = FALSE` so that character
## columns stay character on R versions older than 4.0.0 as well.

## ----setup--------------------------------------------------------------------
library(clinCompare)

## ----compare-datasets---------------------------------------------------------
# Two versions of the same three subjects. `updated` differs from `baseline`
# in AGE for SUBJ02 (52 -> 53) and in SEX for SUBJ03 ("M" -> "F").
baseline <- data.frame(
  USUBJID = c("SUBJ01", "SUBJ02", "SUBJ03"),
  AGE = c(45, 52, 38),
  SEX = c("M", "F", "M"),
  RACE = c("WHITE", "WHITE", "ASIAN"),
  stringsAsFactors = FALSE
)

updated <- data.frame(
  USUBJID = c("SUBJ01", "SUBJ02", "SUBJ03"),
  AGE = c(45, 53, 38),
  SEX = c("M", "F", "F"),
  RACE = c("WHITE", "WHITE", "ASIAN"),
  stringsAsFactors = FALSE
)

result <- compare_datasets(baseline, updated)
result

## ----drill-into-result--------------------------------------------------------
# Per-column difference counts
result$observation_comparison$discrepancies

# Row-level details for a specific variable
result$observation_comparison$details$SEX

## ----compare-variables--------------------------------------------------------
# The column sets differ on purpose: `df_a` has SEX, `df_b` has WEIGHT.
# AGE is stored as double in `df_a` and as integer in `df_b`.
df_a <- data.frame(
  USUBJID = c("SUBJ01", "SUBJ02"),
  AGE = c(45, 52),
  SEX = c("M", "F"),
  stringsAsFactors = FALSE
)

df_b <- data.frame(
  USUBJID = c("SUBJ01", "SUBJ02"),
  AGE = c(45L, 52L),
  WEIGHT = c(75.5, 80.2),
  stringsAsFactors = FALSE
)

compare_variables(df_a, df_b)

## ----compare-observations-----------------------------------------------------
# Same columns and IDs; SCORE differs only for ID 2 (90 vs 95).
df1 <- data.frame(
  ID = c(1, 2, 3),
  SCORE = c(80, 90, 70),
  stringsAsFactors = FALSE
)

df2 <- data.frame(
  ID = c(1, 2, 3),
  SCORE = c(80, 95, 70),
  stringsAsFactors = FALSE
)

compare_observations(df1, df2)

## ----clean-dataset------------------------------------------------------------
# Input contains an exact duplicate row ("Bob", 85) and two rows whose NAME
# differs only in letter case ("Alice" / "alice").
messy <- data.frame(
  NAME = c("Alice", "alice", "Bob", "Bob"),
  SCORE = c(100, 100, 85, 85),
  stringsAsFactors = FALSE
)

clean_dataset(messy, remove_duplicates = TRUE, convert_to_case = "upper")

## ----prepare-datasets---------------------------------------------------------
# Both inputs hold the same three regions, listed in a different row order.
df_unsorted1 <- data.frame(
  REGION = c("West", "East", "North"),
  SALES = c(150, 200, 180),
  stringsAsFactors = FALSE
)

df_unsorted2 <- data.frame(
  REGION = c("East", "North", "West"),
  SALES = c(210, 185, 160),
  stringsAsFactors = FALSE
)

prepped <- prepare_datasets(
  df_unsorted1,
  df_unsorted2,
  sort_columns = "REGION"
)
prepped$df1
prepped$df2

## ----compare-by-group---------------------------------------------------------
# Two sites with two subjects each; AGE differs for S02 (52 -> 53) and
# S04 (61 -> 62), i.e. one change per site.
site_data_v1 <- data.frame(
  SITEID = c("SITE01", "SITE01", "SITE02", "SITE02"),
  SUBJID = c("S01", "S02", "S03", "S04"),
  AGE = c(45, 52, 38, 61),
  stringsAsFactors = FALSE
)

site_data_v2 <- data.frame(
  SITEID = c("SITE01", "SITE01", "SITE02", "SITE02"),
  SUBJID = c("S01", "S02", "S03", "S04"),
  AGE = c(45, 53, 38, 62),
  stringsAsFactors = FALSE
)

by_site <- compare_by_group(site_data_v1, site_data_v2, group_vars = "SITEID")
names(by_site)

## ----detect-domain------------------------------------------------------------
# Demographics-style data set passed to the domain detector.
dm_data <- data.frame(
  STUDYID = rep("STUDY01", 3),
  USUBJID = c("SUBJ01", "SUBJ02", "SUBJ03"),
  AGE = c(45, 62, 51),
  SEX = c("M", "F", "M"),
  RACE = c("WHITE", "BLACK", "ASIAN"),
  ARMCD = c("TRT", "PBO", "TRT"),
  ARM = c("Treatment", "Placebo", "Treatment"),
  stringsAsFactors = FALSE
)

detect_cdisc_domain(dm_data)

## ----cdisc-compare------------------------------------------------------------
# `dm_v2` is identical to `dm_v1` except for AGE of SUBJ03 (51 -> 52).
dm_v1 <- data.frame(
  STUDYID = rep("STUDY01", 3),
  USUBJID = c("SUBJ01", "SUBJ02", "SUBJ03"),
  AGE = c(45, 62, 51),
  SEX = c("M", "F", "M"),
  RACE = c("WHITE", "BLACK", "ASIAN"),
  ARMCD = c("TRT", "PBO", "TRT"),
  ARM = c("Treatment", "Placebo", "Treatment"),
  RFSTDTC = c("2024-01-15", "2024-01-16", "2024-01-17"),
  stringsAsFactors = FALSE
)

dm_v2 <- data.frame(
  STUDYID = rep("STUDY01", 3),
  USUBJID = c("SUBJ01", "SUBJ02", "SUBJ03"),
  AGE = c(45, 62, 52),
  SEX = c("M", "F", "M"),
  RACE = c("WHITE", "BLACK", "ASIAN"),
  ARMCD = c("TRT", "PBO", "TRT"),
  ARM = c("Treatment", "Placebo", "Treatment"),
  RFSTDTC = c("2024-01-15", "2024-01-16", "2024-01-17"),
  stringsAsFactors = FALSE
)

cdisc_result <- cdisc_compare(dm_v1, dm_v2, domain = "DM", standard = "SDTM")
cdisc_result

## ----validate-cdisc-----------------------------------------------------------
# The validation result is stored but not printed in this chunk.
validation <- validate_cdisc(dm_v1, domain = "DM", standard = "SDTM")

## ----get-all-diffs------------------------------------------------------------
diffs <- get_all_differences(cdisc_result)
diffs

## ----export-report------------------------------------------------------------
# Reports are written under tempdir() so the working directory is untouched.
# HTML report
export_report(cdisc_result, file.path(tempdir(), "dm_report.html"))

# Text report
export_report(cdisc_result, file.path(tempdir(), "dm_report.txt"))

## ----export-excel, eval=FALSE-------------------------------------------------
# # Excel workbook with Summary, Variable Diffs, Value Diffs, and CDISC tabs
# export_report(cdisc_result, file.path(tempdir(), "dm_report.xlsx"))

## ----batch-compare, eval=FALSE------------------------------------------------
# results <- compare_submission(
#   base_dir = "submission_v1/",
#   compare_dir = "submission_v2/",
#   output_file = "submission_diff.xlsx"
# )