# Code extracted from a knitr vignette; the `## ----label----` lines mark the
# original chunks. The setup chunk sets `eval = FALSE`, so every later chunk is
# kept as commented-out code. Uncomment chunks to run them interactively.
#
# Chunks build on objects created by earlier ones: `connection`, the
# `COHORT_ID` / `CDM_SCHEMA` / `COHORT_TBL` / `TEMP_SCHEMA` constants, `windows`,
# `plan_cond`, `specs_cond` and `specs_multi` are all reused further down, so
# run the chunks in order.

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)

## ----prerequisites------------------------------------------------------------
# for (package in c("DatabaseConnector", "Eunomia")) {
#   if (!requireNamespace(package, quietly = TRUE)) {
#     install.packages(package)
#   }
# }
#
# library(OdysseusCharacterizationModule)
# library(DatabaseConnector)
# library(Eunomia)

## ----connect------------------------------------------------------------------
#
# connectionDetails <- getEunomiaConnectionDetails()
# Eunomia::createCohorts(connectionDetails)
# connection <- connect(connectionDetails)

## ----verify-cohorts-----------------------------------------------------------
# cohortCounts <- querySql(connection, "
#   SELECT cohort_definition_id, COUNT(*) AS cnt
#   FROM main.cohort
#   GROUP BY cohort_definition_id
#   ORDER BY cohort_definition_id
# ")
# cohortCounts

## ----common-params------------------------------------------------------------
# COHORT_ID   <- 1L
# CDM_SCHEMA  <- "main"
# COHORT_TBL  <- "cohort"
# TEMP_SCHEMA <- "main"   # SQLite temp-table emulation

## ----windows------------------------------------------------------------------
# windows <- defineAnalysisWindows(
#   startDays = c(-365,   1),
#   endDays   = c(  -1, 365)
# )
# windows

## ----condition-start----------------------------------------------------------
# plan_cond <- planAnalysis(
#   analysisWindows = windows,
#   useBaseFeatures = list(
#     condition_occurrence = list(include = TRUE, type = "start"),
#     condition_era        = list(include = FALSE),
#     drug_exposure        = list(include = FALSE),
#     drug_era             = list(include = FALSE),
#     procedure_occurrence = list(include = FALSE),
#     observation          = list(include = FALSE),
#     device_exposure      = list(include = FALSE),
#     visit_occurrence     = list(include = FALSE),
#     measurement          = list(include = FALSE)
#   ),
#   useCohortFeatures     = list(include = FALSE),
#   useConceptSetFeatures = list(include = FALSE)
# )
#
# specs_cond <- singleNodeSetting(
#   plan                     = plan_cond,
#   cohortId                 = COHORT_ID,
#   cohortDatabaseSchema     = CDM_SCHEMA,
#   cohortTable              = COHORT_TBL,
#   cdmDatabaseSchema        = CDM_SCHEMA,
#   vocabularyDatabaseSchema = CDM_SCHEMA,
#   aggregated               = TRUE
# )
#
# cat("Specs generated:", length(specs_cond), "\n")
#
# results_cond <- executeSpecs(
#   connection, specs_cond,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE
# )
#
# head(results_cond[["1001"]], 10)

## ----drug-exposure------------------------------------------------------------
# plan_drug <- planAnalysis(
#   analysisWindows = windows,
#   useBaseFeatures = list(
#     condition_occurrence = list(include = FALSE),
#     condition_era        = list(include = FALSE),
#     drug_exposure        = list(include = TRUE),
#     drug_era             = list(include = FALSE),
#     procedure_occurrence = list(include = FALSE),
#     observation          = list(include = FALSE),
#     device_exposure      = list(include = FALSE),
#     visit_occurrence     = list(include = FALSE),
#     measurement          = list(include = FALSE)
#   ),
#   useCohortFeatures     = list(include = FALSE),
#   useConceptSetFeatures = list(include = FALSE)
# )
#
# specs_drug <- singleNodeSetting(
#   plan                     = plan_drug,
#   cohortId                 = COHORT_ID,
#   cohortDatabaseSchema     = CDM_SCHEMA,
#   cohortTable              = COHORT_TBL,
#   cdmDatabaseSchema        = CDM_SCHEMA,
#   vocabularyDatabaseSchema = CDM_SCHEMA,
#   aggregated               = TRUE
# )
#
# results_drug <- executeSpecs(
#   connection, specs_drug,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE
# )
#
# head(results_drug[["1001"]], 10)

## ----condition-era-overlap----------------------------------------------------
# plan_era <- planAnalysis(
#   analysisWindows = windows,
#   useBaseFeatures = list(
#     condition_occurrence = list(include = FALSE),
#     condition_era        = list(include = TRUE, type = "overlap"),
#     drug_exposure        = list(include = FALSE),
#     drug_era             = list(include = FALSE),
#     procedure_occurrence = list(include = FALSE),
#     observation          = list(include = FALSE),
#     device_exposure      = list(include = FALSE),
#     visit_occurrence     = list(include = FALSE),
#     measurement          = list(include = FALSE)
#   ),
#   useCohortFeatures     = list(include = FALSE),
#   useConceptSetFeatures = list(include = FALSE)
# )
#
# specs_era <- singleNodeSetting(
#   plan                     = plan_era,
#   cohortId                 = COHORT_ID,
#   cohortDatabaseSchema     = CDM_SCHEMA,
#   cohortTable              = COHORT_TBL,
#   cdmDatabaseSchema        = CDM_SCHEMA,
#   vocabularyDatabaseSchema = CDM_SCHEMA,
#   aggregated               = TRUE
# )
#
# results_era <- executeSpecs(
#   connection, specs_era,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE
# )
#
# head(results_era[["1001"]], 10)

## ----drug-era-overlap---------------------------------------------------------
# plan_dera <- planAnalysis(
#   analysisWindows = windows,
#   useBaseFeatures = list(
#     condition_occurrence = list(include = FALSE),
#     condition_era        = list(include = FALSE),
#     drug_exposure        = list(include = FALSE),
#     drug_era             = list(include = TRUE, type = "overlap"),
#     procedure_occurrence = list(include = FALSE),
#     observation          = list(include = FALSE),
#     device_exposure      = list(include = FALSE),
#     visit_occurrence     = list(include = FALSE),
#     measurement          = list(include = FALSE)
#   ),
#   useCohortFeatures     = list(include = FALSE),
#   useConceptSetFeatures = list(include = FALSE)
# )
#
# specs_dera <- singleNodeSetting(
#   plan                     = plan_dera,
#   cohortId                 = COHORT_ID,
#   cohortDatabaseSchema     = CDM_SCHEMA,
#   cohortTable              = COHORT_TBL,
#   cdmDatabaseSchema        = CDM_SCHEMA,
#   vocabularyDatabaseSchema = CDM_SCHEMA,
#   aggregated               = TRUE
# )
#
# results_dera <- executeSpecs(
#   connection, specs_dera,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE
# )
#
# head(results_dera[["1001"]], 10)

## ----procedure----------------------------------------------------------------
# plan_proc <- planAnalysis(
#   analysisWindows = windows,
#   useBaseFeatures = list(
#     condition_occurrence = list(include = FALSE),
#     condition_era        = list(include = FALSE),
#     drug_exposure        = list(include = FALSE),
#     drug_era             = list(include = FALSE),
#     procedure_occurrence = list(include = TRUE, type = "start"),
#     observation          = list(include = FALSE),
#     device_exposure      = list(include = FALSE),
#     visit_occurrence     = list(include = FALSE),
#     measurement          = list(include = FALSE)
#   ),
#   useCohortFeatures     = list(include = FALSE),
#   useConceptSetFeatures = list(include = FALSE)
# )
#
# specs_proc <- singleNodeSetting(
#   plan                     = plan_proc,
#   cohortId                 = COHORT_ID,
#   cohortDatabaseSchema     = CDM_SCHEMA,
#   cohortTable              = COHORT_TBL,
#   cdmDatabaseSchema        = CDM_SCHEMA,
#   vocabularyDatabaseSchema = CDM_SCHEMA,
#   aggregated               = TRUE
# )
#
# results_proc <- executeSpecs(
#   connection, specs_proc,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE
# )
#
# head(results_proc[["1001"]], 10)

## ----measurement--------------------------------------------------------------
# plan_meas <- planAnalysis(
#   analysisWindows = windows,
#   useBaseFeatures = list(
#     condition_occurrence = list(include = FALSE),
#     condition_era        = list(include = FALSE),
#     drug_exposure        = list(include = FALSE),
#     drug_era             = list(include = FALSE),
#     procedure_occurrence = list(include = FALSE),
#     observation          = list(include = FALSE),
#     device_exposure      = list(include = FALSE),
#     visit_occurrence     = list(include = FALSE),
#     measurement          = list(include = TRUE)
#   ),
#   useCohortFeatures     = list(include = FALSE),
#   useConceptSetFeatures = list(include = FALSE)
# )
#
# specs_meas <- singleNodeSetting(
#   plan                     = plan_meas,
#   cohortId                 = COHORT_ID,
#   cohortDatabaseSchema     = CDM_SCHEMA,
#   cohortTable              = COHORT_TBL,
#   cdmDatabaseSchema        = CDM_SCHEMA,
#   vocabularyDatabaseSchema = CDM_SCHEMA,
#   aggregated               = TRUE
# )
#
# results_meas <- executeSpecs(
#   connection, specs_meas,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE
# )
#
# head(results_meas[["1001"]], 10)

## ----observation--------------------------------------------------------------
# plan_obs <- planAnalysis(
#   analysisWindows = windows,
#   useBaseFeatures = list(
#     condition_occurrence = list(include = FALSE),
#     condition_era        = list(include = FALSE),
#     drug_exposure        = list(include = FALSE),
#     drug_era             = list(include = FALSE),
#     procedure_occurrence = list(include = FALSE),
#     observation          = list(include = TRUE),
#     device_exposure      = list(include = FALSE),
#     visit_occurrence     = list(include = FALSE),
#     measurement          = list(include = FALSE)
#   ),
#   useCohortFeatures     = list(include = FALSE),
#   useConceptSetFeatures = list(include = FALSE)
# )
#
# specs_obs <- singleNodeSetting(
#   plan                     = plan_obs,
#   cohortId                 = COHORT_ID,
#   cohortDatabaseSchema     = CDM_SCHEMA,
#   cohortTable              = COHORT_TBL,
#   cdmDatabaseSchema        = CDM_SCHEMA,
#   vocabularyDatabaseSchema = CDM_SCHEMA,
#   aggregated               = TRUE
# )
#
# results_obs <- executeSpecs(
#   connection, specs_obs,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE
# )
#
# head(results_obs[["1001"]])

## ----visit-overlap------------------------------------------------------------
# plan_visit <- planAnalysis(
#   analysisWindows = windows,
#   useBaseFeatures = list(
#     condition_occurrence = list(include = FALSE),
#     condition_era        = list(include = FALSE),
#     drug_exposure        = list(include = FALSE),
#     drug_era             = list(include = FALSE),
#     procedure_occurrence = list(include = FALSE),
#     observation          = list(include = FALSE),
#     device_exposure      = list(include = FALSE),
#     visit_occurrence     = list(include = TRUE, type = "overlap"),
#     measurement          = list(include = FALSE)
#   ),
#   useCohortFeatures     = list(include = FALSE),
#   useConceptSetFeatures = list(include = FALSE)
# )
#
# specs_visit <- singleNodeSetting(
#   plan                     = plan_visit,
#   cohortId                 = COHORT_ID,
#   cohortDatabaseSchema     = CDM_SCHEMA,
#   cohortTable              = COHORT_TBL,
#   cdmDatabaseSchema        = CDM_SCHEMA,
#   vocabularyDatabaseSchema = CDM_SCHEMA,
#   aggregated               = TRUE
# )
#
# results_visit <- executeSpecs(
#   connection, specs_visit,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE
# )
#
# head(results_visit[["1001"]])

## ----non-aggregated-----------------------------------------------------------
# specs_raw <- singleNodeSetting(
#   plan                     = plan_cond,
#   cohortId                 = COHORT_ID,
#   cohortDatabaseSchema     = CDM_SCHEMA,
#   cohortTable              = COHORT_TBL,
#   cdmDatabaseSchema        = CDM_SCHEMA,
#   vocabularyDatabaseSchema = CDM_SCHEMA,
#   aggregated               = FALSE
# )
#
# results_raw <- executeSpecs(
#   connection, specs_raw,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE
# )
#
# cat("Patient-level rows (window 1):", nrow(results_raw[["1001"]]), "\n")
# head(results_raw[["1001"]], 10)

## ----multi-domain-------------------------------------------------------------
# plan_multi <- planAnalysis(
#   analysisWindows = windows,
#   useBaseFeatures = list(
#     condition_occurrence = list(include = TRUE, type = "start"),
#     condition_era        = list(include = TRUE, type = "overlap"),
#     drug_exposure        = list(include = TRUE),
#     drug_era             = list(include = TRUE, type = "overlap"),
#     procedure_occurrence = list(include = TRUE),
#     observation          = list(include = FALSE),
#     device_exposure      = list(include = FALSE),
#     visit_occurrence     = list(include = TRUE, type = "overlap"),
#     measurement          = list(include = TRUE)
#   ),
#   useCohortFeatures     = list(include = FALSE),
#   useConceptSetFeatures = list(include = FALSE)
# )
#
# specs_multi <- singleNodeSetting(
#   plan                     = plan_multi,
#   cohortId                 = COHORT_ID,
#   cohortDatabaseSchema     = CDM_SCHEMA,
#   cohortTable              = COHORT_TBL,
#   cdmDatabaseSchema        = CDM_SCHEMA,
#   vocabularyDatabaseSchema = CDM_SCHEMA,
#   aggregated               = TRUE
# )
#
# cat("Total specs:", length(specs_multi), "\n")
#
# results_multi <- executeSpecs(
#   connection, specs_multi,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE
# )
#
# # Summary across all specs
# summary_df <- do.call(rbind, lapply(names(results_multi), function(nm) {
#   df <- results_multi[[nm]]
#   data.frame(analysis_id = nm, rows = nrow(df), stringsAsFactors = FALSE)
# }))
# summary_df

## ----cohort-features----------------------------------------------------------
# plan_cohort <- planAnalysis(
#   analysisWindows = windows,
#   useBaseFeatures = list(
#     condition_occurrence = list(include = FALSE),
#     condition_era        = list(include = FALSE),
#     drug_exposure        = list(include = FALSE),
#     drug_era             = list(include = FALSE),
#     procedure_occurrence = list(include = FALSE),
#     observation          = list(include = FALSE),
#     device_exposure      = list(include = FALSE),
#     visit_occurrence     = list(include = FALSE),
#     measurement          = list(include = FALSE)
#   ),
#   useCohortFeatures = list(
#     include         = TRUE,
#     type            = "start",
#     cohortIds       = c(3L, 4L),
#     cohortNames     = c("GiBleed", "NSAIDs"),
#     cohortTable     = "cohort",
#     covariateSchema = "main"
#   ),
#   useConceptSetFeatures = list(include = FALSE)
# )
#
# specs_cohort <- singleNodeSetting(
#   plan                     = plan_cohort,
#   cohortId                 = COHORT_ID,
#   cohortDatabaseSchema     = CDM_SCHEMA,
#   cohortTable              = COHORT_TBL,
#   cdmDatabaseSchema        = CDM_SCHEMA,
#   vocabularyDatabaseSchema = CDM_SCHEMA,
#   aggregated               = TRUE
# )
#
# cat("Cohort feature specs:", length(specs_cohort), "\n")
#
# results_cohort <- executeSpecs(
#   connection, specs_cohort,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE
# )
#
# # Show results for every cohort feature spec
# for (nm in names(results_cohort)) {
#   cat("\n--- Analysis", nm, "---\n")
#   print(results_cohort[[nm]])
# }

## ----cohort-overlap-----------------------------------------------------------
# plan_coh_ov <- planAnalysis(
#   analysisWindows = windows,
#   useBaseFeatures = list(
#     condition_occurrence = list(include = FALSE),
#     condition_era        = list(include = FALSE),
#     drug_exposure        = list(include = FALSE),
#     drug_era             = list(include = FALSE),
#     procedure_occurrence = list(include = FALSE),
#     observation          = list(include = FALSE),
#     device_exposure      = list(include = FALSE),
#     visit_occurrence     = list(include = FALSE),
#     measurement          = list(include = FALSE)
#   ),
#   useCohortFeatures = list(
#     include         = TRUE,
#     type            = "overlap",
#     cohortIds       = c(3L),
#     cohortNames     = c("GiBleed"),
#     cohortTable     = "cohort",
#     covariateSchema = "main"
#   ),
#   useConceptSetFeatures = list(include = FALSE)
# )
#
# specs_coh_ov <- singleNodeSetting(
#   plan                     = plan_coh_ov,
#   cohortId                 = COHORT_ID,
#   cohortDatabaseSchema     = CDM_SCHEMA,
#   cohortTable              = COHORT_TBL,
#   cdmDatabaseSchema        = CDM_SCHEMA,
#   vocabularyDatabaseSchema = CDM_SCHEMA,
#   aggregated               = TRUE
# )
#
# cat("Overlap flag:", specs_coh_ov[[1]]$overlap, "\n")
#
# results_coh_ov <- executeSpecs(
#   connection, specs_coh_ov,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE
# )
#
# for (nm in names(results_coh_ov)) {
#   cat("\n--- Analysis", nm, "---\n")
#   print(results_coh_ov[[nm]])
# }

## ----render-only--------------------------------------------------------------
# sql_default <- renderSpecSql(specs_cond[[1]])
# cat("--- SQL Server (default) ---\n")
# cat(substr(sql_default, 1, 500), "\n...\n")

## ----render-dialects----------------------------------------------------------
# for (dialect in c("postgresql", "redshift", "oracle", "spark")) {
#   cat("\n--- Dialect:", dialect, "---\n")
#   sql_translated <- renderSpecSql(specs_cond[[1]], targetDialect = dialect)
#   cat(substr(sql_translated, 1, 400), "\n...\n")
# }

## ----render-all---------------------------------------------------------------
# all_sql <- renderAllSpecSql(specs_cond)
# cat("Number of rendered SQL statements:", length(all_sql), "\n")
# cat("Analysis IDs:", paste(names(all_sql), collapse = ", "), "\n")

## ----many-windows-------------------------------------------------------------
# windows_8 <- defineAnalysisWindows(
#   startDays = c(-365, -180, -90, -30,  1, 31,  91, 181),
#   endDays   = c(  -1,   -1,  -1,  -1, 30, 90, 180, 365)
# )
#
# plan_8w <- planAnalysis(
#   analysisWindows = windows_8,
#   useBaseFeatures = list(
#     condition_occurrence = list(include = TRUE, type = "start"),
#     condition_era        = list(include = FALSE),
#     drug_exposure        = list(include = FALSE),
#     drug_era             = list(include = FALSE),
#     procedure_occurrence = list(include = FALSE),
#     observation          = list(include = FALSE),
#     device_exposure      = list(include = FALSE),
#     visit_occurrence     = list(include = FALSE),
#     measurement          = list(include = FALSE)
#   ),
#   useCohortFeatures     = list(include = FALSE),
#   useConceptSetFeatures = list(include = FALSE)
# )
#
# specs_8w <- singleNodeSetting(
#   plan                     = plan_8w,
#   cohortId                 = COHORT_ID,
#   cohortDatabaseSchema     = CDM_SCHEMA,
#   cohortTable              = COHORT_TBL,
#   cdmDatabaseSchema        = CDM_SCHEMA,
#   vocabularyDatabaseSchema = CDM_SCHEMA,
#   aggregated               = TRUE
# )
#
# cat("Specs with 8 windows:", length(specs_8w), "\n")
#
# results_8w <- executeSpecs(
#   connection, specs_8w,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE
# )
#
# data.frame(
#   analysis_id = names(results_8w),
#   rows        = vapply(results_8w, nrow, integer(1))
# )

## ----combined-----------------------------------------------------------------
# plan_combined <- planAnalysis(
#   analysisWindows = windows,
#   useBaseFeatures = list(
#     condition_occurrence = list(include = TRUE, type = "start"),
#     condition_era        = list(include = FALSE),
#     drug_exposure        = list(include = TRUE),
#     drug_era             = list(include = FALSE),
#     procedure_occurrence = list(include = TRUE),
#     observation          = list(include = FALSE),
#     device_exposure      = list(include = FALSE),
#     visit_occurrence     = list(include = FALSE),
#     measurement          = list(include = TRUE)
#   ),
#   useCohortFeatures = list(
#     include         = TRUE,
#     type            = "start",
#     cohortIds       = c(3L),
#     cohortNames     = c("GiBleed"),
#     cohortTable     = "cohort",
#     covariateSchema = "main"
#   ),
#   useConceptSetFeatures = list(include = FALSE)
# )
#
# specs_combined <- singleNodeSetting(
#   plan                     = plan_combined,
#   cohortId                 = COHORT_ID,
#   cohortDatabaseSchema     = CDM_SCHEMA,
#   cohortTable              = COHORT_TBL,
#   cdmDatabaseSchema        = CDM_SCHEMA,
#   vocabularyDatabaseSchema = CDM_SCHEMA,
#   aggregated               = TRUE
# )
#
# cat("Total specs (4 base domains x 2 windows + 1 cohort x 2 windows):",
#     length(specs_combined), "\n")
#
# results_combined <- executeSpecs(
#   connection, specs_combined,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE
# )
#
# summary_combined <- do.call(rbind, lapply(names(results_combined), function(nm) {
#   df <- results_combined[[nm]]
#   data.frame(
#     analysis_id = nm,
#     source      = if (nrow(df) > 0) "data" else "empty",
#     rows        = nrow(df),
#     stringsAsFactors = FALSE
#   )
# }))
# summary_combined

## ----diclofenac---------------------------------------------------------------
# specs_diclo <- singleNodeSetting(
#   plan                     = plan_cond,
#   cohortId                 = 2L,
#   cohortDatabaseSchema     = CDM_SCHEMA,
#   cohortTable              = COHORT_TBL,
#   cdmDatabaseSchema        = CDM_SCHEMA,
#   vocabularyDatabaseSchema = CDM_SCHEMA,
#   aggregated               = TRUE
# )
#
# results_diclo <- executeSpecs(
#   connection, specs_diclo,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE
# )
#
# cat("Diclofenac condition covariates (pre-index):\n")
# head(results_diclo[["1001"]], 10)

## ----error-handling-----------------------------------------------------------
# results_safe <- executeSpecs(
#   connection, specs_multi,
#   tempEmulationSchema = TEMP_SCHEMA,
#   cleanTempTables     = TRUE,
#   stopOnError         = FALSE
# )
#
# failed <- vapply(results_safe, function(df) !is.null(attr(df, "error")), logical(1))
# cat("Failed specs:", sum(failed), "/", length(results_safe), "\n")

## ----cleanup------------------------------------------------------------------
# disconnect(connection)

## ----session-info-------------------------------------------------------------
# sessionInfo()