## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", echo = FALSE ) isMissingOrEmpty <- function(x) { length(x) == 0 || is.na(x[1]) || !nzchar(x[1]) } readParquetIfExists <- function(path) { if (!file.exists(path)) { return(NULL) } as.data.frame(nanoparquet::read_parquet(path), stringsAsFactors = FALSE) } deserializeTimeColumn <- function(df) { if (!is.data.frame(df) || !("TIME_TO_EVENT" %in% colnames(df))) { return(df) } if (!is.character(df$TIME_TO_EVENT)) { return(df) } df$TIME_TO_EVENT <- lapply(df$TIME_TO_EVENT, function(x) { if (is.null(x) || (length(x) == 1 && is.na(x)) || !nzchar(x)) { return(numeric(0)) } parsed <- tryCatch(jsonlite::fromJSON(x), error = function(e) NULL) if (is.null(parsed)) x else parsed }) df } loadStudyFallback <- function(root, studyName) { studyPathLocal <- file.path(root, studyName) dataPatients <- deserializeTimeColumn(readParquetIfExists(file.path(studyPathLocal, "data_patients.parquet"))) dataFeatures <- deserializeTimeColumn(readParquetIfExists(file.path(studyPathLocal, "data_features.parquet"))) dataInitial <- readParquetIfExists(file.path(studyPathLocal, "data_initial.parquet")) dataPerson <- readParquetIfExists(file.path(studyPathLocal, "data_person.parquet")) mapping <- readParquetIfExists(file.path(studyPathLocal, "complementaryMappingTable.parquet")) if (!is.data.frame(mapping)) { mapping <- data.frame() } metadataPath <- file.path(studyPathLocal, "metadata.json") metadata <- if (file.exists(metadataPath)) jsonlite::fromJSON(metadataPath, simplifyVector = TRUE) else NULL selectedFeatures <- readParquetIfExists(file.path(studyPathLocal, "selected_features.parquet")) if (!is.data.frame(selectedFeatures)) { selectedFeatures <- dataFeatures } selectedFeatureData <- list( selectedFeatureNames = if (is.data.frame(selectedFeatures) && "CONCEPT_NAME" %in% colnames(selectedFeatures)) unique(selectedFeatures$CONCEPT_NAME) else character(0), selectedFeatureIds = if (is.data.frame(selectedFeatures) && "CONCEPT_ID" %in% colnames(selectedFeatures)) selectedFeatures$CONCEPT_ID else numeric(0), selectedFeatures = if (is.data.frame(selectedFeatures)) selectedFeatures else data.frame() ) conceptAncestor <- readParquetIfExists(file.path(studyPathLocal, "concepts_concept_ancestor.parquet")) concept <- readParquetIfExists(file.path(studyPathLocal, "concepts_concept.parquet")) obj <- list( data_patients = if (is.data.frame(dataPatients)) dataPatients else data.frame(), data_initial = if (is.data.frame(dataInitial)) dataInitial else data.frame(), data_person = if (is.data.frame(dataPerson)) dataPerson else data.frame(), data_features = if (is.data.frame(dataFeatures)) dataFeatures else data.frame(), conceptsData = list(concept_ancestor = conceptAncestor, concept = concept), complementaryMappingTable = mapping, selectedFeatureData = selectedFeatureData, trajectoryDataList = selectedFeatureData, config = list(complName = studyName, metadata = metadata) ) class(obj) <- "CohortContrastObject" obj } exampleRoot <- system.file("example", "st", package = "CohortContrast") if (isMissingOrEmpty(exampleRoot) && dir.exists("inst/example/st")) { exampleRoot <- normalizePath("inst/example/st") } studyPath <- file.path(exampleRoot, "lc500") if (isMissingOrEmpty(exampleRoot) || !dir.exists(studyPath)) { cat("Bundled example study 'lc500' is not available in this build.\n") knitr::knit_exit() } data <- tryCatch( CohortContrast::loadCohortContrastStudy( studyName = "lc500", pathToResults = exampleRoot ), error = function(e) { msg <- conditionMessage(e) if (grepl("topKInt", msg, fixed = TRUE) || grepl("missing value where TRUE/FALSE needed", msg, fixed = TRUE)) { loadStudyFallback(exampleRoot, "lc500") } else { stop(e) } } ) ## ----------------------------------------------------------------------------- names(data) ## ----------------------------------------------------------------------------- utils::head(data$data_initial, 10) ## ----------------------------------------------------------------------------- utils::head(data$data_person, 10) ## ----------------------------------------------------------------------------- utils::head(data$data_features, 10) ## ----------------------------------------------------------------------------- utils::head(data$data_patients, 10) ## ----------------------------------------------------------------------------- utils::head(data$complementaryMappingTable, 10) ## ----------------------------------------------------------------------------- if (length(data$selectedFeatureData$selectedFeatureNames) > 0) { utils::head(data.frame(CONCEPT_NAME = data$selectedFeatureData$selectedFeatureNames, stringsAsFactors = FALSE), 10) } else { cat("No selected feature names available in this study.\n") } ## ----------------------------------------------------------------------------- if (length(data$selectedFeatureData$selectedFeatureIds) > 0) { utils::head(data.frame(CONCEPT_ID = data$selectedFeatureData$selectedFeatureIds, stringsAsFactors = FALSE), 10) } else { cat("No selected feature ids available in this study.\n") } ## ----------------------------------------------------------------------------- utils::head(data$selectedFeatureData$selectedFeatures, 10) ## ----------------------------------------------------------------------------- if (is.data.frame(data$conceptsData$concept_ancestor)) { utils::head(data$conceptsData$concept_ancestor, 10) } else { cat("No `concept_ancestor` table available in this study.\n") } ## ----------------------------------------------------------------------------- if (is.data.frame(data$conceptsData$concept)) { utils::head(data$conceptsData$concept, 10) } else { cat("No `concept` table available in this study.\n") } ## ----------------------------------------------------------------------------- configOverview <- data.frame( field = c("complName", "has_metadata"), value = c(as.character(data$config$complName), !is.null(data$config$metadata)), stringsAsFactors = FALSE ) configOverview ## ----------------------------------------------------------------------------- if (is.list(data$config$metadata)) { scalarMetadata <- Filter(function(x) length(x) == 1 && !is.list(x), data$config$metadata) if (length(scalarMetadata) > 0) { utils::head( data.frame( field = names(scalarMetadata), value = unlist(scalarMetadata, use.names = FALSE), stringsAsFactors = FALSE ), 10 ) } else { cat("No scalar metadata fields available for preview.\n") } } else { cat("No metadata block available in config.\n") }