## ----include = FALSE----------------------------------------------------------
# Chunk options applied to the chunks below: merge source and output into a
# single block and prefix printed output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(scrutr)

## -----------------------------------------------------------------------------
# Inspect the built-in CO2 dataset and print the result.
result <- inspect(CO2)
result

## -----------------------------------------------------------------------------
# Same call with nrow = TRUE; the result is stored but not printed here.
result <- inspect(CO2, nrow = TRUE)

## -----------------------------------------------------------------------------
# Named list of built-in datasets. intersect() keeps only the requested column
# names that exist in mtcars, so asking for "speed" (a column of cars, not of
# mtcars) does not raise an "undefined columns" error.
data_list <- list(
  cars = cars,
  mtcars = mtcars[, c("mpg", "hp", "wt", "speed") |> intersect(names(mtcars))],
  iris = iris
)

# Which variables are in which datasets?
vars_detect(data_list)

## -----------------------------------------------------------------------------
# Use two datasets that share some columns
# (x is integer in df1 and double in df2; y is character in both).
shared_list <- list(
  df1 = data.frame(x = 1:3, y = c("a", "b", "c"), stringsAsFactors = FALSE),
  df2 = data.frame(
    x = c(1.1, 2.2, 3.3),
    y = c("d", "e", "f"),
    stringsAsFactors = FALSE
  )
)
vars_compclasses(shared_list)

## ----eval = FALSE-------------------------------------------------------------
# # Create a temporary folder with example datasets
# mydir <- file.path(tempdir(), "scrutr_demo")
# dir.create(mydir, showWarnings = FALSE)
#
# saveRDS(cars, file.path(mydir, "cars.rds"))
# saveRDS(mtcars, file.path(mydir, "mtcars.rds"))
# saveRDS(iris, file.path(mydir, "iris.rds"))
#
# # Run the full inspection pipeline
# inspect_vars(
#   input_path = mydir,
#   output_path = mydir,
#   output_label = "demo",
#   considered_extensions = "rds"
# )
#
# # The output Excel file contains multiple sheets:
# # dims, inspect_tot, one sheet per dataset, vars_detect, vars_compclasses, etc.
# list.files(mydir, pattern = "\\.xlsx$")

## ----eval = FALSE-------------------------------------------------------------
# convert_all(
#   input_folderpath = mydir,
#   considered_extensions = "rds",
#   to = "csv",
#   output_folderpath = file.path(mydir, "csv")
# )

## ----eval = FALSE-------------------------------------------------------------
# # 1. Generate a mask template
# mask_convert_r(output_path = mydir)
#
# # 2. Edit the mask in Excel, then:
# convert_r(
#   mask_filepath = file.path(mydir, "mask_convert_r.xlsx"),
#   output_path = mydir
# )

## -----------------------------------------------------------------------------
# Find duplicates in a data frame
# (id 2 appears twice and id 3 appears three times).
df <- data.frame(id = c(1, 2, 2, 3, 3, 3), value = letters[1:6])
dupl_show(df, "id")

## -----------------------------------------------------------------------------
# Check a left join for key issues
# ("b" is duplicated on the right, "c" has no match on the right, and "d"
# exists only on the right).
left_df <- data.frame(key = c("a", "b", "c"))
right_df <- data.frame(key = c("a", "b", "b", "d"), val = 1:4)
ljoin_checks(left_df, right_df, "key")

## -----------------------------------------------------------------------------
paths <- c("data/raw/2024/file1.csv", "data/raw/2024/file2.csv")

# Keep only the first 2 levels
path_move(paths, "/", 2)

# Remove the last level (filename)
path_move(paths, "/", -1)