---
title: "Available data sources"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Available data sources}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Chunk defaults for the whole vignette. With eval = FALSE the example chunks
# are displayed but not run when the vignette is built.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", eval = FALSE)
```

This vignette provides a reference of every DATASUS data source registered in `datasusr`, with ready-to-run examples for each one.

## Source catalog overview

```{r}
library(datasusr)

# Show the catalog of registered sources, including the `access` column
# explained below.
datasus_sources()
```

The `access` column indicates which function to use with each source:

- **`fetch`** — use `datasus_fetch()`, or the step-by-step workflow with `datasus_list_files()`, `datasus_download()`, and `read_datasus_dbc()`. These sources contain `.dbc` files on the DATASUS FTP.
- **`territory`** — use `datasus_get_territory()`. These are CSV reference tables (municipalities, health regions, etc.).
- **`ftp_only`** — use `datasus_ftp_ls()` to browse. These are software downloads (TabWin, TabNet), not data files.

## Hospital information (SIH)

The Hospital Information System (SIHSUS) publishes monthly files by state.

```{r}
# Each call names a year, a month and a state (uf), matching the monthly,
# per-state layout described above.

# Reduced Hospital Admission Records (file type "RD"); this call also passes
# a `select` vector naming four columns.
df <- datasus_fetch(
  source = "SIHSUS",
  file_type = "RD",
  year = 2024,
  month = 1,
  uf = "PE",
  select = c("uf_zi", "ano_cmpt", "munic_res", "val_tot")
)

# Rejected admissions (file type "RJ")
df <- datasus_fetch(
  source = "SIHSUS",
  file_type = "RJ",
  year = 2024,
  month = 1,
  uf = "PE"
)

# Professional services (file type "SP")
df <- datasus_fetch(
  source = "SIHSUS",
  file_type = "SP",
  year = 2024,
  month = 1,
  uf = "PE"
)
```

## Outpatient information (SIA)

The Outpatient Information System (SIASUS) also publishes monthly files by state.

```{r}
# Outpatient production (file type "PA")
df <- datasus_fetch(
  source = "SIASUS",
  file_type = "PA",
  year = 2024,
  month = 1,
  uf = "PE"
)

# Medication authorisations (APAC) (file type "AM")
df <- datasus_fetch(
  source = "SIASUS",
  file_type = "AM",
  year = 2024,
  month = 1,
  uf = "PE"
)
```

## Mortality (SIM)

The Mortality Information System (SIM) publishes yearly files.
Death records (DO) are scoped by state; specialised subsets (DOFET, DOEXT, DOINF, DOMAT) cover all of Brazil.

```{r}
# Yearly files: no `month` argument is passed in this section.

# Death records by state (4-digit year in file name)
df <- datasus_fetch(
  source = "SIM",
  file_type = "DO",
  year = 2022,
  uf = "PE"
)

# The specialised subsets below are national, so no `uf` is passed.

# Foetal deaths (DOFET)
df <- datasus_fetch(
  source = "SIM",
  file_type = "DOFET",
  year = 2022
)

# Deaths from external causes (DOEXT)
df <- datasus_fetch(
  source = "SIM",
  file_type = "DOEXT",
  year = 2022
)

# Infant deaths (DOINF)
df <- datasus_fetch(
  source = "SIM",
  file_type = "DOINF",
  year = 2022
)

# Maternal deaths (DOMAT)
df <- datasus_fetch(
  source = "SIM",
  file_type = "DOMAT",
  year = 2022
)
```

## Live births (SINASC)

The Live Birth Information System publishes yearly files by state.

```{r}
# Live birth file (file type "DN") for one year and one state
df <- datasus_fetch(
  source = "SINASC",
  file_type = "DN",
  year = 2022,
  uf = "PE"
)
```

## Health facilities (CNES)

The National Registry of Health Facilities publishes monthly files by state across many subtypes.

```{r}
# Every CNES call names a year, a month and a state (uf); only the
# `file_type` subtype changes between examples.

# Facilities (ST)
df <- datasus_fetch(
  source = "CNES",
  file_type = "ST",
  year = 2024,
  month = 1,
  uf = "PE"
)

# Hospital beds (LT)
df <- datasus_fetch(
  source = "CNES",
  file_type = "LT",
  year = 2024,
  month = 1,
  uf = "PE"
)

# Professionals (PF)
df <- datasus_fetch(
  source = "CNES",
  file_type = "PF",
  year = 2024,
  month = 1,
  uf = "PE"
)

# Equipment (EQ)
df <- datasus_fetch(
  source = "CNES",
  file_type = "EQ",
  year = 2024,
  month = 1,
  uf = "PE"
)

# Specialised services (SR)
df <- datasus_fetch(
  source = "CNES",
  file_type = "SR",
  year = 2024,
  month = 1,
  uf = "PE"
)
```

See `datasus_file_types(source = "CNES")` for the full list of CNES subtypes (LT, ST, DC, EQ, SR, HB, PF, EP, RC, IN, EE, EF, GM).

## Hospital and outpatient reporting (CIHA / CIH)

CIHA replaced CIH in 2011. Both publish monthly files by state.
```{r}
# Both systems are queried with a year, a month and a state (uf).

# CIHA (2011 onwards)
df <- datasus_fetch(
  source = "CIHA",
  file_type = "CIHA",
  year = 2024,
  month = 1,
  uf = "PE"
)

# CIH (historical, 2008-2011); the example year is chosen inside that range
df <- datasus_fetch(
  source = "CIH",
  file_type = "CR",
  year = 2010,
  month = 1,
  uf = "PE"
)
```

## Notifiable diseases (SINAN)

SINAN publishes yearly files with national scope (no UF filter needed).

```{r}
# Yearly national files: only `year` is passed, with no `month` or `uf`.

# Dengue
df <- datasus_fetch(
  source = "SINAN",
  file_type = "DENG",
  year = 2023
)

# Chikungunya
df <- datasus_fetch(
  source = "SINAN",
  file_type = "CHIK",
  year = 2023
)

# Zika
df <- datasus_fetch(
  source = "SINAN",
  file_type = "ZIKA",
  year = 2023
)

# Malaria
df <- datasus_fetch(
  source = "SINAN",
  file_type = "MALA",
  year = 2023
)
```

Preliminary SINAN data is available through the `SINAN_P` source.

## Other disease surveillance

```{r}
# e-SUS Notifica (chronic Chagas disease); called without `uf`
df <- datasus_fetch(
  source = "ESUSNOTIFICA",
  file_type = "DCCR",
  year = 2023
)

# Suspected congenital Zika syndrome (RESP); called with a state (uf)
df <- datasus_fetch(
  source = "RESP",
  file_type = "RESP",
  year = 2022,
  uf = "PE"
)
```

## Oncology panel

```{r}
# Oncology panel (source and file type are both "PO"); called without `uf`
df <- datasus_fetch(
  source = "PO",
  file_type = "PO",
  year = 2022
)
```

## Schistosomiasis control (PCE)

```{r}
# Schistosomiasis control (source and file type are both "PCE") for one
# year and one state
df <- datasus_fetch(
  source = "PCE",
  file_type = "PCE",
  year = 2022,
  uf = "PE"
)
```

## Discontinued and replaced systems

SISCOLO and SISMAMA were replaced by SISCAN and are no longer available on the DATASUS FTP. SISPRENATAL data may still be available for historical periods.

```{r}
# Prenatal monitoring (historical); as noted above, availability is not
# guaranteed, so this request may find no file.
df <- datasus_fetch(
  source = "SISPRENATAL",
  file_type = "PN",
  year = 2014,
  month = 1,
  uf = "PE"
)
```

## Territorial reference tables

Territorial data (municipality names, health regions, geographic divisions) is published as CSV files organised by year.
Use `datasus_get_territory()`:

```{r}
# Municipality table; with no `year` given it defaults to the current year
municipalities <- datasus_get_territory("tb_municip")
municipalities

# Same table, pinned to a specific year
municipalities_2023 <- datasus_get_territory("tb_municip", year = 2023)

# List the years and tables published on the FTP
datasus_ftp_ls("ftp://ftp.datasus.gov.br/territorio/tabelas/")
```

## Documentation and data dictionaries

Each information system has documentation files on the DATASUS FTP. Use `datasus_docs_url()` to find them:

```{r}
# Every documentation path the package knows about
datasus_docs_url()

# Documentation files for one system, taken from the first `docs_url` entry
datasus_ftp_ls(datasus_docs_url("CNES")$docs_url[[1]])
```

## Connectivity check

The following code tests path resolution for every source and file type in the catalog:

```{r}
library(dplyr)

# Only sources read through datasus_fetch() are checked.
sources_dbc <- filter(datasus_sources(), access == "fetch")

# TRUE when a path can be built for the source/file-type pair, FALSE when
# building it raises an error.
path_resolves <- function(src, ft) {
  tryCatch(
    {
      datasus_build_path(source = src, file_type = ft, year = 2023, month = 1)
      TRUE
    },
    error = function(e) FALSE
  )
}

# One result row per file type registered for a single source.
check_source <- function(src) {
  fts <- datasus_file_types(source = src)
  rows <- purrr::map(seq_len(nrow(fts)), function(j) {
    ft <- fts$file_type[[j]]
    ok <- path_resolves(src, ft)
    tibble::tibble(source = src, file_type = ft, has_path = ok)
  })
  purrr::list_rbind(rows)
}

# Stack the per-source tables into a single table.
results <- purrr::list_rbind(
  purrr::map(seq_len(nrow(sources_dbc)), function(i) {
    check_source(sources_dbc$source[[i]])
  })
)

# Print every row rather than the default truncated view.
print(results, n = Inf)
```

## Cleaning up

The examples above download files to the local cache. To remove all cached files after testing:

```{r}
# Inspect the cache, then clear it
datasus_cache_info()
datasus_cache_clear()
```