---
title: "Data Analysis Examples"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Data Analysis Examples}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Chunks are shown but not evaluated (eval = FALSE): every example below
# downloads data from a remote service.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE,
  fig.width = 8,
  fig.height = 5
)
```

This article demonstrates practical data analysis examples using pixr to explore PIX adoption and usage patterns in Brazil.

## Setup

```{r setup, message=FALSE}
library(pixr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)

# Set theme for all plots
theme_set(theme_minimal(base_size = 12))
```

## Example 1: PIX Keys Market Share

Analyze which financial institutions dominate the PIX keys market:

```{r market-share}
# Get summary by institution (top 20) for December 2025
# Note: date uses YYYY-MM-DD format
top_institutions <- get_pix_keys_summary(date = "2025-12-01", n_top = 20)

# Visualize top 10
# slice_max() picks the 10 largest explicitly instead of relying on the
# row order of the summary; with_ties = FALSE guarantees exactly 10 rows.
top_institutions |>
  slice_max(total_keys, n = 10, with_ties = FALSE) |>
  mutate(Nome = forcats::fct_reorder(Nome, total_keys)) |>
  ggplot(aes(x = total_keys / 1e6, y = Nome)) +
  geom_col(fill = "#008060") +
  geom_text(
    aes(label = sprintf("%.1fM", total_keys / 1e6)),
    hjust = -0.1,
    size = 3
  ) +
  scale_x_continuous(
    labels = scales::number_format(suffix = "M"),
    # Extra room on the right so the value labels are not clipped
    expand = expansion(mult = c(0, 0.15))
  ) +
  labs(
    title = "Top 10 PIX Participants by Registered Keys",
    subtitle = "December 2025",
    x = "Total Keys (Millions)",
    y = NULL,
    caption = "Source: Brazilian Central Bank Open Data"
  )
```

## Example 2: Key Type Distribution

Analyze which types of PIX keys are most popular:

```{r key-types}
# Summarize by key type
key_summary <- get_pix_keys_by_type(date = "2025-12-01")

# Calculate percentages
key_summary <- key_summary |>
  mutate(
    percentage = total_keys / sum(total_keys) * 100
  )

# Visualize
# FUN = sum orders the key types by the height of the whole stacked bar;
# the default (mean) mis-orders types that have a single user-type row.
ggplot(
  key_summary,
  aes(
    x = reorder(TipoChave, -total_keys, FUN = sum),
    y = total_keys / 1e6,
    fill = NaturezaUsuario
  )
) +
  geom_col(position = "stack") +
  scale_y_continuous(labels = scales::number_format(suffix = "M")) +
  scale_fill_manual(values = c("PF" = "#008060", "PJ" = "#1e88e5")) +
  labs(
    title = "PIX Keys by Type",
    subtitle = "December 2025",
    x = "Key Type",
    y = "Total Keys (Millions)",
    fill = "User Type",
    caption = "Source: Brazilian Central Bank Open Data"
  ) +
  theme(legend.position = "bottom")
```

## Example 3: Regional Transaction Analysis

Analyze PIX usage patterns across Brazilian regions:

```{r regional-analysis}
# Get transactions by region
# Note: database uses YYYYMM format
region_data <- get_pix_transactions_by_region(database = "202512")

# Visualize total value by region
region_data |>
  mutate(
    total_value_billions = (vl_pagador_pf + vl_pagador_pj) / 1e9
  ) |>
  ggplot(
    aes(
      x = reorder(Regiao, total_value_billions),
      y = total_value_billions
    )
  ) +
  geom_col(fill = "#008060") +
  coord_flip() +
  scale_y_continuous(
    labels = scales::number_format(prefix = "R$ ", suffix = "B")
  ) +
  labs(
    title = "PIX Transaction Volume by Region",
    subtitle = "December 2025",
    x = NULL,
    y = "Transaction Value (Billions BRL)",
    caption = "Source: Brazilian Central Bank Open Data"
  )
```

## Example 4: State-Level Analysis

```{r state-analysis}
# Get transactions by state
state_data <- get_pix_transactions_by_state(database = "202512")

# Top 10 states by transaction count
# slice_max() ranks by total_count; slice_head() would only return the
# first 10 rows in whatever order the data arrived.
state_data |>
  mutate(
    total_count = qt_pagador_pf + qt_pagador_pj
  ) |>
  slice_max(total_count, n = 10, with_ties = FALSE) |>
  mutate(Estado = forcats::fct_reorder(Estado, total_count)) |>
  ggplot(aes(x = total_count / 1e6, y = Estado)) +
  geom_col(fill = "#1e88e5") +
  scale_x_continuous(labels = scales::number_format(suffix = "M")) +
  labs(
    title = "Top 10 States by PIX Transaction Count",
    subtitle = "December 2025",
    x = "Transactions (Millions)",
    y = NULL,
    caption = "Source: Brazilian Central Bank Open Data"
  )
```

## Example 5: Transaction Nature Analysis

Analyze transactions by nature (P2P, P2B, B2B, etc.):

```{r nature-analysis}
# Get summary by transaction nature
nature_summary <- get_pix_summary(database = "202509", group_by = "NATUREZA")

# Visualize
nature_summary |>
  ggplot(aes(x = reorder(NATUREZA, -total_value), y = total_value / 1e12)) +
  geom_col(fill = "#008060") +
  geom_text(
    aes(label = sprintf("R$ %.1fT", total_value / 1e12)),
    vjust = -0.5,
    size = 3
  ) +
  scale_y_continuous(
    labels = scales::number_format(prefix = "R$ ", suffix = "T"),
    # Headroom above the bars for the value labels
    expand = expansion(mult = c(0, 0.15))
  ) +
  labs(
    title = "PIX Transaction Value by Nature",
    subtitle = "September 2025 - P2P: Person to Person, P2B: Person to Business, etc.",
    x = "Transaction Nature",
    y = "Total Value (Trillions BRL)",
    caption = "Source: Brazilian Central Bank Open Data"
  )
```

## Example 6: Filtering Transactions by State

Use OData filters to analyze specific states:

```{r state-filter}
# Get transactions for Maranhão only using filter
# The orderby clause requests rows sorted by value, largest first, so
# slice_head() below yields the top municipalities.
maranhao <- get_pix_transactions_by_municipality(
  database = "202512",
  filter = "Estado eq 'MARANHÃO'",
  orderby = "VL_PagadorPF desc"
)

# Top 10 municipalities by value
maranhao |>
  slice_head(n = 10) |>
  ggplot(
    aes(x = reorder(Municipio, VL_PagadorPF), y = VL_PagadorPF / 1e6)
  ) +
  geom_col(fill = "#008060") +
  coord_flip() +
  scale_y_continuous(
    labels = scales::number_format(prefix = "R$ ", suffix = "M")
  ) +
  labs(
    title = "Top 10 Municipalities in Maranhão by PIX Value",
    subtitle = "December 2025 - Individual payers",
    x = NULL,
    y = "Transaction Value (Millions BRL)",
    caption = "Source: Brazilian Central Bank Open Data"
  )
```

## Example 7: Comparing Regions with Filters

```{r region-comparison}
# Get Northeast transactions using filter
nordeste <- get_pix_transaction_stats(
  database = "202509",
  filter = "PAG_REGIAO eq 'NORDESTE'"
) |>
  summarise(
    total_value = sum(VALOR, na.rm = TRUE),
    total_count = sum(QUANTIDADE, na.rm = TRUE)
  ) |>
  mutate(region = "NORDESTE")

# Get Southeast transactions
sudeste <- get_pix_transaction_stats(
  database = "202509",
  filter = "PAG_REGIAO eq 'SUDESTE'"
) |>
  summarise(
    total_value = sum(VALOR, na.rm = TRUE),
    total_count = sum(QUANTIDADE, na.rm = TRUE)
  ) |>
  mutate(region = "SUDESTE")

# Combine and compare
comparison <- bind_rows(nordeste, sudeste)

# Value and count live on very different scales, so each metric gets its
# own facet with a free y axis.
comparison |>
  pivot_longer(
    c(total_value, total_count),
    names_to = "metric",
    values_to = "value"
  ) |>
  ggplot(aes(x = region, y = value, fill = metric)) +
  geom_col(position = "dodge") +
  facet_wrap(~metric, scales = "free_y") +
  labs(
    title = "Northeast vs Southeast PIX Comparison",
    subtitle = "September 2025",
    x = NULL,
    y = NULL
  )
```

## Example 8: P2P vs P2B Analysis

```{r p2p-vs-p2b}
# Get P2P transactions
p2p <- get_pix_transaction_stats(
  database = "202509",
  filter = "NATUREZA eq 'P2P'"
) |>
  group_by(PAG_REGIAO) |>
  summarise(value = sum(VALOR, na.rm = TRUE)) |>
  mutate(type = "P2P")

# Get P2B transactions
p2b <- get_pix_transaction_stats(
  database = "202509",
  filter = "NATUREZA eq 'P2B'"
) |>
  group_by(PAG_REGIAO) |>
  summarise(value = sum(VALOR, na.rm = TRUE)) |>
  mutate(type = "P2B")

# Combine
combined <- bind_rows(p2p, p2b)

# Visualize
ggplot(combined, aes(x = PAG_REGIAO, y = value / 1e12, fill = type)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("P2P" = "#008060", "P2B" = "#1e88e5")) +
  scale_y_continuous(
    labels = scales::number_format(prefix = "R$ ", suffix = "T")
  ) +
  labs(
    title = "P2P vs P2B Transactions by Region",
    subtitle = "September 2025",
    x = "Payer Region",
    y = "Transaction Value (Trillions BRL)",
    fill = "Transaction Type",
    caption = "Source: Brazilian Central Bank Open Data"
  ) +
  theme(legend.position = "bottom")
```

## Example 9: Multiple Months Analysis

Fetch data for multiple months and analyze trends:

```{r multi-month}
# Get Q3 2025 data
q3_data <- get_pix_transaction_stats_multi(
  databases = c("202507", "202508", "202509")
)

# Aggregate by month and nature
monthly_nature <- q3_data |>
  group_by(AnoMes, NATUREZA) |>
  summarise(
    total_value = sum(VALOR, na.rm = TRUE),
    .groups = "drop"
  )

# Visualize trends
# factor(AnoMes) makes the month a discrete axis (one bar per month)
ggplot(
  monthly_nature,
  aes(x = factor(AnoMes), y = total_value / 1e12, fill = NATUREZA)
) +
  geom_col(position = "stack") +
  scale_y_continuous(
    labels = scales::number_format(prefix = "R$ ", suffix = "T")
  ) +
  labs(
    title = "PIX Transaction Value by Nature - Q3 2025",
    x = "Month",
    y = "Transaction Value (Trillions BRL)",
    fill = "Nature",
    caption = "Source: Brazilian Central Bank Open Data"
  ) +
  theme(legend.position = "bottom")
```

## Example 10: Initiation Method Analysis

Analyze how PIX transactions are initiated:

```{r initiation-method}
# Get summary by initiation method
method_summary <- get_pix_summary(
  database = "202509",
  group_by = "FORMAINICIACAO"
)

# Add labels
method_labels <- c(
  "DICT" = "PIX Key",
  "QRDN" = "Dynamic QR",
  "QRES" = "Static QR",
  "MANU" = "Manual",
  "INIC" = "Initiator"
)

method_summary |>
  mutate(
    # Look labels up by name: as.character() protects against a factor
    # column being used as integer positions, and coalesce() falls back to
    # the raw code for any method missing from method_labels.
    method_code = as.character(FORMAINICIACAO),
    method_label = coalesce(unname(method_labels[method_code]), method_code),
    percentage = total_count / sum(total_count) * 100
  ) |>
  ggplot(aes(x = reorder(method_label, -total_count), y = total_count / 1e9)) +
  geom_col(fill = "#008060") +
  geom_text(
    aes(label = sprintf("%.1f%%", percentage)),
    vjust = -0.5,
    size = 3
  ) +
  scale_y_continuous(
    labels = scales::number_format(suffix = "B"),
    expand = expansion(mult = c(0, 0.15))
  ) +
  labs(
    title = "PIX Transactions by Initiation Method",
    subtitle = "September 2025",
    x = "Initiation Method",
    y = "Transaction Count (Billions)",
    caption = "Source: Brazilian Central Bank Open Data"
  )
```

## Example 11: Creating a Summary Report

Generate a comprehensive summary report:

```{r summary-report}
# Function to create a PIX summary for a given month
#
# Arguments:
#   database  Month passed to the transaction functions (YYYYMM string).
#   date      Date passed to get_pix_keys() (YYYY-MM-DD string); it should
#             refer to the same month as `database`.
#
# Returns a named list with elements period, total_keys, n_institutions,
# transaction_count, transaction_value, avg_transaction and top_state.
create_pix_summary <- function(database, date) {
  # Keys data
  keys <- get_pix_keys(date = date, verbose = FALSE)
  keys_summary <- keys |>
    summarise(
      total_keys = sum(qtdChaves, na.rm = TRUE),
      n_institutions = n_distinct(ISPB)
    )

  # Transaction stats
  stats <- get_pix_summary(
    database = database,
    group_by = "NATUREZA",
    verbose = FALSE
  )
  stats_total <- stats |>
    summarise(
      # na.rm = TRUE so one missing group does not turn the totals into NA
      total_value = sum(total_value, na.rm = TRUE),
      total_count = sum(total_count, na.rm = TRUE)
    )

  # State data
  state <- get_pix_transactions_by_state(database = database, verbose = FALSE)
  top_state <- state |>
    mutate(total = vl_pagador_pf + vl_pagador_pj) |>
    # with_ties = FALSE guarantees a single state; a tie would otherwise
    # return several rows and make the report print more than once.
    slice_max(total, n = 1, with_ties = FALSE) |>
    pull(Estado)

  list(
    period = database,
    total_keys = keys_summary$total_keys,
    n_institutions = keys_summary$n_institutions,
    transaction_count = stats_total$total_count,
    transaction_value = stats_total$total_value,
    avg_transaction = stats_total$total_value / stats_total$total_count,
    top_state = top_state
  )
}

# Generate summary for September 2025
summary_sep_2025 <- create_pix_summary(
  database = "202509",
  date = "2025-09-01"
)

# Report layout, one element per output line; joining with "\n" keeps the
# line breaks explicit in the source.
report_template <- paste(
  "",
  "=== PIX Summary Report: %s ===",
  "",
  "📊 Keys & Participants",
  "- Total registered keys: %s",
  "- Active institutions: %d",
  "",
  "💳 Transactions",
  "- Monthly count: %s",
  "- Monthly value: %s",
  "- Average transaction: %s",
  "",
  "🗺️ Regional",
  "- Most active state: %s",
  "",
  sep = "\n"
)

# Print formatted summary
# scientific = FALSE stops format() from printing large round counts in
# exponent notation (e.g. 6.5e+09).
cat(sprintf(
  report_template,
  summary_sep_2025$period,
  format(summary_sep_2025$total_keys, big.mark = ",", scientific = FALSE),
  summary_sep_2025$n_institutions,
  format(
    summary_sep_2025$transaction_count,
    big.mark = ",",
    scientific = FALSE
  ),
  format_brl(summary_sep_2025$transaction_value),
  format_brl(summary_sep_2025$avg_transaction),
  summary_sep_2025$top_state
))
```

## Example 12: Exporting Data

Export data for use in other tools:

```{r export-data}
# Get comprehensive dataset
keys <- get_pix_keys(date = "2025-12-01")

# Export to CSV
write.csv(keys, "pix_keys_202512.csv", row.names = FALSE)

# Export to Excel (requires writexl package)
# writexl::write_xlsx(keys, "pix_keys_202512.xlsx")

# Export multiple sheets
# writexl::write_xlsx(
#   list(
#     keys = keys,
#     stats = get_pix_transaction_stats(database = "202512"),
#     states = get_pix_transactions_by_state(database = "202512")
#   ),
#   "pix_data_202512.xlsx"
# )
```

## Tips for Large-Scale Analysis

```{r tips}
# 1. Use column selection to reduce memory usage
small_data <- get_pix_keys(
  date = "2025-12-01",
  columns = c("Nome", "TipoChave", "qtdChaves"),
  verbose = FALSE
)

# 2. Use filters to reduce data transfer
filtered <- get_pix_transactions_by_municipality(
  database = "202512",
  filter = "Estado eq 'SÃO PAULO'",
  verbose = FALSE
)

# 3. Process multiple months efficiently
#
# Arguments:
#   year     Four-digit year (number or string), e.g. 2025.
#   quarter  One of "Q1", "Q2", "Q3", "Q4".
#
# Returns whatever get_pix_transaction_stats_multi() returns for the three
# YYYYMM databases of that quarter.
process_quarter <- function(year, quarter) {
  # Reject anything that is not a known quarter: without this check
  # switch() would return NULL and paste0() would silently produce the
  # invalid database id "<year>".
  quarter <- match.arg(quarter, choices = c("Q1", "Q2", "Q3", "Q4"))

  months <- switch(quarter,
    "Q1" = c("01", "02", "03"),
    "Q2" = c("04", "05", "06"),
    "Q3" = c("07", "08", "09"),
    "Q4" = c("10", "11", "12")
  )

  databases <- paste0(year, months)
  get_pix_transaction_stats_multi(databases = databases)
}

# Get Q3 2025 data
# q3_2025 <- process_quarter(2025, "Q3")
```

## See Also

- [Introduction to pixr](pixr.html) - Getting started
- [Understanding PIX Data](understanding-pix-data.html) - Data structure
- [Working with OData Queries](odata-queries.html) - Advanced queries with filters