## ----setup, include=FALSE-----------------------------------------------------
# Setup chunk: selects a Python environment for reticulate, checks that the
# required Python modules can be imported, and sets knitr's `eval` option so
# that later chunks only run when Python is ready and NOT_CRAN is "true".
knitr::opts_chunk$set(echo = TRUE)
library(reticulate)

# To use a specific Python environment, replace the path below with the path
# of your virtualenv and uncomment the command.
# Tip: BERTOPICR_VENV should be the folder that contains `pyvenv.cfg`.
# Sys.setenv(
#   BERTOPICR_VENV = "C:/path/to/your/venv",
#   NOT_CRAN = "true"
# )

# 1. Define the Python modules you need
required_modules <- c(
  "bertopic", "umap", "hdbscan", "sklearn", "numpy",
  "sentence_transformers", "torch"
)

# macOS: if reticulate fails to load Python libraries, run once per session.
if (identical(Sys.info()[["sysname"]], "Darwin")) {
  bertopicr::configure_macos_homebrew_zlib()
}

# Optional: point reticulate at a user-specified virtualenv.
# A directory only counts as a virtualenv here if it contains `pyvenv.cfg`.
venv <- Sys.getenv("BERTOPICR_VENV")
if (nzchar(venv)) {
  venv_cfg <- file.path(venv, "pyvenv.cfg")
  if (file.exists(venv_cfg)) {
    reticulate::use_virtualenv(venv, required = TRUE)
  } else {
    message(
      "Warning: BERTOPICR_VENV does not point to a valid virtualenv: ",
      venv
    )
  }
}

# Try to find python, but don't crash if it's missing (e.g. on another
# user's machine). Skip the fallback when no `python` is on the PATH:
# Sys.which() returns "" in that case, which is not a usable interpreter path.
if (!reticulate::py_available(initialize = TRUE)) {
  python_on_path <- Sys.which("python")
  if (nzchar(python_on_path)) {
    try(
      reticulate::use_python(python_on_path, required = FALSE),
      silent = TRUE
    )
  }
}

# 2. Check that Python initializes and every required module is importable.
# Any error raised while probing is treated as "not ready".
python_ready <- tryCatch(
  {
    reticulate::py_available(initialize = TRUE) &&
      all(vapply(
        required_modules,
        reticulate::py_module_available,
        logical(1)
      ))
  },
  error = function(e) FALSE
)

# 3. Only evaluate chunks when Python is ready and NOT_CRAN is set
not_cran <- identical(Sys.getenv("NOT_CRAN"), "true")
run_chunks <- python_ready && not_cran
knitr::opts_chunk$set(eval = run_chunks)

if (!python_ready) {
  message(
    "Warning: Required Python modules are not available. \n",
    "Vignette code will not run."
  )
} else {
  message("Python environment ready: ", reticulate::py_config()$python)
  if (!not_cran) {
    message("Note: Set NOT_CRAN=true to run Python-dependent chunks locally.")
  }
}

## -----------------------------------------------------------------------------
# library(reticulate)
# library(bertopicr)
# library(readr)
# library(dplyr)

## -----------------------------------------------------------------------------
# reticulate::py_run_string(code = "import torch
# print(torch.cuda.is_available())") # if GPU is available then TRUE else FALSE

## -----------------------------------------------------------------------------
# loaded <- load_bertopic_model("topic_model") # set the location of the model!
# model <- loaded$model
# extras <- loaded$extras

## -----------------------------------------------------------------------------
# sample_path <- system.file("extdata", "spiegel_sample.rds", package = "bertopicr")
# df <- read_rds(sample_path)
# docs <- df |> pull(text_clean)

## -----------------------------------------------------------------------------
# doc_info <- get_document_info_df(model = model, texts = docs)
# topic_info <- get_topic_info_df(model = model)
# topics_df <- get_topics_df(model = model)

## -----------------------------------------------------------------------------
# visualize_barchart(model = model, filename = "barchart_demo")
# visualize_distribution(
#   model = model,
#   text_id = 1,
#   probabilities = extras$probabilities,
#   filename = "vis_topic_dist_demo"
# )
# visualize_heatmap(model = model, filename = "vis_heat_demo")
# visualize_topics(model = model, filename = "dist_map_demo")
#

## -----------------------------------------------------------------------------
# visualize_documents(model = model, docs, reduced_embeddings = extras$reduced_embeddings_2d)
# visualize_documents_2d(model = model, docs, reduced_embeddings = extras$reduced_embeddings_2d)
# visualize_documents_3d(model = model, docs, reduced_embeddings = extras$reduced_embeddings_3d)
#

## ----eval=FALSE---------------------------------------------------------------
# visualize_topics_over_time(model = model, topics_over_time_model = extras$topics_over_time)
# visualize_topics_per_class(model, extras$topics_per_class, auto_open = FALSE)
#