---
title: "Load and Reuse a BERTopic Model"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Load and Reuse a BERTopic Model}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
# Hidden setup chunk: selects a Python environment, checks that the required
# Python modules can be imported, and switches chunk evaluation on only when
# Python is ready AND the NOT_CRAN environment variable is set to "true".
knitr::opts_chunk$set(echo = TRUE)
library(reticulate)

# Replace the path below with the path of your Python environment
# Then uncomment the command below:
# Tip: BERTOPICR_VENV should be the folder that contains `pyvenv.cfg`.
# Sys.setenv(
#   BERTOPICR_VENV = "C:/path/to/your/venv",
#   NOT_CRAN = "true"
# )

# 1. Define the libraries you need
required_modules <- c(
  "bertopic", "umap", "hdbscan", "sklearn", "numpy",
  "sentence_transformers", "torch"
)

# macOS: if reticulate fails to load Python libraries, run once per session.
if (identical(Sys.info()[["sysname"]], "Darwin")) {
  bertopicr::configure_macos_homebrew_zlib()
}

# Optional: point reticulate at a user-specified virtualenv
venv <- Sys.getenv("BERTOPICR_VENV")
if (nzchar(venv)) {
  venv_cfg <- file.path(venv, "pyvenv.cfg")
  if (file.exists(venv_cfg)) {
    reticulate::use_virtualenv(venv, required = TRUE)
  } else {
    message(
      "Warning: BERTOPICR_VENV does not point to a valid virtualenv: ",
      venv
    )
  }
}

# Try to find python, but don't crash if it's missing
# (e.g. on another user's machine)
if (!reticulate::py_available(initialize = TRUE)) {
  # Sys.which() returns "" for executables that are not on the PATH; many
  # systems only ship `python3`, so check both names and skip the hint
  # entirely when neither exists instead of passing an empty path on.
  python_candidates <- Sys.which(c("python3", "python"))
  python_candidates <- python_candidates[nzchar(python_candidates)]
  if (length(python_candidates) > 0) {
    try(
      reticulate::use_python(
        unname(python_candidates[[1]]),
        required = FALSE
      ),
      silent = TRUE
    )
  }
}

# 2. Check if they are installed
python_ready <- tryCatch(
  {
    # Attempt to initialize python and check modules
    reticulate::py_available(initialize = TRUE) &&
      all(vapply(
        required_modules,
        reticulate::py_module_available,
        logical(1)
      ))
  },
  error = function(e) FALSE
)

# 3. Only evaluate chunks when Python is ready and NOT_CRAN is set
not_cran <- identical(Sys.getenv("NOT_CRAN"), "true")
run_chunks <- python_ready && not_cran
knitr::opts_chunk$set(eval = run_chunks)

if (!python_ready) {
  message(
    "Warning: Required Python modules are not available. ",
    "Vignette code will not run."
  )
} else {
  message("Python environment ready: ", reticulate::py_config()$python)
  if (!not_cran) {
    message("Note: Set NOT_CRAN=true to run Python-dependent chunks locally.")
  }
}
```

This vignette shows how to load a previously saved BERTopic model in a new session and reuse the extras stored alongside it. Set `eval = TRUE` for the chunks you want to run.

## Load R packages

Python environment selection and checks are handled in the hidden setup chunk at the top of the vignette.

```{r}
library(reticulate)
library(bertopicr)
library(readr)
library(dplyr)
```

## GPU availability (optional)

```{r}
# The two Python statements must be on separate lines to be valid Python.
reticulate::py_run_string(
  code = "import torch\nprint(torch.cuda.is_available())"
) # if GPU is available then TRUE else FALSE
```

## Load the model bundle

```{r}
loaded <- load_bertopic_model("topic_model") # set the location of the model!
model <- loaded$model
extras <- loaded$extras
```

## Load data for inspection

```{r}
sample_path <- system.file("extdata", "spiegel_sample.rds", package = "bertopicr")
df <- read_rds(sample_path)
docs <- df |> pull(text_clean)
```

## Create tables from the loaded model

```{r}
doc_info <- get_document_info_df(model = model, texts = docs)
topic_info <- get_topic_info_df(model = model)
topics_df <- get_topics_df(model = model)
```

## Use extras and visualizations

```{r}
visualize_barchart(model = model, filename = "barchart_demo")
visualize_distribution(
  model = model,
  text_id = 1,
  probabilities = extras$probabilities,
  filename = "vis_topic_dist_demo"
)
visualize_heatmap(model = model, filename = "vis_heat_demo")
visualize_topics(model = model, filename = "dist_map_demo")
```

```{r}
visualize_documents(
  model = model,
  docs,
  reduced_embeddings = extras$reduced_embeddings_2d
)
visualize_documents_2d(
  model = model,
  docs,
  reduced_embeddings = extras$reduced_embeddings_2d
)
visualize_documents_3d(
  model = model,
  docs,
  reduced_embeddings = extras$reduced_embeddings_3d
)
```

The following visualizations work only if *topics_over_time* and *topics_per_class* were defined after model training or within the `train_bertopic_model()` function.

```{r, eval=FALSE}
visualize_topics_over_time(
  model = model,
  topics_over_time_model = extras$topics_over_time
)
visualize_topics_per_class(model, extras$topics_per_class, auto_open = FALSE)
```