---
title: "Tidy pipelines and structured output"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Tidy pipelines and structured output}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r}
# Global chunk options. Later chunks are evaluated only when the environment
# variable LLMR_RUN_VIGNETTES equals "true" (compared case-insensitively);
# when it is unset, the default "false" turns evaluation off.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = identical(
    tolower(Sys.getenv("LLMR_RUN_VIGNETTES", unset = "false")),
    "true"
  )
)
```

We’ll show both unstructured and structured pipelines, using four models, one per provider:

- `gpt-5-nano` (OpenAI)
- `claude-sonnet-4-20250514` (Anthropic)
- `gemini-2.5-flash` (Gemini)
- `openai/gpt-oss-20b` (Groq)

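You will need the environment variables `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GEMINI_API_KEY`, and `GROQ_API_KEY`. The code chunks below are evaluated only when `LLMR_RUN_VIGNETTES` is set to `true`.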

```{r}
library(LLMR)
library(dplyr)

# One configuration object per provider, each built from a provider name and
# a model name.
cfg_openai <- llm_config(
  "openai",
  "gpt-5-nano"
)

# max_tokens is passed explicitly: Anthropic requires it, and setting it here
# avoids warnings.
cfg_cld <- llm_config(
  "anthropic",
  "claude-sonnet-4-20250514",
  max_tokens = 512
)

cfg_gemini <- llm_config(
  "gemini",
  "gemini-2.5-flash"
)

cfg_groq <- llm_config(
  "groq",
  "openai/gpt-oss-20b"
)
```

## llm_fn: unstructured (OpenAI)
```{r}
# Inputs to classify; this vector is reused by later chunks.
words <- c(
  "excellent",
  "awful",
  "fine"
)

# `{x}` in the prompt is the placeholder for each element of `words`.
out <- llm_fn(
  x       = words,
  prompt  = "Classify '{x}' as Positive, Negative, or Neutral.",
  .config = cfg_openai,
  .return = "columns"
)

out
```

## llm_fn: unstructured (Groq)
```{r}
# Same inputs and prompt as the OpenAI example; only the configuration differs.
# `words` is the vector defined in the previous chunk.
out_groq <- llm_fn(
  x       = words,
  prompt  = "Classify '{x}' as Positive, Negative, or Neutral.",
  .config = cfg_groq,
  .return = "columns"
)

out_groq
```

## llm_fn_structured: schema-first (OpenAI)
```{r}
# JSON-Schema-style description of the expected reply, written as nested lists:
# an object with exactly two required fields, `label` and `score`.
schema <- list(
  type = "object",
  properties = list(
    label = list(
      type = "string",
      description = "Sentiment label"
    ),
    score = list(
      type = "number",
      description = "Confidence 0..1"
    )
  ),
  required = list("label", "score"),
  # No fields other than the two declared above.
  additionalProperties = FALSE
)

# Structured variant: the schema is passed along with the prompt, and the two
# schema fields are requested by name via `.fields`.
out_s <- llm_fn_structured(
  x       = words,
  prompt  = "Classify '{x}' as Positive, Negative, or Neutral with confidence.",
  .config = cfg_openai,
  .schema = schema,
  .fields = c("label", "score")
)

out_s
```

## llm_mutate: unstructured (Anthropic)
```{r}
# Small example table: an integer id plus one short text per row.
df <- tibble::tibble(
  id = 1:3,
  text = c(
    "Cats are great pets",
    "The weather is bad",
    "I like tea"
  )
)

# Pipe the table into llm_mutate(); `answer` names the output and `{text}` in
# the prompt refers to the `text` column of `df`.
df_u <-
  df |>
  llm_mutate(
    answer,
    prompt  = "Give a short category for: {text}",
    .config = cfg_cld,
    .return = "columns"
  )

df_u
```

## llm_mutate_structured: structured (Gemini)
```{r}
# Schema for the annotation example: an object with two required string
# fields, `category` and `rationale`.
schema2 <- list(
  type = "object",
  properties = list(
    category = list(
      type = "string"
    ),
    rationale = list(
      type = "string"
    )
  ),
  required = list("category", "rationale"),
  # No fields other than the two declared above.
  additionalProperties = FALSE
)

# Structured counterpart of llm_mutate(): `annot` names the output and the
# reply is described by `schema2`.
df_s <-
  df |>
  llm_mutate_structured(
    annot,
    prompt  = "Extract category and a one-sentence rationale for: {text}",
    .config = cfg_gemini,
    .schema = schema2
    # With a schema supplied, its fields are hoisted automatically.
    # To name them explicitly, you can also pass:
    # .fields = c("category", "rationale")
  )

df_s
```