## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, eval = FALSE)

## -----------------------------------------------------------------------------
# # Download WikiText-2 with the fastai helper, read the train and test CSV
# # files and stack them into a single data frame.
# library(fastai)
# library(magrittr)
#
# URLs_WIKITEXT()
#
# path = 'wikitext-2'
#
# train = data.table::fread(paste(path, 'train.csv', sep = '/'), header = FALSE, fill = TRUE)
#
# test = data.table::fread(paste(path, 'test.csv', sep = '/'), header = FALSE, fill = TRUE)
#
# df = rbind(train, test)
#
# rm(train, test)

## -----------------------------------------------------------------------------
# # Load the pretrained GPT-2 tokenizer and language model from Hugging Face
# # Transformers via reticulate.
# tr = reticulate::import('transformers')
# pretrained_weights = 'gpt2'
# tokenizer = tr$GPT2TokenizerFast$from_pretrained(pretrained_weights)
# model = tr$GPT2LMHeadModel$from_pretrained(pretrained_weights)

## -----------------------------------------------------------------------------
# # Tokenize every row of the data frame: split the text into GPT-2 tokens,
# # convert them to ids and wrap the result in a tensor. Progress is printed
# # every 100 rows.
# tokenize = function(text) {
#   toks = tokenizer$tokenize(text)
#   tensor(tokenizer$convert_tokens_to_ids(toks))
# }
#
# tokenized = list()
#
# for (i in seq_along(df$V1)) {
#   tokeniz = tokenize(df$V1[i])
#   tokenized = tokenized %>% append(tokeniz)
#   if (i %% 100 == 0) {
#     print(i)
#   }
# }

## -----------------------------------------------------------------------------
# # Split the row indices randomly into 80% training and 20% validation.
# tot = 1:nrow(df)
# tr_idx = sample(nrow(df), 0.8 * nrow(df))
# ts_idx = tot[!tot %in% tr_idx]
# splits = list(tr_idx, ts_idx)

## -----------------------------------------------------------------------------
# # Wrap the tokenized texts in a TfmdLists with the TransformersTokenizer
# # transform, then build language-model dataloaders with a batch size of 8 and
# # a sequence length of 100.
# tls = TfmdLists(tokenized, TransformersTokenizer(tokenizer),
#                 splits = splits,
#                 dl_type = LMDataLoader())
#
# bs = 8
# sl = 100
# dls = tls %>% dataloaders(bs = bs, seq_len = sl)
#
# # Now we are ready to create our Learner, the fastai object that groups the
# # data, the model and the loss function and handles training and inference.
# # Since we are in a language-model setting, we pass perplexity as a metric,
# # and we need the TransformersDropOutput() callback so that the loss function
# # only receives the logits from the model output. Lastly, we use mixed
# # precision to save every bit of memory we can (and, if you have a modern GPU,
# # it will also make training faster):
# learn = Learner(dls, model, loss_func = CrossEntropyLossFlat(),
#                 cbs = list(TransformersDropOutput()),
#                 metrics = Perplexity())$to_fp16()
#
# learn %>% fit_one_cycle(1, 1e-4)

## -----------------------------------------------------------------------------
# # Generate text from a prompt with beam search: encode the prompt, add a batch
# # dimension with [NULL], move the tensor to the GPU, then decode the prediction.
# prompt = "\n = Unicorn = \n \n A unicorn is a magical creature with a rainbow tail and a horn"
# prompt_ids = tokenizer$encode(prompt)
# inp = tensor(prompt_ids)[NULL]$cuda()
# preds = learn$model$generate(inp, max_length = 80L, num_beams = 5L, temperature = 1.5)
# tokenizer$decode(as.integer(preds[0]$cpu()$numpy()))
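
## -----------------------------------------------------------------------------
# # A small follow-up sketch (not part of the original vignette): beam search is
# # only one decoding strategy. Assuming the fine-tuned `learn` and the prompt
# # tensor `inp` from the previous chunk, the same Hugging Face generate() method
# # can also sample from the model, with do_sample, top_k and top_p controlling
# # the randomness of the continuation.
# preds_sampled = learn$model$generate(inp,
#                                      max_length = 80L,
#                                      do_sample = TRUE,
#                                      top_k = 50L,
#                                      top_p = 0.95)
# tokenizer$decode(as.integer(preds_sampled[0]$cpu()$numpy()))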
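
## -----------------------------------------------------------------------------
# # Another hedged sketch (an assumption, not shown in the original vignette):
# # the fine-tuned weights can be written to disk with the standard Hugging Face
# # save_pretrained() methods and reloaded later without retraining. The
# # directory name 'gpt2-wikitext2' is arbitrary.
# learn$model$save_pretrained('gpt2-wikitext2')
# tokenizer$save_pretrained('gpt2-wikitext2')
#
# # Reload for inference in a fresh session:
# # model_reloaded = tr$GPT2LMHeadModel$from_pretrained('gpt2-wikitext2')
# # tokenizer_reloaded = tr$GPT2TokenizerFast$from_pretrained('gpt2-wikitext2')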