## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## ----eval=F--------------------------------------------------------------------

library(tensorflow)
library(keras)
library(data.table)
library(tfdatasets)
library(tfaddons)

# Preprocessing -----------------------------------------------------------

# Assumes you've downloaded and unzipped one of the bilingual datasets offered at
# http://www.manythings.org/anki/ and put it into a directory "data".
# This example translates English to Dutch.
download_data = function() {
  if (!dir.exists('data')) {
    dir.create('data')
  }
  if (!file.exists('data/nld-eng.zip')) {
    download.file('http://www.manythings.org/anki/nld-eng.zip',
                  destfile = file.path("data", basename('nld-eng.zip')))
    unzip('data/nld-eng.zip', exdir = 'data')
  }
}

download_data()

filepath <- file.path("data", "nld.txt")

df = data.table::fread(filepath, header = FALSE, encoding = 'UTF-8',
                       select = c(1, 2), nrows = -1)

text_cleaner <- function(text) {
  text %>%
    # replace non-ASCII characters
    textclean::replace_non_ascii() %>%
    # strip all non-relevant symbols (letters, spaces, and apostrophes are retained)
    textclean::strip(apostrophe.remove = TRUE) %>%
    paste(' ', ., ' ')
}

df = sapply(1:2, function(x) text_cleaner(df[[x]])) %>% as.data.table()

# Fit a tokenizer, turn the texts into integer sequences, and pad them at the end
text_tok <- function(text) {
  tokenizer = text_tokenizer(filters = '')
  tokenizer %>% fit_text_tokenizer(text)
  vocab = tokenizer$word_index
  data = tokenizer %>%
    texts_to_sequences(text) %>%
    pad_sequences(padding = 'post')
  list(vocab, data, tokenizer)
}

c(input_vocab_size, data_en, tokenizer_en) %<-% (df[['V1']] %>% text_tok())
c(output_vocab_size, data_de, tokenizer_de) %<-% (df[['V2']] %>% text_tok())

# Split the dataset
indices_to_take = sample.int(n = nrow(df), size = floor(0.8 * nrow(df)), replace = FALSE)

split_data <- function(data) {
  c(train, test) %<-% list(data[indices_to_take, ], data[-indices_to_take, ])
  list(train, test)
}

c(en_train, en_test, de_train, de_test) %<-% c(split_data(data_en), split_data(data_de))

rm(df, filepath, indices_to_take, download_data, split_data, text_cleaner, text_tok)

batch_size = 64L
buffer_size = nrow(en_train)
steps_per_epoch = buffer_size %/% batch_size
embedding_dims = 256L
rnn_units = 1024L
dense_units = 1024L
dtype = tf$float32  # used to initialize the decoder cell's zero state

dataset = tensor_slices_dataset(list(en_train, de_train)) %>%
  dataset_shuffle(buffer_size) %>%
  dataset_batch(batch_size, drop_remainder = TRUE)
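
# Illustrative sanity check, not part of the original pipeline: pull one batch
# from `dataset` and look at its shapes before building the model. The names
# `first_batch`, `en_batch`, and `de_batch` are ad hoc and only used here.
first_batch = dataset %>% dataset_take(1) %>% iterate()
c(en_batch, de_batch) %<-% first_batch[[1]]
dim(en_batch)  # batch_size x padded English sequence length
dim(de_batch)  # batch_size x padded Dutch sequence length
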
EncoderNetwork = reticulate::PyClass(
  'EncoderNetwork',
  inherit = tf$keras$Model,
  defs = list(

    `__init__` = function(self, input_vocab_size, embedding_dims, rnn_units) {

      super()$`__init__`()

      # + 1 accounts for the padding index 0 produced by pad_sequences()
      self$encoder_embedding = layer_embedding(input_dim = length(input_vocab_size) + 1L,
                                               output_dim = embedding_dims)
      self$encoder_rnnlayer = layer_lstm(units = rnn_units, return_sequences = TRUE,
                                         return_state = TRUE)
      NULL
    }
  )
)

DecoderNetwork = reticulate::PyClass(
  'DecoderNetwork',
  inherit = tf$keras$Model,
  defs = list(

    `__init__` = function(self, output_vocab_size, embedding_dims, rnn_units) {

      super()$`__init__`()
      # + 1 accounts for the padding index 0
      self$decoder_embedding = layer_embedding(input_dim = length(output_vocab_size) + 1L,
                                               output_dim = embedding_dims)
      self$dense_layer = layer_dense(units = length(output_vocab_size) + 1L)
      self$decoder_rnncell = tf$keras$layers$LSTMCell(rnn_units)
      # Sampler used during training (teacher forcing)
      self$sampler = sampler_training()
      # Create the attention mechanism with memory = NULL; the memory is set
      # from the encoder output at each training step
      self$attention_mechanism = self$build_attention_mechanism(
        dense_units, NULL, c(rep(ncol(data_en), batch_size)))
      self$rnn_cell = self$build_rnn_cell(batch_size)
      self$decoder = decoder_basic(cell = self$rnn_cell, sampler = self$sampler,
                                   output_layer = self$dense_layer)
      NULL
    },

    build_attention_mechanism = function(self, units, memory, memory_sequence_length) {
      attention_luong(units = units, memory = memory,
                      memory_sequence_length = memory_sequence_length)
    },

    # Wrap the decoder LSTM cell with the attention mechanism
    build_rnn_cell = function(self, batch_size) {
      attention_wrapper(cell = self$decoder_rnncell,
                        attention_mechanism = self$attention_mechanism,
                        attention_layer_size = dense_units)
    },

    # Zero AttentionWrapperState whose cell state is replaced by the encoder state
    build_decoder_initial_state = function(self, batch_size, encoder_state, dtype) {
      decoder_initial_state = self$rnn_cell$get_initial_state(batch_size = batch_size,
                                                              dtype = dtype)
      decoder_initial_state$clone(cell_state = encoder_state)
    }
  )
)

encoderNetwork = EncoderNetwork(input_vocab_size, embedding_dims, rnn_units)
decoderNetwork = DecoderNetwork(output_vocab_size, embedding_dims, rnn_units)
optimizer = tf$keras$optimizers$Adam()

loss_function <- function(y_pred, y) {
  # shape of y:      [batch_size, Ty]
  # shape of y_pred: [batch_size, Ty, output_vocab_size]
  loss = keras::loss_sparse_categorical_crossentropy(y, y_pred, from_logits = TRUE)
  # mask is 0 where y == 0 (padding) and 1 elsewhere
  mask = tf$logical_not(tf$math$equal(y, 0L))
  mask = tf$cast(mask, dtype = loss$dtype)
  loss = mask * loss
  tf$reduce_mean(loss)
}
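
# Illustrative only: how the padding mask inside loss_function() behaves. The
# toy tensors below are made up; the first two positions hold real tokens
# (3 and 5), the last two hold the padding index 0 and contribute nothing.
toy_y      = tf$constant(matrix(c(3L, 5L, 0L, 0L), nrow = 1))
toy_logits = tf$random$uniform(shape = c(1L, 4L, length(output_vocab_size) + 1L))
loss_function(toy_logits, toy_y)  # scalar; only the two real tokens are scored
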
train_step <- function(input_batch, output_batch, encoder_initial_cell_state) {
  loss = 0L

  with(tf$GradientTape() %as% tape, {
    encoder_emb_inp = encoderNetwork$encoder_embedding(input_batch)
    c(a, a_tx, c_tx) %<-% encoderNetwork$encoder_rnnlayer(encoder_emb_inp,
                                                          initial_state = encoder_initial_cell_state)

    # a: encoder activations for every time step; a_tx, c_tx: the encoder's final
    # hidden and cell states, which seed the decoder

    # Prepare decoder input and target sequences: the input drops the last token,
    # the target is the input shifted one step ahead
    de_len = ncol(data_de)
    decoder_input  = tf$convert_to_tensor(output_batch %>% as.array() %>% .[, 1:(de_len - 1)])
    decoder_output = tf$convert_to_tensor(output_batch %>% as.array() %>% .[, 2:de_len])

    # Decoder embeddings
    decoder_emb_inp = decoderNetwork$decoder_embedding(decoder_input)

    # Set the decoder memory to the encoder output and build the initial
    # AttentionWrapperState from the encoder's final states
    decoderNetwork$attention_mechanism$setup_memory(a)
    decoder_initial_state = decoderNetwork$build_decoder_initial_state(batch_size,
                                                                       encoder_state = list(a_tx, c_tx),
                                                                       dtype = tf$float32)
    # BasicDecoderOutput
    c(outputs, res1, res2) %<-% decoderNetwork$decoder(decoder_emb_inp,
                                                       initial_state = decoder_initial_state,
                                                       sequence_length = c(rep(de_len - 1L, batch_size)))

    logits = outputs$rnn_output
    # Calculate the masked loss
    loss = loss_function(logits, decoder_output)
  })

  # List of all trainable layer variables / weights
  variables = c(encoderNetwork$trainable_variables, decoderNetwork$trainable_variables)
  # differentiate the loss with respect to the variables
  gradients = tape$gradient(loss, variables)
  # grads_and_vars: list of (gradient, variable) pairs
  grads_and_vars = purrr::transpose(list(gradients, variables))
  optimizer$apply_gradients(grads_and_vars)
  loss
}

initialize_initial_state = function() {
  list(tf$zeros(c(batch_size, rnn_units)), tf$zeros(c(batch_size, rnn_units)))
}

epochs = 1

for (i in seq_len(epochs)) {
  encoder_initial_cell_state = initialize_initial_state()
  total_loss = 0.0
  res = dataset %>% dataset_take(steps_per_epoch) %>% iterate()
  for (batch in seq_along(res)) {
    c(input_batch, output_batch) %<-% res[[batch]]
    batch_loss = train_step(input_batch, output_batch, encoder_initial_cell_state)
    total_loss = total_loss + batch_loss
    if (batch %% 5 == 0) {
      print(paste('batch loss:', batch_loss$numpy(), 'epoch', i, 'batch', batch))
    }
  }
}
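
# Optional follow-up, a minimal sketch and not part of the original example:
# persist the trained weights so both networks can be reloaded later for
# inference. The checkpoint directory and file prefixes below are made up.
dir.create("checkpoints", showWarnings = FALSE)
encoderNetwork$save_weights("checkpoints/encoder")
decoderNetwork$save_weights("checkpoints/decoder")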