## ---- include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE,
  cache = FALSE
)

## ----setup, message = F, eval = T-----------------------------------------------
library(processpredictR)
library(bupaR)
library(ggplot2)
library(dplyr)
library(keras)
library(purrr)

## ----echo = F, eval = T, out.width = "60%", fig.align = "center"-----------------
knitr::include_graphics("framework.PNG")

## ---- eval = T-------------------------------------------------------------------
df <- prepare_examples(traffic_fines, task = "outcome")
df

## ---- eval = T-------------------------------------------------------------------
set.seed(123)
split <- df %>% split_train_test(split = 0.8)
split$train_df %>% head(5)
split$test_df %>% head(5)

## ---- eval = T-------------------------------------------------------------------
nrow(split$train_df) / nrow(df)
n_distinct(split$train_df$case_id) / n_distinct(df$case_id)

## --------------------------------------------------------------------------------
# model <- split$train_df %>% create_model(name = "my_model")
# # additional arguments applicable to keras::keras_model() can be passed via ...
# model # the returned object is a list

## --------------------------------------------------------------------------------
# model %>% names() # components of the returned list

## --------------------------------------------------------------------------------
# model$model$name # get the name of the model

## --------------------------------------------------------------------------------
# model$model$non_trainable_variables # list of the non-trainable parameters of the model

## --------------------------------------------------------------------------------
# model %>% compile() # compile the model

## --------------------------------------------------------------------------------
# hist <- fit(object = model, train_data = split$train_df, epochs = 5)

## --------------------------------------------------------------------------------
# hist$params

## --------------------------------------------------------------------------------
# hist$metrics

## --------------------------------------------------------------------------------
# predictions <- model %>% predict(test_data = split$test_df,
#                                  output = "append") # the default
# predictions %>% head(5)

## --------------------------------------------------------------------------------
# predictions %>% class()

## --------------------------------------------------------------------------------
# confusion_matrix(predictions)

## ---- out.width="100%", fig.width = 7---------------------------------------------
# plot(predictions) +
#   theme(axis.text.x = element_text(angle = 90))

## ---- out.width="100%", fig.width = 7---------------------------------------------
# knitr::include_graphics("confusion_matrix.PNG")

## --------------------------------------------------------------------------------
# model %>% evaluate(split$test_df)

## --------------------------------------------------------------------------------
# # preprocessed dataset with hot-encoded categorical features
# df_next_time <- traffic_fines %>%
#   group_by_case() %>%
#   mutate(month = lubridate::month(min(timestamp), label = TRUE)) %>%
#   ungroup_eventlog() %>%
#   prepare_examples(task = "next_time", features = "month") %>%
#   split_train_test()

## --------------------------------------------------------------------------------
# # the attributes of df are added or changed accordingly
# df_next_time$train_df %>% attr("features")

## --------------------------------------------------------------------------------
# df_next_time$train_df %>% attr("hot_encoded_categorical_features")
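## --------------------------------------------------------------------------------
# # Illustrative sketch, not part of the original vignette: the same
# # create_model()/compile()/fit()/predict() cycle shown above, applied to the
# # feature-engineered "next_time" split. The object names and the number of
# # epochs are assumptions chosen for this example.
# model_next_time <- df_next_time$train_df %>% create_model(name = "my_next_time_model")
# model_next_time %>% compile()
# hist_next_time <- fit(object = model_next_time,
#                       train_data = df_next_time$train_df, epochs = 5)
# model_next_time %>% predict(test_data = df_next_time$test_df, output = "append")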
## --------------------------------------------------------------------------------
# df <- prepare_examples(traffic_fines, task = "next_activity") %>% split_train_test()
# custom_model <- df$train_df %>% create_model(custom = TRUE, name = "my_custom_model")
# custom_model

## --------------------------------------------------------------------------------
# custom_model <- custom_model %>%
#   stack_layers(layer_dropout(rate = 0.1)) %>%
#   stack_layers(layer_dense(units = 64, activation = 'relu'))
# custom_model

## --------------------------------------------------------------------------------
# # this works too
# custom_model %>%
#   stack_layers(layer_dropout(rate = 0.1), layer_dense(units = 64, activation = 'relu'))

## --------------------------------------------------------------------------------
# # custom_model$model accesses the underlying keras model; $output accesses the
# # outputs of that model
# new_outputs <- custom_model$model$output %>%
#   keras::layer_dropout(rate = 0.1) %>%
#   keras::layer_dense(units = custom_model$num_outputs, activation = 'softmax')
#
# custom_model <- keras::keras_model(inputs = custom_model$model$input,
#                                    outputs = new_outputs,
#                                    name = "new_custom_model")
# custom_model

## --------------------------------------------------------------------------------
# # class of the model
# custom_model %>% class()

## --------------------------------------------------------------------------------
# # compile
# compile(object = custom_model, optimizer = "adam",
#         loss = loss_sparse_categorical_crossentropy(),
#         metrics = metric_sparse_categorical_crossentropy())

## --------------------------------------------------------------------------------
# # the traces of activities must be tokenized
# tokens_train <- df$train_df %>% tokenize()
# map(tokens_train, head) # the output of tokenize() is a list

## --------------------------------------------------------------------------------
# # pad the sequences so that they all have equal length
# x <- tokens_train$token_x %>%
#   pad_sequences(maxlen = max_case_length(df$train_df), value = 0)
# y <- tokens_train$token_y

## ---- eval = F---------------------------------------------------------------------
# # train
# fit(object = custom_model, x, y, epochs = 10, batch_size = 10)
# # see also ?keras::fit.keras.engine.training.Model
#
# # predict
# tokens_test <- df$test_df %>% tokenize()
# x <- tokens_test$token_x %>%
#   pad_sequences(maxlen = max_case_length(df$train_df), value = 0)
# predict(custom_model, x)
#
# # evaluate: pad the test sequences the same way as for predict()
# x <- tokens_test$token_x %>%
#   pad_sequences(maxlen = max_case_length(df$train_df), value = 0)
# y <- tokens_test$token_y
# # for a regression task such as "next_time", normalize instead by dividing
# # y_test by the standard deviation of y_train:
# # y <- tokens_test$token_y / sd(tokens_train$token_y)
# evaluate(custom_model, x, y)
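## --------------------------------------------------------------------------------
# # Illustrative sketch, not part of the original vignette: fit() on a keras model
# # returns a history object whose loss/metric curves can be plotted per epoch
# # (plot() returns a ggplot when ggplot2 is loaded), and the fitted model can be
# # saved to disk. `hist_custom` and the target path are assumed names chosen for
# # this example.
# hist_custom <- fit(object = custom_model, x, y, epochs = 10, batch_size = 10)
# plot(hist_custom) # loss and metrics per epoch
# keras::save_model_tf(custom_model, "my_custom_model")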