## ----setup, include = FALSE---------------------------------------------------
# Chunk defaults: merge source and output into one block and prefix every
# output line with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ---- eval=FALSE, echo=TRUE---------------------------------------------------
#  install.packages("conjurer")

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
library(conjurer)
# Build 100 customers with conjurer's buildCust() and preview the first few.
customers <- buildCust(numOfCust = 100)
print(head(customers))

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Generate 100 names (minLength = 5, maxLength = 7) and hold them in a
# one-column data frame.
custNames <- as.data.frame(
  buildNames(numOfNames = 100, minLength = 5, maxLength = 7)
)

# Give the single column a descriptive heading.
colnames(custNames) <- "customerName"
print(head(custNames))

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Put the customers and their generated names side by side, then preview.
customer2name <- cbind(customers, custNames)
print(head(customer2name))

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Draw one rounded age per customer in the 23-80 range.
# n must equal the number of customers (100): with fewer values, cbind()
# below would silently recycle the short column and repeat the same ages.
custAge <- as.data.frame(
  round(buildNum(n = 100, st = 23, en = 80, disp = 0.5, outliers = 1))
)

# Give the single column a descriptive heading.
colnames(custAge) <- "customerAge"
print(head(custAge))

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Put the customers and their ages side by side, then preview.
customer2age <- cbind(customers, custAge)
print(head(customer2age))

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Each generated value is assembled from five parts, in order: a "+NN" prefix,
# "(", a three-digit code, ")" and a number taken from 7821:8324.
parts <- list(
  c("+91", "+44", "+64"),
  "(",
  c(491, 324, 211),
  ")",
  7821:8324
)
# One entry per part; the last part is given no probabilities (NULL).
probs <- list(
  c(0.25, 0.25, 0.50),
  1,
  c(0.30, 0.60, 0.10),
  1,
  NULL
)
custPhoneNumbers <- as.data.frame(
  buildPattern(n = 100, parts = parts, probs = probs)
)
head(custPhoneNumbers)

# Give the single column a descriptive heading.
colnames(custPhoneNumbers) <- "customerPhone"
print(head(custPhoneNumbers))

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Put the customers and their phone numbers side by side, then preview.
customer2phone <- cbind(customers, custPhoneNumbers)
print(head(customer2phone))

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Build 10 products with prices bounded by minPrice = 5 and maxPrice = 50.
products <- buildProd(
  numOfProd = 10,
  minPrice = 5,
  maxPrice = 50
)
print(head(products))

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Build a two-level hierarchy using the "equalSplit" type with 2 splits.
productHierarchy <- buildHierarchy(
  type = "equalSplit",
  splits = 2,
  numOfLevels = 2
)
print(productHierarchy)

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Rename the two columns of the data frame.
names(productHierarchy) <- c("category", "subcategory")

# Replace the generated level-1 labels with Food and Non-Food.
productHierarchy[["category"]] <- gsub(
  "Level_1_element_1", "Food", productHierarchy[["category"]]
)
productHierarchy[["category"]] <- gsub(
  "Level_1_element_2", "Non-Food", productHierarchy[["category"]]
)

# Replace the generated level-2 labels with readable subcategory names.
productHierarchy[["subcategory"]] <- gsub(
  "Level_2_element_1", "Beverages", productHierarchy[["subcategory"]]
)
productHierarchy[["subcategory"]] <- gsub(
  "Level_2_element_3", "Dairy", productHierarchy[["subcategory"]]
)
productHierarchy[["subcategory"]] <- gsub(
  "Level_2_element_2", "Sanitary", productHierarchy[["subcategory"]]
)
productHierarchy[["subcategory"]] <- gsub(
  "Level_2_element_4", "Household", productHierarchy[["subcategory"]]
)

# Sort by category and print the data to confirm the results.
productHierarchy <- productHierarchy[
  order(productHierarchy[["category"]]), ,
  drop = FALSE
]
print(productHierarchy)

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Generate 10000 transactions with conjurer's genTrans().
transactions <- genTrans(
  cycles = "y",
  spike = 12,
  outliers = 1,
  transactions = 10000
)

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Count transactions per day number and draw the counts as an unlabelled line.
TxnAggregated <- aggregate(
  transactions$transactionID,
  by = list(transactions$dayNum),
  FUN = length
)
plot(TxnAggregated, type = "l", ann = FALSE)

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Map customers to transaction IDs with buildPareto(), using pareto = c(80, 20).
customer2transaction <- buildPareto(
  customers,
  transactions$transactionID,
  pareto = c(80, 20)
)

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Name the two columns of the mapping.
names(customer2transaction) <- c("transactionID", "customer")

# Inspect the output.
print(head(customer2transaction))

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# First step: make the product hierarchy data frame have exactly one row per
# product. The hierarchy rows are repeated cyclically (row 1, 2, ..., then
# back to row 1) until there are nrow(products) of them. rep_len() does this
# explicitly, so no recycling warning is raised when the number of products is
# not a multiple of the number of hierarchy rows.
category <- productHierarchy$category
subcategory <- productHierarchy$subcategory
productHierarchy <- data.frame(
  category = rep_len(category, nrow(products)),
  subcategory = rep_len(subcategory, nrow(products)),
  stringsAsFactors = FALSE
)

# Attach the category and subcategory columns to the products.
products <- cbind(products, productHierarchy[, c("category", "subcategory")])
# Inspect the output.
print(head(products))

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Map product SKUs to transaction IDs with buildPareto(), using
# pareto = c(70, 30).
product2transaction <- buildPareto(
  products$SKU,
  transactions$transactionID,
  pareto = c(70, 30)
)
names(product2transaction) <- c("transactionID", "SKU")

# Inspect the output.
print(head(product2transaction))

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Join the customer and product mappings on the transaction they share.
df1 <- merge(
  x = customer2transaction,
  y = product2transaction,
  by = "transactionID"
)

# Add the transaction details, keeping every row of df1.
df2 <- merge(
  x = df1,
  y = transactions,
  by = "transactionID",
  all.x = TRUE
)

# Inspect the output.
print(head(df2))

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Left-join the customer attributes (name, age, phone) one table at a time,
# matching df*'s "customer" column against each lookup's "customers" column.
df3 <- merge(
  x = df2, y = customer2name,
  by.x = "customer", by.y = "customers", all.x = TRUE
)
df4 <- merge(
  x = df3, y = customer2age,
  by.x = "customer", by.y = "customers", all.x = TRUE
)
df5 <- merge(
  x = df4, y = customer2phone,
  by.x = "customer", by.y = "customers", all.x = TRUE
)
# Left-join the product attributes on SKU.
df6 <- merge(x = df5, y = products, by = "SKU", all.x = TRUE)
# Keep the columns of interest, in presentation order.
dfFinal <- df6[, c(
  "dayNum", "mthNum",
  "customer", "customerName", "customerAge", "customerPhone",
  "transactionID", "SKU", "Price",
  "category", "subcategory"
)]


# Inspect the output.
print(head(dfFinal))

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Count rows of the final data per day number and plot the counts as an
# unlabelled line.
aggregatedDataDay <- aggregate(
  dfFinal$transactionID,
  by = list(dfFinal$dayNum),
  FUN = length
)
plot(aggregatedDataDay, type = "l", ann = FALSE)

## ---- eval=TRUE, echo=TRUE, results='markup'----------------------------------
# Count rows per month number, order by month, and plot the counts without
# annotations.
aggregatedDataMth <- aggregate(
  dfFinal$transactionID,
  by = list(dfFinal$mthNum),
  FUN = length
)
aggregatedDataMthSorted <- aggregatedDataMth[
  order(aggregatedDataMth[["Group.1"]]), ,
  drop = FALSE
]
plot(aggregatedDataMthSorted, ann = FALSE)