## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = FALSE,
  comment = "#>"
)

## ----setup, results = FALSE, warning = FALSE, message = FALSE-----------------
library(ACEP)

# Synthetic corpus: five sample texts recycled up to 500 elements, so
# base[i] is sample ((i - 1) %% 5) + 1. Each sample carries the features
# that the demos below switch off one at a time:
#   1: newline, URL, month name, the word "Fraternidad"
#   2: @user mention, #hashtag, emoji, repeated punctuation
#   3: weekday name, month name, digits, punctuation
#   4: digits, stopwords
#   5: one- and two-letter words
base <- rep(c(
  "La Fraternidad anunció un paro de trenes en noviembre.\nMás información en https://example.com",
  "@usuario reclamó mejoras salariales!!! #Transporte 😊",
  "Martes 08 de noviembre: trabajadores en paro total.",
  "El sindicato pidió 123 respuestas al gobierno municipal.",
  "A b c de la protesta"
), length.out = 500)

# Call acep_clean() with every cleaning step disabled unless the caller
# enables it explicitly, so each demo isolates the effect of one argument.
# Arguments mirror the ones this vignette passes to acep_clean(); the
# return value is whatever acep_clean() returns for `x`.
clean_only <- function(x,
                       tolower = FALSE, rm_cesp = FALSE, rm_emoji = FALSE,
                       rm_hashtag = FALSE, rm_users = FALSE, rm_punt = FALSE,
                       rm_num = FALSE, rm_url = FALSE, rm_meses = FALSE,
                       rm_dias = FALSE, rm_stopwords = FALSE,
                       rm_shortwords = FALSE, rm_newline = FALSE,
                       rm_whitespace = FALSE, other_sw = NULL) {
  acep_clean(
    x,
    tolower = tolower,
    rm_cesp = rm_cesp,
    rm_emoji = rm_emoji,
    rm_hashtag = rm_hashtag,
    rm_users = rm_users,
    rm_punt = rm_punt,
    rm_num = rm_num,
    rm_url = rm_url,
    rm_meses = rm_meses,
    rm_dias = rm_dias,
    rm_stopwords = rm_stopwords,
    rm_shortwords = rm_shortwords,
    rm_newline = rm_newline,
    rm_whitespace = rm_whitespace,
    other_sw = other_sw
  )
}

# Print a text before ("SIN") and after ("CON") applying `option`, each
# framed by asterisks so leading/trailing whitespace stays visible.
# `option` is the label shown in the banner; `before`/`after` are the texts.
show_effect <- function(option, before, after) {
  cat(paste0("****SIN ", option, "****\n", before, "****\n"))
  cat(paste0("****CON ", option, "****\n", after, "****\n"))
}

## ----twett, message=FALSE-----------------------------------------------------
primer_tweet <- base[2]
primer_tweet

## ----tolower, message=FALSE---------------------------------------------------
minus <- clean_only(primer_tweet, tolower = TRUE)
show_effect("tolower", primer_tweet, minus)

## ----cesp, message=FALSE------------------------------------------------------
cesp <- clean_only(primer_tweet, rm_cesp = TRUE)
show_effect("rm_cesp", primer_tweet, cesp)

## ----emoji, message=FALSE-----------------------------------------------------
emoji <- clean_only(primer_tweet, rm_emoji = TRUE)
show_effect("rm_emoji", primer_tweet, emoji)

## ----hash, message=FALSE------------------------------------------------------
# Sample 2 is the only text containing a hashtag.
con_hash <- base[2]
hash <- clean_only(con_hash, rm_hashtag = TRUE)
show_effect("rm_hashtag", con_hash, hash)

## ----user, message=FALSE------------------------------------------------------
con_user <- base[2]
user <- clean_only(con_user, rm_users = TRUE)
show_effect("rm_users", con_user, user)

## ----punct, message=FALSE-----------------------------------------------------
punt <- base[3]
s_punt <- clean_only(punt, rm_punt = TRUE)
show_effect("rm_punt", punt, s_punt)

## ----num, message=FALSE-------------------------------------------------------
# Sample 4 contains the number 123.
num <- base[4]
num_s <- clean_only(num, rm_num = TRUE)
show_effect("rm_num", num, num_s)

## ----url, message=FALSE-------------------------------------------------------
# Sample 1 is the only text containing a URL.
url <- base[1]
url_s <- clean_only(url, rm_url = TRUE)
show_effect("rm_url", url, url_s)

## ----meses, message=FALSE-----------------------------------------------------
# Sample 3 contains the month name "noviembre".
meses <- base[3]
meses_s <- clean_only(meses, rm_meses = TRUE)
show_effect("rm_meses", meses, meses_s)

## ----dia, message=FALSE-------------------------------------------------------
# Sample 3 contains the weekday name "Martes".
dia <- base[3]
dia_s <- clean_only(dia, rm_dias = TRUE)
show_effect("rm_dias", dia, dia_s)

## ----stop, message=FALSE------------------------------------------------------
# Illustrative list only: it is not passed to acep_clean(), which is
# called here with other_sw = NULL.
stopwords <- c("de", "la", "el")
stopw <- base[4]
stopw_w <- clean_only(stopw, rm_stopwords = TRUE)
show_effect("rm_stopwords", stopw, stopw_w)

## ----short, message=FALSE-----------------------------------------------------
# Sample 5 is made mostly of one- and two-letter words.
short <- base[5]
short_s <- clean_only(short, rm_shortwords = TRUE)
show_effect("rm_shortwords", short, short_s)

## ----newline, message=FALSE---------------------------------------------------
# Sample 1 is the only text containing a line break.
newl <- base[1]
newl_s <- clean_only(newl, rm_newline = TRUE)
show_effect("rm_newline", newl, newl_s)

## ----whitespace, message=FALSE------------------------------------------------
# NOTE(review): none of the five samples has repeated or trailing spaces,
# so this demo may print identical text before and after; confirm what
# rm_whitespace is meant to strip and extend the samples if needed.
white <- base[5]
white_s <- clean_only(white, rm_whitespace = TRUE)
show_effect("rm_whitespace", white, white_s)

## ----other, message=FALSE-----------------------------------------------------
# Sample 1 contains "Fraternidad", one of the extra stopwords supplied.
osw <- base[1]
osw_s <- clean_only(
  osw,
  rm_stopwords = TRUE,
  other_sw = c("conciliación", "Fraternidad")
)
show_effect("other_sw", osw, osw_s)