\documentclass[a4paper]{article}

% Vignette metadata read by R's vignette tooling.
% Each entry has to sit on its own comment line to be recognised.
%\VignetteIndexEntry{m61r base_r}
%\VignettePackage{m61r}

% Page layout: no paragraph indentation, paragraphs separated by vertical space.
\setlength{\parindent}{0in}
\setlength{\parskip}{.1in}
\setlength{\textwidth}{140mm}
\setlength{\oddsidemargin}{10mm}

\title{dplyr- and tidyr-like functions written in base R}
\author{pv71u98h1}

\begin{document}

\maketitle

\section{Introduction}
\label{sec:intro}

The R package m61r gathers functions similar to the ones present in dplyr and tidyr, but written only in base R and without requiring any dependencies.

All the functions only work with data.frames.

<<>>=
library(m61r)
@

% Every example below uses the CO2 data frame and shows the first rows of the result.

\section{filter}
\label{sec:filter}

<<>>=
tmp <- filter_(CO2,~Plant=="Qn1")
head(tmp)
@

<<>>=
tmp <- filter_(CO2,~Type=="Quebec")
head(tmp)
@

\section{select}
\label{sec:select}

<<>>=
tmp <- select_(CO2,~Type)
head(tmp)
@

<<>>=
tmp <- select_(CO2,~c(Plant,Type))
head(tmp)
@

<<>>=
tmp <- select_(CO2,~-Type)
head(tmp)
@

<<>>=
tmp <- select_(CO2,variable=~-(Plant:Treatment))
head(tmp)
@

\section{mutate/transmutate}
\label{sec:mutate-transmutate}

<<>>=
tmp <- mutate_(CO2,z=~conc/uptake)
head(tmp)
@

<<>>=
tmp <- mutate_(CO2,mean=~mean(uptake))
head(tmp)
@

<<>>=
tmp <- mutate_(CO2,z1=~uptake/conc,y=~conc/100)
head(tmp)
@

<<>>=
tmp <- transmutate_(CO2,z2=~uptake/conc,y2=~conc/100)
head(tmp)
@

\section{summarise}
\label{sec:summarise}

<<>>=
tmp <- summarise_(CO2,mean=~mean(uptake),sd=~sd(uptake))
tmp
@

<<>>=
tmp <- summarise_(CO2, group=~c(Type,Treatment),mean=~mean(uptake),sd=~sd(uptake))
tmp
@

\section{arrange/desange}
\label{sec:arrange-desange}

<<>>=
tmp <- arrange_(CO2,~c(conc))
head(tmp)
@

<<>>=
tmp <- arrange_(CO2,~c(Treatment,conc,uptake))
head(tmp)
@

<<>>=
tmp <- desange_(CO2,~c(Treatment,conc,uptake))
head(tmp)
@

\section{join}
\label{sec:join}

% Named chunk: builds the two example data frames.
% It is pulled into each join example below by chunk reference.
<<join>>=
authors <- data.frame(
    surname = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
    nationality = c("US", "Australia", "US", "UK", "Australia"),
    deceased = c("yes", rep("no", 4)))

books <- data.frame(
    name = I(c("Tukey", "Venables", "Tierney","Ripley", "Ripley", "McNeil", "R Core")),
    title = c("Exploratory Data Analysis",
              "Modern Applied Statistics ...",
              "LISP-STAT",
              "Spatial Statistics", "Stochastic Simulation",
              "Interactive Data Analysis",
              "An Introduction to R"),
    other.author = c(NA, "Ripley", NA, NA, NA, NA,"Venables & Smith"))
@

\subsection{inner join}
\label{sec:inner-join}

<<>>=
<<join>>
tmp <- inner_join_(authors,books, by.x = "surname", by.y = "name")
tmp
@

\subsection{left join}
\label{sec:left-join}

<<>>=
<<join>>
tmp <- left_join_(authors,books, by.x = "surname", by.y = "name")
tmp
@

\subsection{right join}
\label{sec:right-join}

<<>>=
<<join>>
tmp <- right_join_(authors,books, by.x = "surname", by.y = "name")
tmp
@

\subsection{full join}
\label{sec:full-join}

<<>>=
<<join>>
tmp <- full_join_(authors,books, by.x = "surname", by.y = "name")
tmp
@

\subsection{semi join}
\label{sec:semi-join}

<<>>=
<<join>>
tmp <- semi_join_(authors,books, by.x = "surname", by.y = "name")
tmp
@

\subsection{anti join}
\label{sec:anti-join}

% The anti join is shown in both directions: authors against books, then books against authors.
<<>>=
<<join>>
tmp <- anti_join_(authors,books, by.x = "surname", by.y = "name")
tmp
tmp <- anti_join_(books,authors, by.x = "name", by.y = "surname")
tmp
@

\section{reshape: gather/spread}
\label{sec:reshape}

\subsection{gather}
\label{sec:gather}

% Named chunk: defines df3 and gathers it; reused by the spread example.
<<gather>>=
df3 <- data.frame(id = 1:4,
                  age = c(40,50,60,50),
                  dose.a1 = c(1,2,1,2),
                  dose.a2 = c(2,1,2,1),
                  dose.a14 = c(3,3,3,3))
df3
gather_(df3,pivot = c("id","age"))
@

\subsection{spread}
\label{sec:spread}

% df5 extends the gathered data with two extra rows (ids 5 and 6) before spreading.
<<>>=
<<gather>>
df4 <- gather_(df3,pivot = c("id","age"))
df5 <- rbind(df4,
    data.frame(id=5, age=20,parameters="dose.a14",values=8),
    data.frame(id=6, age=10,parameters="dose.a1",values=5))
df5
spread_(df5,col_name="parameters",col_values="values",pivot=c("id","age"))
@

\end{document}