R tutorial
You can install R from CRAN or download a copy of the portable edition with some extras (Notepad++ and NppToR added). The latter can be run with a batch file after unpacking the archive.
Introductory course without many comments (it is largely self-explanatory):
# Basic arithmetic: R evaluates each expression and prints its value.
1 + 1
(4 + 8) * 4
4^2
sqrt(9)
Vectors:
Matrices:
Data frames:
# Data frames: explore the built-in `mtcars` and `iris` data sets.

# Print the data, open its help page and inspect its structure.
mtcars
?mtcars
str(mtcars)
?str

# Histograms of the `mpg` column, with increasing customisation.
hist(mtcars$mpg)
hist(mtcars$mpg, main = "My first plot in R!")
?hist
hist(mtcars$mpg, main = "My first histogram in R!", col = "green", density = 20)
# abline(a, b) draws y = a + b * x, i.e. here a dashed horizontal line at y = 2.
abline(2, 0, col = "red", lty = 2, lwd = 2)

# Numeric summaries of one column and of the whole data frame.
summary(mtcars$mpg)
summary(mtcars)

# Frequency table of a column, shown as a pie chart.
table(mtcars$gear)
pie(table(mtcars$gear))

# Named `species_counts` rather than `t` so that base::t() is not shadowed.
species_counts <- table(iris$Species)
pie(species_counts)

# `Species` is a factor: inspect its class and levels.
str(iris)
iris$Species
class(iris$Species)
levels(iris$Species)

# Correlations and scatterplot matrix of the four numeric columns.
cor(iris[, 1:4])
pairs(iris[, 1:4])
?pairs

# Panel function for pairs(): writes the absolute correlation of `x` and `y`
# in the centre of the panel, with the text size scaled by that correlation.
#
#   x, y     numeric vectors handed over by pairs() for one panel
#   digits   number of significant digits passed to format()
#   prefix   string pasted in front of the formatted correlation
#   cex.cor  base text size; when missing, 0.8 / strwidth(txt) is used
#   ...      accepted so pairs() can pass extra arguments; not used here
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  # Remember the panel's user coordinates and restore them on exit. par()
  # only sets a parameter when it is given in `tag = value` form, so the
  # saved vector must be passed as `usr = usr`.
  usr <- par("usr")
  on.exit(par(usr = usr), add = TRUE)
  par(usr = c(0, 1, 0, 1))

  r <- abs(cor(x, y))
  # Format `r` together with a fixed long decimal and keep only the first
  # element of the formatted result.
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) {
    cex.cor <- 0.8 / strwidth(txt)
  }
  text(0.5, 0.5, txt, cex = cex.cor * r)
}

# Scatterplot matrix with correlations in the upper panels.
pairs(iris[, 1:4], upper.panel = panel.cor)

# Scatterplot matrix with points filled by species.
pairs(
  iris[1:4],
  main = "Edgar Anderson's Iris Data",
  pch = 21,
  bg = c("red", "green3", "blue")[unclass(iris$Species)]
)
Descriptives:
# Descriptives: column means, frequency tables and tests of association.

# mean() does not work on a whole data frame (it returns NA with a warning),
# so the per-column means are computed with colMeans().
colMeans(mtcars)
max(colMeans(mtcars))
which.max(colMeans(mtcars))
mtcars[, which.max(colMeans(mtcars))]

# One-way and two-way frequency tables, as counts and as proportions.
table(mtcars$gear)
prop.table(table(mtcars$gear))
table(mtcars$gear, mtcars$am)
prop.table(table(mtcars$gear, mtcars$am))

# gmodels must be installed before it can be attached; install it only when
# it is missing.
if (!requireNamespace("gmodels", quietly = TRUE)) {
  install.packages("gmodels")
}
library(gmodels)
CrossTable(mtcars$gear, mtcars$am)

# Mosaic plots of the same two-way table.
mosaicplot(table(mtcars$gear, mtcars$am))
mosaicplot(table(mtcars$gear, mtcars$am), shade = TRUE, color = TRUE)

# Chi-squared test of independence. The result is stored as `chi_res` rather
# than `t` so that base::t() is not shadowed.
chi_res <- chisq.test(table(mtcars$gear, mtcars$am))
chi_res
str(chi_res)
chi_res$expected

# Cramer's V for a chisq.test() result computed on a two-way table:
#   V = sqrt(X^2 / (n * (min(rows, cols) - 1)))
# `x$statistic` already is X^2, so it is not squared again, and the divisor
# uses min(rows, cols) - 1 rather than the degrees of freedom.
#   x  object returned by chisq.test() for a two-way contingency table
# Returns a length-one numeric that keeps the "X-squared" name.
V <- function(x) {
  sqrt(x$statistic / (sum(x$observed) * (min(dim(x$observed)) - 1)))
}
V(chi_res)

# Same computation; as.numeric() drops the "X-squared" name from the result.
V <- function(x) {
  v <- sqrt(x$statistic / (sum(x$observed) * (min(dim(x$observed)) - 1)))
  as.numeric(v)
}
V(chi_res)

# Aggregate admission counts by gender, one row per applicant.
bickel <- data.frame(
  gender = c(rep("Male", 8442), rep("Female", 4321)),
  admit = c(
    rep("Admitted", 3738), rep("Deny", 4704),
    rep("Admitted", 1494), rep("Deny", 2827)
  )
)
mosaicplot(table(bickel), shade = TRUE)

# Read the HTML tables of a web page and render one of them with xtable.
library(XML)
library(xtable)  # provides xtable(), which is called below
theurl <- "http://en.wikipedia.org/wiki/Simpson's_paradox"
# NOTE(review): the page layout and transport (HTTP vs HTTPS) may have changed
# since this was written -- confirm that the URL is still readable and that
# table 6 is still the intended one.
tables <- readHTMLTable(theurl)
xtable(tables[[6]])
Distributions:
# Distributions: simulate normal data and compare it with the theoretical
# density and distribution functions.

# Histogram of 100 draws from N(mean = 0.5, sd = 0.1), on the count scale.
hist(rnorm(100, 0.5, 0.1))

# hist() shows counts by default; freq = FALSE switches to the density scale
# so that the dnorm() curve added below is drawn on the same y-axis scale.
hist(rnorm(100, 0.5, 0.1), freq = FALSE)
curve(dnorm(x, 0.5, 0.1), add = TRUE, col = "red")

# Cumulative distribution function of the same normal distribution.
curve(pnorm(x, 0.5, 0.1))

?rnorm
?rbeta
Linear models:
# Linear models: regress `mpg` on four other columns of `mtcars`.
fit <- lm(mpg ~ disp + hp + wt + drat, data = mtcars)

# Regression diagnostics from the car package (John Fox).
library(car)
outlierTest(fit)
qqPlot(fit, main = "QQ Plot")
leveragePlots(fit)

# Fitted values, printed alone, then next to the observed values, then
# plotted against them.
predict(fit)
cbind(mtcars$mpg, predict(fit))
plot(cbind(mtcars$mpg, predict(fit)))
Factor/PC:
# Factor analysis / principal components on `mtcars`.

# Principal components with princomp(), based on the correlation matrix.
fit <- princomp(mtcars, cor = TRUE)
fit
summary(fit)
loadings(fit)
plot(fit, type = "lines")
fit$scores
biplot(fit)

# prcomp() has no `cor` argument (an extra `cor =` is disregarded with a
# warning); standardising the variables is requested with `scale.` instead.
# First the unscaled, then the scaled decomposition.
prcomp(mtcars)
prcomp(mtcars, scale. = TRUE)

# Factor analysis with three factors: first with the default rotation, then
# refitted without rotation. Only the second fit is used below.
fit <- factanal(mtcars, 3)
fit <- factanal(mtcars, 3, rotation = "none")
print(fit, digits = 1, cutoff = .3, sort = TRUE)

# Plot the variables by their loadings on the first two factors. The matrix is
# named `factor_loadings` rather than `load` so base::load() is not shadowed.
factor_loadings <- fit$loadings[, 1:2]
plot(factor_loadings, type = "n")
text(factor_loadings, labels = names(mtcars), cex = .7)

# PCA with the FactoMineR package.
library(FactoMineR)
result <- PCA(mtcars)

library(GPArotation)
Check out ggplot2: a plotting system for R, based on the grammar of graphics.
# ggplot2 examples on the `diamonds` data set.
library(ggplot2)
?diamonds
str(diamonds)

# Axis labeller that formats numbers as dollar amounts, e.g. 5000 -> "$5,000".
# It replaces the `formatter = "dollar"` argument, which current ggplot2 no
# longer accepts; scale labels are now set through `labels =`.
#   x  numeric vector of axis breaks
# Returns a character vector of the same length as `x`.
dollar_labels <- function(x) {
  paste0("$", format(x, big.mark = ",", scientific = FALSE, trim = TRUE))
}

# Bar charts of clarity, built up one layer at a time.
ggplot(diamonds, aes(clarity)) +
  geom_bar()

ggplot(diamonds, aes(clarity)) +
  geom_bar() +
  ylab("N")

ggplot(diamonds, aes(clarity)) +
  geom_bar() +
  ylab("N") +
  facet_wrap(~ cut)

# ggtitle() replaces the defunct opts(title = ...).
ggplot(diamonds, aes(clarity)) +
  geom_bar() +
  ylab("N") +
  facet_wrap(~ cut) +
  theme_bw() +
  ggtitle("53.940 diamonds")

# Price against carat, with a smoother and dollar-formatted y-axis labels.
ggplot(diamonds, aes(carat, price)) +
  geom_point() +
  geom_smooth() +
  ylab("") +
  scale_y_continuous(labels = dollar_labels) +
  theme_bw() +
  ggtitle("53.940 diamonds")

# Price distribution per cut as box plots.
ggplot(diamonds, aes(cut, price)) +
  geom_boxplot() +
  xlab("") +
  ylab("") +
  scale_y_continuous(labels = dollar_labels) +
  theme_bw() +
  ggtitle("53.940 diamonds")
Loading/saving data:
# Loading data: read an SPSS file and a semicolon-separated CSV from the web.
library(foreign)

# SPSS file, read as a data frame; the variable labels are kept as an
# attribute of the result.
df <- read.spss(
  "http://ppke.snowl.net/files/2010/08/ESS_HUN_4_media1.sav",
  to.data.frame = TRUE
)
str(df)
attr(df, "variable.labels")
boxplot(f7 ~ f2_1, df)

# CSV file read with read.csv2() using the cp1252 encoding.
df <- read.csv2(
  "http://ppke.snowl.net/files/2010/08/mds_telepulesek.csv",
  fileEncoding = "cp1252"
)
# Move the first column into the row names, then drop it from the data.
row.names(df) <- df[, 1]
df[, 1] <- NULL

# Convert to a "dist" object and run classical multidimensional scaling.
df <- as.dist(df)
mds <- cmdscale(df)
mds

# Plot the resulting coordinates and label each point with its row name.
plot(mds)
text(mds[, 1], mds[, 2], labels = row.names(mds))
Suggested IDEs (Integrated Development Environments):
* Notepad++
* RStudio
* Eclipse + StatET
* Emacs + ESS