Basics
· R is case sensitive, object-oriented
· A command ends with a semi-colon (;). The last semi-colon can be omitted.
· A comment begins with # regardless of its location. The single quotes ('') and double quotes ("") are used interchangeably.
· Packages contain data sets and functions, and are loaded through library().
· Objects include vectors, lists, data frames, matrices (array), and factors.
· An R list is an object consisting of an ordered collection of objects known as its components. Lists are a general form of vector in which the various elements need not be of the same type, and are often themselves vectors or lists.
· Data frames are matrix-like structures, in which the columns can be of different types. A data frame is a list with class "data.frame".
· A factor is a vector object used to specify a discrete classification (grouping) of the components of other vectors of the same length.
· Matrices or more generally arrays are multi-dimensional generalizations of vectors. An array can be considered as a multiply subscripted collection of data entries
· The "pi" is the constant 3.141592654. The "NA" indicates a missing value (default).
· The "pkg" (package); "d" (data frame); "m" (matrix); "v" (vector), url, file (file), obj (objects), fit (fitted model), n (number); s (string).
Basic Commands
· * quit(); q()
· * help(command); help.start()
· * search(); help.search()
· * dir(); methods()
· * library(p); identify(); attach(); detach()
· * remove(); rm()
· * start:end; c(); rep(); seq()
· * scan(); print(); str(); ls()
· * cat(); cat("concatenate", c, "and print", "\t")
· * options(prompt='.', continue="///", digits=10); getOption("width")
· * source(); source.url() /* run commands in a file */
·
Simple examples
· library() # list packages available
· library(car) # load a package
· list(data()) # list data sets in the current package
· summary(Davis)
· list(Davis)
· list(Davis$weight)
· stem(Davis[,2]) # equal to stem(Davis$weight)
· stem(Davis$height, scale=4)
· boxplot(Davis$weight)
· w<-Davis$weight
· h<-Davis$height
· plot(w ~ h)
· cor(Davis[,c(2:3)])
· cor.test(w,h)
· t.test(Davis[,2], mu=65)
· t.test(Davis$height, Davis$weight, mu=100, paired=FALSE)
· var.test(Davis$height, Davis$weight)
· d<-read.csv("c:/temp/R/nes.csv", header=TRUE)
· list(names(d)) # list variable names
OPERATOR/FUNCTION
Operators
· * <- (left assignment), -> (right assignment)
· * +, -, *, /, ^, %% (modulus)
· * >, >=, <, <=, == (equal), != (not equal)
· * & (and), | (or)
· * %*% (matrix product); %/% (integer division)
· * %o% (Outer product); %x% (Kronecker product)
· * %in% (Matching operator);
Functions
* abs(); sin(); cos(); tan(); exp(); sqrt(); min(); max()
* log(); log(v,10); log10(); log2(); log(v, base=10)
* mean(); sum(); median(); range(); var(); sd()
* rank(); ave(v, group); by(group)
* c(a, b, c); c(start:end); seq(start, end); seq(10, 100, by=5)
* rep(n, time); rep(7, 3); rep(start:end, time)
* rep(1:3, c(2,2,2)); rep(1:3, each=2); rep(1:3, c(1:3))
* seq(1,4); seq(1,10, by=2); seq(0,1, length=10)
* length(), sort(), order(); rev(v) ## to reverse
* dnorm(1.96); dt(1.96, 100); df(1.96, 1, 100); dchisq(1.96, 10)
* pnorm(1.96); pt(1.96, 100); pf(1.96, 1, 100); pchisq(1.96, 10)
* rpois(n, lambda); rnorm(n); rt(n, df); rt(n, df=c(1:10)); rexp(n)
* substring(s, start, stop); substr(s, start, stop); nchar(s)
* date()
* mode() ## type of object
INPUT OUTPUT
Reading Text Files
* source(f); /* to execute commands in the file */
* read.table(f); read.table.url(url)
* download.file(url); url.show(url)
* m<-read.table("f:/temp/cigar.txt", header=TRUE)
* m<-read.table('f:/temp/cigar.txt')
* names(m)<-c("a", "b", "c")
* read.csv(f, header=TRUE, sep=",", quote="\"", dec=".")
* read.csv2(f, header=TRUE, sep=";", quote="\"", dec=",")
* read.delim(f, header=TRUE, sep="\t", quote="\"", dec=".")
* read.delim2(f, header=TRUE, sep="\t", quote="\"", dec=",")
* m<-read.csv("nes2.csv", header=TRUE)
* read.fwf(file, widths=c(3,5,3), header=FALSE, sep="", as.is=FALSE)
* as.is=TRUE; as.is=T # not to be converted into a factor
* na.strings<-c(".", "NA", "", "#") # characters for missing
* cnt=count.fields(df); which(cnt==7);
Reading Data Frames
* load(d);
* data(d);
data(d, package="p")
* data.frame(v1, v2) /* to make a data frame out of vectors */
* m3<-data.frame(as.matrix(m[,2:4]))
* m2<-edit(m); m2<-edit(data.frame(m)) # modify the dataframe
* data.entry(df)
Handling Data
* m2<-match(v1, v2, nomatch=0) # data merging
* m2<-match(m[,1], m[,3])
merge(df1, df2, by="name") #merge two data frames by common column
Writing Data
* cat(); print()
* cat("y x1 x2", "2 4 2", "5 2 7", file="sample.txt", sep="\n")
* write(obj, f)
* write.table(df, file='firms.csv', sep=",", row.names=NA, col.names=NA)
* save(obj, file=f); save.image(f)
* sink(); format()
MATRICES
Defining Matrices
* m<-c(1, 2, 3, 4); c(1, 2, 3, 4)->m; assign("m", c(1, 2, 3, 4))
* m<-data.frame(column1=c(1,2,3), column2=c(4,5,6)); ## 3 by 2
* rep(c(1,2,3), 2); rep(c(1,2,3), each=2);
* rep(c(1,2,3), c(2,2,2)); m<-c(c1=15, c2=54, c3=50)
* seq(1,4); seq(1,10, by=2); seq(0,1, length=10);
* intm<-1:4; intm<-numeric(); intm[1]<-1; intm[2]<-2
* strm
* blm<-c(T,F); blm<-v1>10; ## a boolean vector of TRUE and FALSE
* m<-scan()
* mm<-matrix(1:12,4); mm<-matrix(1:12, nrow=4)
* mm<-matrix(1:12, ncol=3); mm<-matrix(1:12, nrow=4)
* mm<-matrix(1:12, nrow=4, ncol=3); mm<-matrix(1:12, 4, 3)
* arrm<-array(1:10); arrm<-array(1:10, dim=c(2,5))
* cbind(); rbind(); gl(); expand.grid()
* list()
Referring Matrices
* m[,2]; v=m[2,]; m[-1, -3] ## to extract elements
* m[c(1, 5, 6)]; m2=m[-c(1, 5, 6)] ## to extract elements
* m<-c(c1=15, c2=54, c3=50); m<-c("c1", "c3")
* m2<-m$c2; m2<-m[,2]; m2<-m[,"c2"]; m2<-m[[2]]
* m[,3:5]; m3<-m[,c(3, 4, 5)]; m3<-m[,c("c3", "c4", "c5")]
* m<-c(4, 2, 4); names(m)<-c("Grape", "Pear", "Apple")
* m1$v2 /*variable 2 of the data frame 1*/
* which(); which.max(); which.min()
* attr(m, which); attributes(obj)
Matrix Functions
* t(); det(); rank(); eigen(); diag(); prod(); crossprod()
* sum(); mean(); var(); sd(); min(); max(); prod(); cumsum(); cumprod()
* is.na(m) ## to check if m contains a missing value
* rowSums(); colSums(); nrow(); ncol()
* dim(m); dimnames(m)
* merge(df1, df2)
* as.factor(); as.matrix(), as.vector(); /* conversion*/
* is.factor(); is.matrix(), is.vector();
* class(); unclass()
* na.omit(); na.fail(); unique(); table(); sample()
* as.array(); as.data.frame()
* as.numeric(); as.character(); as.logical(); as.complex()
REGRESSION
Ordinary Least Squares (OLS)
* lm(); glm()
* m.ols<-lm(v1~v2+v3, data=m) ## linear model
* lm(v1~v2+v3, data=m); summary(lm(v1~v2+v3, data=m)); summary(m.ols)
* names(m.ols); coef(m.ols); fitted(m.ols); resid(m.ols)
* predict(fit); AIC(fit); logLik(fit); deviance(fit)
* model.matrix(v1~v2+v3, data=m)
* m.ols2<-model.matrix(v1~v2+v3, data=m); summary(m.ols2)
Binary Response Regressions
* m.logit<-glm(v1~v2+v3,family=binomial(link=logit),data=m)
* summary(m.logit); coef(m.logit); fitted(m.logit); resid(m.logit)
* lsfit(v1,v2)
* nls(); m.nonlin<-lm(v1~v2+I(v2^2), data=m) ## I() is needed so that ^ means arithmetic squaring inside a formula
* anova(m.ols, m.nonlin)
* m.qr<-qr(m) ## QR Decomposition of a Matrix
STATISTICS
Descriptives
* summary(m); fivenum(m)
* stem(v); boxplot(v); boxplot(v1, v2); hist(v)
* qqnorm(v); qqline(v)
* rug(); lines()
* table() /*to make a table*/
* tabulate()
Multivariate Analysis
* cor(m); cor(sqrt(m)) ## Pearson correlation
* cor.test(v1, v2)
* prcomp() /* Principal components in the stats package (formerly mva) */
* kmeans() /* Kmeans cluster analysis in the stats package (formerly mva) */
* factanal() /* Factor analysis in the stats package (formerly mva) */
* cancor() /* Canonical correlation in the stats package (formerly mva) */
Categorical Data Analysis
* chisq.test(v1,v2) ## Pearson Chi-squared Test
* fisher.test(v1,v2) ## Fisher Exact Test
* friedman.test(v1,v2) ## Friedman Test
* prop.test(); binom.test() ## sign test
* kruskal.test(v1,v2) ## Kruskal-Wallis Rank Sum Test
* wilcox.test(v1,v2) ## Wilcoxon Rank Sum (Mann-Whitney) Test
* ks.test(v1,v2) ## Two Sample Kolmogorov-Smirnov Test
* bartlett.test(v1,v2) ## Bartlett Test for Homogeneity of Variances
ANOVA
T-test
* t.test(v1,v2); t.test(v1,v2, var.equal=FALSE)
* t.test(v1,v2, mu=0, paired=FALSE)
* t.test(v1,v2, mu=10, paired=F, var.equal=T)
* power.t.test(v1,v2); pairwise.t.test()
* var.test(v1,v2) ## F test for equal variance
ANOVA
* m.anova<-aov(v1~v2+v3, data=m)
* aov(); anova()
* summary(m.anova)
* power.anova.test() ## Power calculations for balanced one-way ANOVA tests
PROGRAMMING
Modules
function_name<-function(arguments) {...}
mile.to.km<-function(mile) {mile*8/5}
km<-mile.to.km(c(35, 55, 75))
Flow Control
if (condition) {...} else if (condition) {...} else {...}
while (condition ) {...} # {} may be omitted for a single line expression
for (index in start:end) {...}
total <- 0; for (i in 1:100) {total <- total + i} # initialize the accumulator first; avoid naming it sum, which masks the built-in
repeat {...}
switch (statement, list)
Programming Functions
* expression(); parse(); deparse(); eval()
* optim() /* general-purpose optimization */
* nlm() /* Newton algorithm */
* lm() /* linear models */
* nls() /* nonlinear least squares model */
GRAPHICS
Plotting
* plot(y~x, data=m, pch=16) # plotting character (pch)
* pairs(m) # scatterplot matrix
* xyrange<-range(m) # to get range of m
* plot(y~x, data=m, xlim=xyrange, ylim=xyrange)
* abline(0,1)
* plot((0:10), sin((0:10)*pi), type="l") # type "l" (lowercase L) joins the points with lines
* barplot(); boxplot(); stem(); hist();
* matplot() /* matrix plot */
* pairs(m) /* scatterplots */
* coplot() /* conditional plot */
* stripplot() /* strip plot */
* qqplot(); qqnorm(); qqline() /* quantile-quantile plot */
Options
* points() # to add points to a plot
* lines() # to add lines
* text() # to add texts
* mtext() # to add margin texts
* axis() # to control axis
* par(cex=1.25, mex=1.25)
* par(mfrow=c(2,2), mfcol=c(1,1))
No comments:
Post a Comment