#' --- #' title: "R intro" #' author: "" #' date: "" #' output: html_document #' --- #' #' ## PART I: R Basics #' #' ### Basic math 2+2 1/3 # R as standard has more digits than it shows print(1/3, digits = 12) sqrt(2) #' #' Saving the results a <- 34+45 # can also use = instead of <- a #' #' ### Vectors c(1, 5, -4) # c means combine/concatenate v <- c(2, 3, -5); v # several commands on one line are separated with semi-colon w <- 1:3; w # colon is used for sequences of numbers w <- seq(1, 5, by = 2) # `seq` is used for more general regular sequences v+w # adding vectors v[2] # extracting element 2 of v v[2:3] # extracting element 2 and 3 of v c(v, w) # easy to join several vectors into a long vector #' ### Matrices A <- matrix(1:9, 3, 3); A # written by column B <- matrix(1:9, 3, 3, byrow=TRUE); B # written by row #' Manipulating elements A[2,2] # entry [2,2] A[,2] # column 2 A[2,] # row 2 A[3,3] <- 0 # changing element at entry [3,3] in a matrix A #' All operations on vectors and matrices are elementswise A+B # fine for matrix sum A*B # not a proper matrix product - elementwise product A^{-1} # not proper matrix inverse - elementwise inverse #' ### Help #' All built-in functions have help pages accessible by `?` #+ eval=FALSE ?matrix #' If you don't know the name of the command, Google is your friend #' #' ### Data frames #' Data frames look like matrices and they are the default data format in R data.frame(1:4, 5:2, c(1,3,4,7)) # vectors get a column each y <- data.frame(height = c(178, 182, 171), weight = c(72, 76, 71)) # the columns can be given meaningful names y y[,2] # same extraction notation as matrices y$weight # or use $ with column name str(y) # Gives the structure of any R object. Very useful for complicated objects. #' #' importing data using `read.table()`/`read.csv()` #' No header is assumed by default in `read.table()` beetles_url <- "https://asta.math.aau.dk/course/bayes/2018/?file=./beetles.dat" beetles <- read.table(beetles_url) head(beetles) #' First line is assumed to be header by default in `read.csv()` newcomb_url <- "https://asta.math.aau.dk/course/bayes/2018/?file=./newcomb.csv" newcomb <- read.csv(newcomb_url) head(newcomb) #' #' The `class` function is useful for figuring out what you are working with class(newcomb) # a data frame class(newcomb[,1]) # a integer vector (almost the same as numeric in R) class(newcomb[,2]) # a vector of floating point numbers (numeric) str(newcomb) # Again, this is the best way to see all this in one go #' #' ### Plotting #' The generic plot function x <- newcomb$day y <- newcomb$time plot(x, y) plot(y ~ x) # the same, ~ means "as function of", note different order plot(x, y, pch=3, col="red") # many things can be controlled in plot plot(newcomb) # complex objects can also be put into plot - what it does depends on the object #' Note: `plot` is a generic function, that under the hood dispatches to other functions #' e.g. `plot.default` or `plot.data.frame`. #' Even though you only write `plot(...)` you may still need to look at the help #' for these other functions #' #' ### Graphical data summaries hist(y, breaks=20) # good for getting an overview of data boxplot(y ~ x) # good for comparing data #' ### Numerical data-summaries mean(y) # average median(y) # median var(y) # variance sd(y) # standard deviation range(y) # range quantile(y) # quantiles summary(y) # can be used for most objects in R #' #' ## PART II: Distributions in R #' #' R can be used for calculating density functions and distribution functions of most known distributions dnorm(1, mean=0, sd=1) # density function for normal dist. with mean 0 and sd 1 evaluated at 1 pnorm(1, mean=0, sd=1) # Corresponding distribution function #' #' let's plot them - the `curve` function is useful for plotting mathematical functions in R curve(dnorm(x, mean=0, sd=1), from=-5, to=5) # x must be called x in curve curve(pnorm(x, mean=0, sd=1), from=-5, to=5) #' #' the inverse distribution function (quantile function) qnorm(0.8, mean=0, sd=1) #' #' R can also simulate the distributions rnorm(5, mean=0, sd=1) # 5 simulations hist(rnorm(1000, mean=0, sd=1)) # histogram of 1000 simulations #' #' Functions for handling distributions come with the following naming convention: #' First part: #' `d` = **D**ensity function #' `p` = distribution function (cumulative **P**robability) #' `q` = inverse distribution function (**Q**uantile) #' `r` = simulate (**R**andom number generator) #' Last part: distribution name, e.g: #' `norm` = normal #' `exp` = exponential #' `gamma` = gamma #' `beta` = beta #' `binom` = binomial #' etc. curve(dgamma(x, shape = 3, rate = 0.5), from=0, to=20) #' #' ### Multivariate distribution -- example of a package #' If R does not contain the functions/statistics you need, odds are that somebody has implemented it in a package. #' Installing a package - (here a package for multivariate t and normal distribution) #+ eval=FALSE install.packages("mvtnorm") # only need to install it once #' ### Loading a package library(mvtnorm) # need to do this everytime R is started #' In Rstudio you can also use the Packages tab (lower right panel by default). #' #' Now the following functions work for calculating the density of and simulating multivariate normal distributions dmvnorm(c(1,2,2), mean = rep(0,3), sigma = diag(3)) # evaluating the 3-dim standard normal density rmvnorm(5, mean = rep(0,3), sigma = diag(3)) # every row is a simulation #' If you don't know what a package contains, you can try #+ eval=FALSE library(help = "mvtnorm") #' It is possible to use a function from a package without loading the package #' first via `library`. This is done using the :: notation like this: mvtnorm::rmvnorm(5, mean = rep(0,3), sigma = diag(3)) #' There are thousands of other packages for specific needs. #' #' ## PART III: Programming -- functions, loops, etc. #' #' ### For-loop #' Calculating 1+2+...+10 as an example s <- 0 for (i in 1:10){ s <- s + i } # any vector can be used instead of 1:10 s #' Calculating the first ten Fibonacci numbers f <- rep(0,10) f[1] <- f[2] <- 1 for (i in 3:10){ f[i] <- f[i-2]+f[i-1] } f #' Note that built-in functions are usually faster than for-loops created from scratch #' #' ### If-then-else conditions #' Determining the sign of a number x <- -3 if (x<0) { signx <- -1 } else{ if (x==0){ signx <- 0 } else{ signx <- 1 } } signx #' ### Functions #' A function for finding the sign of a number signfct <- function(x){ # syntax: fct_name <- function(input1, input2, ...){blablabla} signx <- 0 # Assume 0 until found otherwise if (x<0) { signx <- -1 } if (x>0){ signx <- 1 } return(signx) } signfct(-3); signfct(0); signfct(0.2) #' There is a built-in function `sign` sign(-3); sign(0); sign(0.2) sign(-3:4) # this will even take vectors or matrices signfct(-3:4) # our function is not that smart, since `if` only accepts a single value #' Morale: always think about all the types of input you would like to have and #' try them out.