#' ---
#' title: "R intro"
#' author: ""
#' date: ""
#' output: html_document
#' ---
#'
#' ## PART I: R Basics
#' 
#' ### Basic math
2+2
1/3  # R as standard has more digits than it shows
print(1/3, digits = 12)
sqrt(2)
#' 
#' Saving the results
a <- 34+45  # can also use = instead of <-
a
#' 
#' ### Vectors
c(1, 5, -4) # c means combine/concatenate
v <- c(2, 3, -5); v  # several commands on one line are separated with semi-colon 
w <- 1:3; w  # colon is used for sequences of numbers
w <- seq(1, 5, by = 2) # `seq` is used for more general regular sequences
v+w  # adding vectors
v[2]  # extracting element 2 of v
v[2:3] # extracting element 2 and 3 of v
c(v, w) # easy to join several vectors into a long vector

#' ### Matrices
A <- matrix(1:9, 3, 3); A  # written by column
B <- matrix(1:9, 3, 3, byrow=TRUE); B  # written by row
C <- cbind(v, w); # bind vectors v and w as columns of a matrix

#' Manipulating elements
A[2,2]  # entry [2,2]
A[,2]  # column 2
A[2,]  # row 2
A[3,3] <- 0 # changing element at entry [3,3] in a matrix
A

#' All operations on vectors and matrices are elementswise
A+B  # fine for matrix sum
A*B  # not a proper matrix product - elementwise product
A^{-1}  # not proper matrix inverse - elementwise inverse

#' ### Help
#' All built-in functions have help pages accessible by `?`
#+ eval=FALSE
?matrix
#' If you don't know the name of the command, Google is your friend
#'
#' ### Data frames
#' Data frames look like matrices and they are the default data format in R
data.frame(1:4, 5:2, c(1,3,4,7))  # vectors get a column each
y <- data.frame(height = c(178, 182, 171), weight = c(72, 76, 71))  # the columns can be given meaningful names
y
y[,2]  # same extraction notation as matrices
y$weight  # or use $ with column name
str(y) # Gives the structure of any R object. Very useful for complicated objects.
#'
#' importing data using `read.table()`/`read.csv()`
#' No header is assumed by default in `read.table()`
beetles_url <- "https://asta.math.aau.dk/course/bayes/2021/?file=./beetles.dat"
beetles <- read.table(beetles_url)
head(beetles)
#' First line is assumed to be header by default in `read.csv()`
newcomb_url <- "https://asta.math.aau.dk/course/bayes/2021/?file=./newcomb.csv"
newcomb <- read.csv(newcomb_url)
head(newcomb)
#'
#' The `class` function is useful for figuring out what you are working with
class(newcomb)  # a data frame
class(newcomb[,1])  # a integer vector (almost the same as numeric in R)
class(newcomb[,2])  # a vector of floating point numbers (numeric)
str(newcomb) # Again, this is the best way to see all this in one go
#'
#' ### Plotting
#' The generic plot function
x <- newcomb$day
y <- newcomb$time
plot(x, y)
plot(y ~ x)  # the same, ~ means "as function of", note different order
plot(x, y, pch=3, col="red")  # many things can be controlled in plot
plot(newcomb)  # complex objects can also be put into plot - what it does depends on the object
#' Note: `plot` is a generic function, that under the hood dispatches to other functions
#' e.g. `plot.default` or `plot.data.frame`.  
#' Even though you only write `plot(...)` you may still need to look at the help
#' for these other functions
#'
#' ### Graphical data summaries
hist(y, breaks=20)  # good for getting an overview of data
boxplot(y ~ x)  # good for comparing data

#' ### Numerical data-summaries
mean(y)  # average
median(y)  # median
var(y)  # variance
sd(y)  # standard deviation
range(y)  # range
quantile(y)  # quantiles
summary(y)  # can be used for most objects in R
#'
#' ## PART II: Distributions in R
#' 
#' R can be used for calculating density functions and distribution functions of most known distributions
dnorm(1, mean=0, sd=1)  # density function for normal dist. with mean 0 and sd 1 evaluated at 1
pnorm(1, mean=0, sd=1)  # Corresponding distribution function
#'
#' let's plot them - the `curve` function is useful for plotting mathematical functions in R
curve(dnorm(x, mean=0, sd=1), from=-5, to=5)  # x must be called x in curve
curve(pnorm(x, mean=0, sd=1), from=-5, to=5)
#'
#' the inverse distribution function (quantile function)
qnorm(0.8, mean=0, sd=1)
#'
#' R can also simulate the distributions
rnorm(5, mean=0, sd=1)  # 5 simulations
hist(rnorm(1000, mean=0, sd=1))  # histogram of 1000 simulations
#'
#' Functions for handling distributions come with the following naming convention:  
#' First part:  
#' `d` = **D**ensity function   
#' `p` = distribution function (cumulative **P**robability)  
#' `q` = inverse distribution function (**Q**uantile)  
#' `r` = simulate (**R**andom number generator)  
#' Last part: distribution name, e.g:  
#' `norm` = normal  
#' `exp` = exponential  
#' `gamma` = gamma  
#' `beta` = beta  
#' `binom` = binomial  
#' etc.
curve(dgamma(x, shape = 3, rate = 0.5), from=0, to=20)
#'
#' ### Multivariate distribution -- example of a package
#' If R does not contain the functions/statistics you need, odds are that somebody has implemented it in a package.
#' Installing a package - (here a package for multivariate t and normal distribution)
#+ eval=FALSE
install.packages("mvtnorm")  # only need to install it once

#' ### Loading a package
library(mvtnorm)  # need to do this everytime R is started

#' In Rstudio you can also use the Packages tab (lower right panel by default).
#'
#' Now the following functions work for calculating the density of and simulating multivariate normal distributions
dmvnorm(c(1,2,2), mean = rep(0,3), sigma = diag(3))  # evaluating the 3-dim standard normal density
rmvnorm(5, mean = rep(0,3), sigma = diag(3)) # every row is a simulation

#' If you don't know what a package contains, you can try
#+ eval=FALSE
library(help = "mvtnorm")

#' It is possible to use a function from a package without loading the package
#' first via `library`. This is done using the :: notation like this:
mvtnorm::rmvnorm(5, mean = rep(0,3), sigma = diag(3))

#' There are thousands of other packages for specific needs.
#'
#' ## PART III: Programming -- functions, loops, etc.
#'
#' ### For-loop
#' Calculating 1+2+...+10 as an example
s <- 0
for (i in 1:10){
  s <- s + i
}  # any vector can be used instead of 1:10
s
#' Calculating the first ten Fibonacci numbers
f <- rep(0,10)
f[1] <- f[2] <- 1
for (i in 3:10){
  f[i] <- f[i-2]+f[i-1]
}
f
#' Note that built-in functions are usually faster than for-loops created from scratch
#'
#' ### If-then-else conditions
#' Determining the sign of a number
x <- -3
if (x<0) {
  signx <- -1 
} else{
  if (x==0){
    signx <- 0
  } else{
    signx <- 1
  }
}
signx

#' ### Functions
#' A function for finding the sign of a number
signfct <- function(x){
  # syntax: fct_name <- function(input1, input2, ...){blablabla}
  signx <- 0 # Assume 0 until found otherwise
  if (x<0) {
    signx <- -1 
  }
  if (x>0){
    signx <- 1
  }
  return(signx)
}
signfct(-3); signfct(0); signfct(0.2)
#' There is a built-in function `sign`
sign(-3); sign(0); sign(0.2)
sign(-3:4)  # this will even take vectors or matrices
signfct(-3:4)  # our function is not that smart, since `if` only accepts a single value
#' Morale: always think about all the types of input you would like to have and
#' try them out.