May Institute 2019: Beginner’s statistics in R

relative paths

## Open the help page of a function whose name is already known.
help("barplot")
## Show a function's formal arguments without opening its help page.
args(lm)
## Search the installed help pages by keyword when the function name is
## not known.
help.search("kruskal")
## `iris` is an example data frame that ships with R, and head() returns
## the first part of it. dput() prints a plain-text representation of the
## object that can be pasted elsewhere to re-create it.
dput(head(iris))
## structure(list(Sepal.Length = c(5.1, 4.9, 4.7, 4.6, 5, 5.4), 
##     Sepal.Width = c(3.5, 3, 3.2, 3.1, 3.6, 3.9), Petal.Length = c(1.4, 
##     1.4, 1.3, 1.5, 1.4, 1.7), Petal.Width = c(0.2, 0.2, 0.2, 
##     0.2, 0.2, 0.4), Species = structure(c(1L, 1L, 1L, 1L, 1L, 
##     1L), .Label = c("setosa", "versicolor", "virginica"), class = "factor")), row.names = c(NA, 
## 6L), class = "data.frame")
## Write the object to disk in R's binary format, then read it back.
## load() must be given the same file name that save() wrote to; it
## restores the object under its original name (`iris`).
save(iris, file = "iris.rda")
load(file = "iris.rda")
## Print the R version, platform, locale and the attached/loaded packages
## of the current session (useful to include when asking for help).
sessionInfo()
## R version 3.6.0 RC (2019-04-21 r76417)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/libf77blas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=fr_FR.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=fr_FR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=fr_FR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] compiler_3.6.0  magrittr_1.5    bookdown_0.9    htmltools_0.3.6
##  [5] tools_3.6.0     rstudioapi_0.10 yaml_2.2.0      Rcpp_1.0.1     
##  [9] stringi_1.4.3   rmarkdown_1.12  knitr_1.22      stringr_1.4.0  
## [13] digest_0.6.18   xfun_0.6        evaluate_0.13
## Install a package from CRAN. The function is install.packages()
## (plural); there is no install.package().
install.packages("ggplot2")
## Bioconductor packages are installed through BiocManager, which is
## itself a CRAN package.
install.packages("BiocManager") ## only once
BiocManager::install("MSnbase")
## Used interactively, R works as a calculator: an expression typed on its
## own is evaluated and its value printed.
3 + 5
## [1] 8
12 / 7
## [1] 1.714286
## `<-` binds a value to a name; a plain assignment prints nothing.
weight_kg <- 55
## Wrapping the assignment in parentheses assigns and prints in one step.
(weight_kg <- 55)
## [1] 55
## Evaluating the bare name prints the stored value as well.
weight_kg
## [1] 55
## Stored values can take part in arithmetic.
weight_kg * 2.2
## [1] 121
## Re-assigning a name replaces its previous value ...
weight_kg <- 57.5
weight_kg * 2.2
## [1] 126.5
## ... but objects computed earlier are not updated: `weight_lb` keeps the
## value derived from 57.5 after `weight_kg` is changed to 100.
weight_lb <- weight_kg * 2.2
weight_kg <- 100
## Exercise: predict the value of each object after every line.
mass <- 47.5
age <- 122
mass <- 2 * mass
age <- age - 20
mass_index <- mass / age
## A function call takes its input inside the parentheses and returns a
## value. `a` must exist before sqrt(a) is evaluated, otherwise R stops
## with "object 'a' not found", so it is defined here first.
a <- 9
b <- sqrt(a)
## With a single argument, round() rounds to a whole number.
round(3.14159)
## [1] 3
## args() lists the arguments and their defaults (`digits` defaults to 0);
## ?round opens the full help page.
args(round)
## function (x, digits = 0) 
## NULL
?round
## Arguments can be matched by name ...
round(3.14159, digits=2)
## [1] 3.14
## ... by position ...
round(3.14159, 2)
## [1] 3.14
## ... and, when all are named, given in any order.
round(digits=2, x=3.14159)
## [1] 3.14
## c() combines values into a vector. Build a numeric vector of peptide
## lengths and a character vector of peptide sequences, printing each.
pep_lens <- c(17, 13, 7)
pep_lens
## [1] 17 13  7
peps <- c("VESITARHGEVLQLRPK", "IDGQWVTHQWLK", "LVILLFR")
peps
## [1] "VESITARHGEVLQLRPK" "IDGQWVTHQWLK"      "LVILLFR"
## length() counts the elements of a vector.
length(pep_lens)
## [1] 3
length(peps)
## [1] 3
## class() reports the type of the elements.
class(pep_lens)
## [1] "numeric"
class(peps)
## [1] "character"
## str() gives a compact overview: type, extent and first values.
str(pep_lens)
##  num [1:3] 17 13 7
str(peps)
##  chr [1:3] "VESITARHGEVLQLRPK" "IDGQWVTHQWLK" "LVILLFR"
## Vectors can be extended at either end.
pep_lens <- append(pep_lens, 5)              # add to the end of the vector
pep_lens <- append(pep_lens, 10, after = 0)  # add to the beginning of the vector
pep_lens
## [1] 10 17 13  7  5
## Exercise: all elements of a vector share one type, so mixed inputs are
## coerced. What type does each of these vectors end up with?
num_char <- c(1, 2, 3, "a")
num_logical <- c(1, 2, 3, TRUE)
char_logical <- c("a", "b", "c", TRUE)
tricky <- c(1, 2, 3, "4")
## Square brackets extract elements by position (indices start at 1).
peps <- c("VESITARHGEVLQLRPK", "IDGQWVTHQWLK", "LVILLFR", "ARHGILPK")
peps[2]
## [1] "IDGQWVTHQWLK"
## Several indices can be given at once, in any order ...
peps[3:2]
## [1] "LVILLFR"      "IDGQWVTHQWLK"
## ... and repeated, which builds a longer vector from the original.
more_peps <- peps[c(1:3, 2, 1, 4)]
more_peps
## [1] "VESITARHGEVLQLRPK" "IDGQWVTHQWLK"      "LVILLFR"          
## [4] "IDGQWVTHQWLK"      "VESITARHGEVLQLRPK" "ARHGILPK"
## A logical vector keeps the elements at the TRUE positions.
pep_lens <- c(17, 12, 7, 8)
pep_lens[rep(c(TRUE, FALSE), times = 2)]
## [1] 17  7
## A comparison returns such a logical vector: TRUE where the condition holds.
pep_lens > 10
## [1]  TRUE  TRUE FALSE FALSE
## Using the comparison as the index selects only the values above 10.
pep_lens[pep_lens > 10]
## [1] 17 12
## Conditions are combined element-wise with | (or) and & (and).
pep_lens[pep_lens > 15 | pep_lens < 8]
## [1] 17  7
pep_lens[pep_lens == 10 & pep_lens <= 12]
## numeric(0)
## The same approach works on character vectors: this returns both peptides.
peps[peps == "LVILLFR" | peps == "IDGQWVTHQWLK"]
## [1] "IDGQWVTHQWLK" "LVILLFR"
## %in% tests each element for membership in a set, which scales better
## than chaining many == comparisons with |.
peps %in% c("IDGQWVTHQWLK", "LVILLFR", "SITARH", "VESITA", "ARHGILGHIIHKKP")
## [1] FALSE  TRUE  TRUE FALSE
peps[peps %in% c("IDGQWVTHQWLK", "LVILLFR", "SITARH", "VESITA", "ARHGILGHIIHKKP")]
## [1] "IDGQWVTHQWLK" "LVILLFR"
## Missing values are represented by NA, and most summary functions
## return NA as soon as their input contains one.
heights <- c(2, 4, 4, NA, 6)
mean(heights)
## [1] NA
max(heights)
## [1] NA
## na.rm = TRUE drops the missing values before computing.
mean(heights, na.rm = TRUE)
## [1] 4
max(heights, na.rm = TRUE)
## [1] 6
## Extract those elements which are not missing values.
heights[!is.na(heights)]
## [1] 2 4 4 6
## Returns the object with incomplete cases removed. The returned object is atomic.
na.omit(heights)
## [1] 2 4 4 6
## attr(,"na.action")
## [1] 4
## attr(,"class")
## [1] "omit"
## Extract those elements which are complete cases.
heights[complete.cases(heights)]
## [1] 2 4 4 6
## Exercise: compute the median of a vector that contains missing values.
lens <- c(10, 24, NA, 18, NA, 20)
median(lens) ## NA
## [1] NA
## Option 1: remove the NAs first, then summarise the cleaned vector
## (`lens2`, not the original `lens`).
lens2 <- na.omit(lens)
median(lens2)
## [1] 19
## Option 2: let median() drop the NAs itself.
median(lens, na.rm = TRUE)
## [1] 19

May Institute 2019: Beginner’s statistics in R

Chapter 1 Getting started with R and RStudio

1.1 R and RStudio

1.1.1 What is R? What is RStudio?

1.1.2 Why learn R?

R does not involve lots of pointing and clicking, and that’s a good thing

R code is great for reproducibility

R is interdisciplinary and extensible

R works on data of all shapes and sizes

R produces high-quality graphics

R has a large community

Not only is R free, but it is also open-source and cross-platform

1.1.3 Knowing your way around RStudio

1.1.4 Getting set up

1.1.5 Organizing your working directory

1.2 Interacting with R

1.3 Seeking help

Use the built-in RStudio help interface to search for more information on R functions

I know the name of the function I want to use, but I’m not sure how to use it

I want to use a function that does X, there must be a function for it but I don’t know which one…

I am stuck… I get an error message that I don’t understand

Asking for help

Where to ask for help?

1.3.1 More resources

1.4 Installing packages

1.5 Introduction to R

1.5.1 Creating objects in R

1.5.2 Comments

1.5.3 Functions and their arguments

1.5.4 Objects vs. variables

1.6 Vectors and data types

1.6.1 Conditional subsetting

1.7 Missing data

1.8 R markdown