Commit 7473b9c2 authored by Bernd Klaus's avatar Bernd Klaus
Browse files

edited text up to data frames section

parent abe40aab
......@@ -4,3 +4,4 @@ Predoc_Course_2016_cache/
Predoc_Course_2016_files/
omics_practicals.pdf
R-lab_files/
R-lab_cache/
## ----options, include=FALSE----------------------------------------------
library(knitr)
options(digits=3, width=80)
opts_chunk$set(echo=TRUE,tidy=FALSE,include=TRUE,
dev='png', fig.width = 6, fig.height = 3.5, comment = ' ', dpi = 300,
options(digits = 3, width = 80)
opts_chunk$set(echo= TRUE, tidy=FALSE,include = TRUE,
dev= 'png', fig.width = 6, fig.height = 3.5,
comment = ' ', dpi = 300,
cache = TRUE)
## ----required_packages, echo = TRUE, warning=FALSE, results="hide"-------
## ----required_packages, echo = FALSE, warning=FALSE, results="hide"------
suppressPackageStartupMessages({
library("TeachingDemos")
library("openxlsx")
library("multtest")
library("Biobase")
library("tidyverse")
library("cowplot")
})
## ----required packages and data, echo = TRUE-----------------------------
library("TeachingDemos")
library("openxlsx")
library("multtest")
library("Biobase")
library("tidyverse")
library("cowplot")
## ----ex-O-1, echo = TRUE-------------------------------------------------
## ----simple_ex, echo = TRUE----------------------------------------------
4 + 6
## ----ex-O-2, echo = TRUE-------------------------------------------------
## ----simple_ex_2, echo = TRUE--------------------------------------------
x <- 6
y <- 4
z <- x+y
z
## ----ex-O-3, echo = TRUE-------------------------------------------------
## ----simple_ex_3, echo = TRUE--------------------------------------------
ls()
## ----ex-O-4, echo = TRUE-------------------------------------------------
## ----simple_ex_4, echo = TRUE--------------------------------------------
sqrt(16)
## ----ex-O-5, echo = TRUE-------------------------------------------------
z <- c(5,9,1,0)
## ----simple_ex_5, echo = TRUE--------------------------------------------
z <- c(5, 9 , 1 ,0)
## ----ex-O-5b, echo = TRUE------------------------------------------------
x <- c(5,9)
y <- c(1,0)
z <- c(x,y)
## ----simple_ex_5b, echo = TRUE-------------------------------------------
x <- c(5, 9)
y <- c(1, 0)
z <- c(x, y)
## ----ex-O-6, echo = TRUE-------------------------------------------------
seq(1,9,by=2)
seq(8,20,length=6)
## ----simple_ex_6, echo = TRUE--------------------------------------------
seq(1, 9, by=2)
seq(8, 20, length=6)
## ----ex-O-7, echo = TRUE-------------------------------------------------
x <- seq(1,10)
## ----simple_ex_7, echo = TRUE--------------------------------------------
x <- seq(1, 10)
## ----ex-O-8, echo = TRUE-------------------------------------------------
## ----simple_ex_8, echo = TRUE--------------------------------------------
rep(1:3,6)
## repeat each element six times
rep(1:3,c(6,6,6))
## simplified
rep(1:3,rep(6,3))
rep(1:3, times = c(6,6,6))
## ----vec-----------------------------------------------------------------
x <- 1:5; y <- 5:1
x
y
x <- 1:5
y <- 5:1
x + y
x^2
# another example
x <- c(6,8,9)
y <- c(1,2,4)
x + y
x * y
## ----vec-2---------------------------------------------------------------
......@@ -73,7 +72,7 @@ x + 2
## ----calcEx, eval = FALSE------------------------------------------------
## x + cos(pi/y)
## ----summary-1, echo = TRUE--------------------------------------------
## ----summary_1, echo = TRUE--------------------------------------------
x <- c(7.5,8.2,3.1,5.6,8.2,9.3,6.5,7.0,9.3,1.2,14.5,6.2)
mean(x)
var(x)
......@@ -85,21 +84,18 @@ x[7:12]
summary(x[1:6])
summary(x[7:12])
## ----subscr-2, echo = TRUE---------------------------------------------
## ----subscr_2, echo = TRUE---------------------------------------------
x[c(2,4,9)]
x[-(1:6)]
# compare to
x[7:12]
## ----sort-rank, echo = TRUE--------------------------------------------
x <- c(1.3,3.5,2.7,6.3,6.3)
x <- c(1.3, 3.5, 2.7, 6.3, 6.3)
sort(x)
order(x)
x[order(x)]
rank(x)
## ----object-examples, echo = TRUE--------------------------------------
#assign value "9" to an object
a <- 9
# is a a string?
is.character(a)
......@@ -118,34 +114,26 @@ class(a)
str(a)
## ----cbind-ex, echo = TRUE---------------------------------------------
x <- c(5,7,9)
y <- c(6,3,4)
x <- c(5, 7 ,9)
y <- c(6, 3 ,4)
z <- cbind(x,y)
z
## dimensions: 3 rows and 2 columns
dim(z)
### matrix constructor
z <- matrix(c(5,7,9,6,3,4),nrow=3)
## ----matrix_direct, echo = TRUE----------------------------------------
z <- matrix(c(5, 7, 9, 6, 3, 4), nrow = 3)
## ----Matrix-ex, echo = TRUE--------------------------------------------
z <- matrix(c(5,7,9,6,3,4),nr=3,byrow=T)
z <- matrix(c(5, 7 , 9 ,6 ,3 , 4), nrow = 3, byrow = TRUE)
z
## ----Matrix-op, echo = TRUE--------------------------------------------
y <- matrix(c(1,3,0,9,5,-1),nrow=3,byrow=T)
y <- matrix(c(1, 3, 0, 9, 5, -1), nrow = 3, byrow = TRUE)
y
y + z
y * z
## ----Matrix-op-2, echo = TRUE------------------------------------------
x <- matrix(c(3,4,-2,6),nrow=2,byrow=T)
x
y %*% x
## ----Matrix-op-3, echo = TRUE------------------------------------------
t(z)
solve(x)
## ----Matrix-op-4, echo = TRUE------------------------------------------
z[1,1]
z[,2]
......
......@@ -7,7 +7,7 @@ output:
toc: true
toc_float: true
highlight: tango
code_folding: hide
code_folding: show
BiocStyle::pdf_document2:
toc: true
highlight: tango
......@@ -23,9 +23,10 @@ rmarkdown::render('R-lab.Rmd', BiocStyle::pdf_document2())
```{r options, include=FALSE}
library(knitr)
options(digits=3, width=80)
opts_chunk$set(echo=TRUE,tidy=FALSE,include=TRUE,
dev='png', fig.width = 6, fig.height = 3.5, comment = ' ', dpi = 300,
options(digits = 3, width = 80)
opts_chunk$set(echo= TRUE, tidy=FALSE,include = TRUE,
dev= 'png', fig.width = 6, fig.height = 3.5,
comment = ' ', dpi = 300,
cache = TRUE)
```
......@@ -33,18 +34,24 @@ cache = TRUE)
# Required packages and other preparations
```{r required_packages, echo = TRUE, warning=FALSE, results="hide"}
```{r required_packages, echo = FALSE, warning=FALSE, results="hide"}
suppressPackageStartupMessages({
library("TeachingDemos")
library("openxlsx")
library("multtest")
library("Biobase")
library("tidyverse")
library("cowplot")
})
```
```{r required packages and data, echo = TRUE}
library("TeachingDemos")
library("openxlsx")
library("multtest")
library("Biobase")
library("tidyverse")
library("cowplot")
```
# Introduction and getting help
......@@ -64,46 +71,49 @@ at which point you will also be prompted as to whether or not you want to save t
workspace into your working directory. If you do not want to, it will be lost. Remember the ways to get help:
* Just ask!
* `help.start()` and the HTML help button in the Windows GUI.
* `help` and `?`: `help("data.frame")` or `?help`.
* `help.search()`, `apropos()`
* `browseVignettes("package")`
* rseek.org
* use tab--completion in RStudio, this will also display help--snippets
* Just ask!
* `help.start()` and the HTML help button in the Windows GUI.
* `help` and `?`: `help("data.frame")` or `?help`.
* `help.search()`, `apropos()`
* `browseVignettes("package")`
* rseek.org
* use tab--completion in RStudio, this will also display help--snippets
This tutorial makes use of packages from the [tidyverse](https://cran.r-project.org/web/packages/tidyverse/vignettes/manifesto.html)
and was written using [rmarkdown](http://r4ds.had.co.nz/r-markdown.html). The tidyverse
is a set of R packages that try to make your life easier when working with data
in R. They improve the basic R experience tremendously and are designed to foster the human
understanding of programming code.
# Elementary objects and arithmetics
# Basics -- objects and arithmetic
R stores information in objects and operates on objects. The simplest objects are scalars, vectors and matrices.
But there are many others: lists and data frames for example. In advanced use of R it can also be
useful to define new types of objects, specific to a particular application. We will stick with just the most commonly used objects here.
An important feature of R is that it will do different things on different types of objects. For
example, type:
The elementary unit in R is an object, and the simplest objects are scalars,
vectors and matrices. R is designed with interactivity in mind, so you can get
started by simply typing:
```{r ex-O-1, echo = TRUE}
```{r simple_ex, echo = TRUE}
4 + 6
```
So, R does scalar arithmetic returning the scalar value 10. In fact, R returns a vector of
length 1 - hence the [1] denoting the first element of the vector.
What does R do? It sums up the two numbers and returns the scalar value 10.
In fact, R returns a vector of length 1 - hence the [1] denoting the first element of the vector.
We can assign objects values for subsequent use. For example:
```{r ex-O-2, echo = TRUE}
```{r simple_ex_2, echo = TRUE}
x <- 6
y <- 4
z <- x+y
z
```
would do the same calculation as above, storing the result in an object called z. We can look at
the contents of the object by simply typing its name. At any time we can list the objects which we have created:
would do the same calculation as above, storing the result in an object called `z`.
We can look at the contents of the object by simply typing its name. At any time we can
list the objects which we have created:
```{r ex-O-3, echo = TRUE}
```{r simple_ex_3, echo = TRUE}
ls()
```
......@@ -112,27 +122,27 @@ this object, in this case, the commands of the function. The use of parentheses,
the function is executed and its result --- in this case, a list of the objects in the current environment --- displayed.
More commonly, a function will operate on an object, for example
```{r ex-O-4, echo = TRUE}
```{r simple_ex_4, echo = TRUE}
sqrt(16)
```
calculates the square root of 16. Objects can be removed from the current workspace with the
function `rm()`. There are many standard functions available in R,
and it is also possible to create new ones. Vectors can be created in R in a number of ways.
We can describe all of the elements:
We can describe all of the elements:
```{r ex-O-5, echo = TRUE}
z <- c(5,9,1,0)
```{r simple_ex_5, echo = TRUE}
z <- c(5, 9 , 1 ,0)
```
Note the use of the function `c` to concatenate or "glue together" individual elements. This function
can be used much more widely, for example
```{r ex-O-5b, echo = TRUE}
x <- c(5,9)
y <- c(1,0)
z <- c(x,y)
```{r simple_ex_5b, echo = TRUE}
x <- c(5, 9)
y <- c(1, 0)
z <- c(x, y)
```
......@@ -142,9 +152,9 @@ Sequences can be generated as follows:
```{r ex-O-6, echo = TRUE}
seq(1,9,by=2)
seq(8,20,length=6)
```{r simple_ex_6, echo = TRUE}
seq(1, 9, by=2)
seq(8, 20, length=6)
```
These examples illustrate that many functions in R have optional arguments, in this case, either
......@@ -153,8 +163,8 @@ out both of these options, R will make its own default choice, in this case assu
of 1. So, for example,
```{r ex-O-7, echo = TRUE}
x <- seq(1,10)
```{r simple_ex_7, echo = TRUE}
x <- seq(1, 10)
```
also generates a vector of integers from 1 to 10.
......@@ -166,35 +176,29 @@ or simply `?functionname` where
`functionname` is the name of the function you are interested in.
This will usually help and will often include
examples to make things even clearer.
Another useful function for building vectors is the `rep` command for repeating things.
Examples:
Another useful function for building vectors is the `rep` command for repeating things:
the first command will repeat the vector `r 1:3` six times, while the second
one will repeat each element six times.
```{r ex-O-8, echo = TRUE}
```{r simple_ex_8, echo = TRUE}
rep(1:3,6)
## repeat each element six times
rep(1:3,c(6,6,6))
## simplified
rep(1:3,rep(6,3))
rep(1:3, times = c(6,6,6))
```
As explained above, R will often adapt to the objects it is asked to work on. An example is the
R will often adapt to the objects it is asked to work on. An example is the
vectorized arithmetic used in R:
```{r vec}
x <- 1:5; y <- 5:1
x
y
x <- 1:5
y <- 5:1
x + y
x^2
# another example
x <- c(6,8,9)
y <- c(1,2,4)
x + y
x * y
```
```
showing that R uses component-wise arithmetic on vectors. R will also try to make sense if objects
are mixed. For example,
showing that R uses component-wise arithmetic on vectors. R will also try to
make sense of a statement if objects
are mixed. For example:
```{r vec-2}
x <- c(6,8,9)
......@@ -220,7 +224,7 @@ of a vector. R also has basic calculator capabilities:
### Exercise: Simple R operations
__Exercise: Simple R operations__
Define
......@@ -237,9 +241,9 @@ Decide what the result will be of the following:
* ` sum(x) `
* ` sum(x^2) `
* ` x+y `
* x*y `
* `x*y `
* ` x-2 `
* ` x\textasciicircum 2 `
* ` x^2 `
Use R to check your answers.
......@@ -262,7 +266,7 @@ you are right:
* ` rep(1:4,4) `
* ` rep(1:4,rep(3,4)) `
* Use the rep function to define simply the following vectors in R.
Use the `rep` function to define the following vectors in R in a simple way.
* ` 6,6,6,6,6,6 `
* ` 5,8,5,8,5,8,5,8 `
......@@ -270,7 +274,7 @@ you are right:
### Exercise: R as a calculator
__Exercise: R as a calculator__
Calculate the following expression,
where `x` and `y` have values `-0.25` and `2`
respectively.
......@@ -290,7 +294,7 @@ Let's suppose we've collected some data from an experiment and stored them
in an object `x`.
Some simple summary statistics of these data can be produced:
```{r summary-1, echo = TRUE}
```{r summary_1, echo = TRUE}
x <- c(7.5,8.2,3.1,5.6,8.2,9.3,6.5,7.0,9.3,1.2,14.5,6.2)
mean(x)
var(x)
......@@ -309,28 +313,29 @@ summary(x[1:6])
summary(x[7:12])
```
Other subsets can be created in the obvious way. For example:
You simply put the indices of the elements you want to access in square brackets.
Note that R starts counting from 1 onwards.
Other subsets can be created in the obvious way. Putting a minus sign in front of the
indices excludes the corresponding elements:
```{r subscr-2, echo = TRUE}
```{r subscr_2, echo = TRUE}
x[c(2,4,9)]
x[-(1:6)]
# compare to
x[7:12]
```
Additionally, there are some useful commands to order and sort vectors
* `{sort}`: sort in increasing order
* `{order}`: orders the indexes is such a way that the elements
* `sort`: sort in increasing order
* `order`: orders the indices in such a way that the elements
of the vector are sorted, i.e. `sort(v) = v[order(v)]`
* `{rank}`: gives the ranks of the elements
of a vector, different options for handling \textit{ties} are
* `rank`: gives the ranks of the elements
of a vector, different options for handling *ties* are
available.
```{r sort-rank, echo = TRUE}
x <- c(1.3,3.5,2.7,6.3,6.3)
x <- c(1.3, 3.5, 2.7, 6.3, 6.3)
sort(x)
order(x)
x[order(x)]
......@@ -339,9 +344,9 @@ rank(x)
### Exercise
__Exercise__
* Define
* Define
`x <- c(5,9,2,3,4,6,7,0,8,12,2,9) `
......@@ -358,9 +363,8 @@ Decide what the result will be of the following:
Use R to check your answers.
* The ` y <- c(33,44,29,16,25,45,33,19,54,22,21,49,11,24,56)` contain sales of milk
in liters for 5 days in three different shops (the first 3 values are for shops 1,2 and 3 on
* The vector ` y <- c(33,44,29,16,25,45,33,19,54,22,21,49,11,24,56)` contains
sales of milk in liters for 5 days in three different shops (the first 3 values are for shops 1,2 and 3 on
Monday, etc.). Produce a statistical summary of the sales for each day of the week and also
for each shop.
......@@ -371,8 +375,6 @@ R is an object-oriented language, so every data item is an object in R.
As in other programming languages, objects are instances of "blue-prints" called classes.
There are the following elementary types (or "modes"):
* numeric: real number
* character: chain of characters, text
* factor: String or numbers, describing certain categories
......@@ -381,18 +383,17 @@ There are the following elementary types or ("modes"):
Inf, -Inf (infinity), NaN (not a number)
Data storage types includes matrices, lists and data frames, which will be introduced
Data storage types include matrices, lists and data frames (tibbles), which will be introduced
in the next section. Certain types can have different subtypes, e.g. numeric
can be further subdivided into the integer, single and double types. Types
can be checked by the `is.*` and changed ("casted") by the
`as.*` functions. Furthermore, the function
`str` is very useful in order to obtain an overview of a (possibly
complex) object at hand. The following examples will make this clear:
complex) object at hand. The following examples will make this clear.
We first assign the value `9` to an object and then perform various operations on it.
```{r object-examples, echo = TRUE}
#assign value "9" to an object
a <- 9
# is a a string?
is.character(a)
......@@ -413,76 +414,60 @@ str(a)
# Matrices, lists, data frames and basic data handling
## Matrices
Matrices can be created in R in a variety of ways. Perhaps the simplest is to create the columns
Matrices are two--dimensional vectors and
can be created in R in a variety of ways. Perhaps the simplest is to create the columns
and then glue them together with the command `cbind`. For example,
```{r cbind-ex, echo = TRUE}
x <- c(5,7,9)
y <- c(6,3,4)
x <- c(5, 7 ,9)
y <- c(6, 3 ,4)
z <- cbind(x,y)
z
## dimensions: 3 rows and 2 columns
dim(z)
### matrix constructor
z <- matrix(c(5,7,9,6,3,4),nrow=3)
```
We can also use the function `matrix()` directly to create a matrix.
```{r matrix_direct, echo = TRUE}
z <- matrix(c(5, 7, 9, 6, 3, 4), nrow = 3)
```
There is a similar command, `rbind`, for building matrices by
gluing rows together.
The functions `cbind} and `rbind` can also be applied to matrices themselves
(provided the dimensions match) to form larger matrices. Matrices can also be built by explicit construction
via the function matrix. Notice that the dimension of the matrix is determined
by the size of the vector and the requirement that the number of rows is 3 in the example above, as specified by the
argument `nrow=3`. As an alternative we could have specified the number of columns with the
argument `ncol=2` (obviously, it is unnecessary to give both). Notice that the matrix is "filled up"
column-wise. If instead you wish to fill up row-wise, add the option `byrow=T`. For example:
The functions `cbind` and `rbind` can also be applied to matrices themselves
(provided the dimensions match) to form larger matrices.
Notice that the dimension of the matrix is determined
by the size of the vector and the requirement that the number of
rows is 3 in the example above, as specified by the
argument `nrow = 3`. As an alternative we could have specified the number of columns with the
argument `ncol = 2` (obviously, it is unnecessary to give both). Notice that the matrix is "filled up"
column-wise. If instead you wish to fill up row-wise, add the option `byrow=TRUE`.
```{r Matrix-ex, echo = TRUE}
z <- matrix(c(5,7,9,6,3,4),nr=3,byrow=T)
z <- matrix(c(5, 7 , 9 ,6 ,3 , 4), nrow = 3, byrow = TRUE)
z
```
Notice that the argument `nrow` has been abbreviated to `nr`.
Such abbreviations are always possible for function arguments provided it induces
no ambiguity -- if in doubt always use the full
argument name. As usual, R will try to interpret operations on matrices in a natural way.