Commit 8c82bce8 authored by Thomas Schwarzl

Add new file

parent 107a3768
# Basic R usage
# An expression typed at top level is evaluated and its value is printed.
4 + 6

# Store values in variables with the assignment operator <-.
x <- 6
y <- 4
x + y

# List all variables in your environment.
ls()

sqrt(16)

# Remove a variable from the environment.
rm(x)

# c() builds a vector from its arguments.
z <- c(5, 9, 1, 0)
x <- c(5, 9)

# Combine two vectors: c() flattens them into one longer vector.
combinedVector <- c(z, x)

# Another way to create a vector is to use seq().
seq(1, 9, by = 2)
# The argument is spelled out as length.out rather than relying on partial
# matching of the abbreviation `length`.
seq(8, 20, length.out = 6)
# R applies arithmetic element by element across whole vectors.
x <- seq(1, 5, by = 1)
y <- 1:5

# Adding two vectors of equal length returns the vector of pairwise sums.
x + y

# Adding a single number to a vector adds it to every element; x itself is
# not modified, as printing it afterwards shows.
x + 3
x

# When the lengths differ, the shorter vector is recycled to the length of
# the longer one.
shortVector <- c(1, 3)
longVector <- 1:6
shortVector + longVector
longVector

# 6 is not a multiple of 5, so R still recycles here but emits a warning.
midVector <- 1:5
midVector
midVector + longVector

# Some useful functions that work with vectors.
length(midVector)
mean(midVector)
summary(longVector)
min(longVector)
max(longVector, shortVector)

# Look up the help for seq().
?seq()
# How to subset vectors
x <- c(7.5, 8.2, 3.1, 5.6, 8.2, 9.3, 6.5, 7.0, 9.3, 1.2, 14.5, 6.2)
mean(x)
x[1]            # a single element by position
x[c(1, 5, 8)]   # several elements, selected by a vector of positions
x[1:5]          # a range of positions
head(x)         # the first six elements

# Arrange values in a vector
sort(x)
sort(x = x, decreasing = TRUE)
sort(x, TRUE) # positional arguments work, but only in the right order
sort(longVector, decreasing = TRUE)

# The next two calls fail on purpose. They are wrapped in try() so that the
# error message is still printed but the rest of the script keeps running
# when the file is executed with source() or Rscript.
try(sort(x, decreasing)) # error: no object called `decreasing` exists
try(sort(TRUE, x))       # confusing error: x is taken as `decreasing`

# This is why it's safer to use named parameters.
sort(x = x, decreasing = TRUE) # best way to call this function
sort(x = longVector, decreasing = TRUE)
# Data types

# Numerics
9
a <- 9

# The is.*() family of functions tests for a data type.
is.numeric(a)

# Characters
myChar <- "t"
is.numeric(myChar)
is.character(myChar)

# Logicals
TRUE
FALSE
myLgl <- TRUE
is.logical(myLgl)

# A number can also be saved as a character string.
myCharNum <- "9"
is.numeric(myCharNum)

# Changing the type of an object is called "coercion".
as.numeric(myCharNum)

# No quotes means that a value is a number;
# quotes mean that it is a character.
myCoercedNum <- as.numeric(myCharNum)

# Functions to find the class or data type of an object.
class(myChar)
typeof(myChar)
str(myChar)
# Matrix
# matrix() fills column by column, so six values with nrow = 3 give a
# matrix with 3 rows and 2 columns.
myMatrix <- matrix(data = c(5, 7, 9, 3, 4, 6), nrow = 3)
myVec1 <- 3:9
myVec2 <- 13:19
# cbind() binds vectors together as the columns of a matrix.
cbind(myVec1, myVec2)
# Arithmetic is applied to every element; myMatrix itself is unchanged.
myMatrix * 2
myMatrix

# Pull data out of a matrix similarly to vectors, using [rows, columns].
myMatrix[1, 1]
myMatrix[1, c(1, 2)]

# Shortcut: leave an index empty to select everything along that dimension.
# myMatrix has 3 rows but only 2 columns, so the valid column indices are
# 1 and 2 (myMatrix[, 3] would fail with "subscript out of bounds").
myMatrix[3, ]
myMatrix[, 2]

# A negative index drops entries: this prints everything except the first row.
myMatrix[-1, ]
# A vector built with c() holds a single data type, so mixing numbers with
# "a" turns every element into a character string.
myMixedVector <- c(1, 2, 4, "a")

# What if we want to hold different data types in the same object?
# We can use a list.
myList <- list(1, 2, 4, "a")

# Named list
myNamedList <- list(
  myFirstElem = 1,
  mySecond = 3,
  myCharElem = "a"
)
myNamedList

# The $ notation pulls out an element by name.
myNamedList$mySecond

# Single [] on a list always returns a list.
myNamedList[2]
mySub1 <- myNamedList[2]

# Double [[]] returns the value stored at that position.
myNamedList[[2]]
# Review of accessing elements in a vector

# Access by name
namedVector <- c(Alice = 5.5, Bob = 6.4, Steve = 5.9)
namedVector
namedVector["Alice"]

# Access by position
namedVector[1]

# Access using logicals: the elements at TRUE positions are kept
namedVector[c(TRUE, TRUE, FALSE)]

# == compares the values of the vector, not its names, so comparing against
# "Alice" gives FALSE for every element.
namedVector == "Alice"
# Comparing against a number gives one TRUE/FALSE per element ...
namedVector == 5.5
# ... and that logical vector can be used as a subset index.
namedVector[namedVector == 5.5]
# Other comparison operators: > >= != <=

myNamedList$myFirstElem
namedVector["Alice"]

# Coercion: change the data type of an object
as.numeric("9")
as.numeric("a") # "a" is not a number: the result is NA, with a warning
library(tidyverse)

# Download the bodyfat data set. load() restores the objects saved in the
# .rda file into the current environment; the code below relies on the file
# providing an object called `bodyfat`.
# The url() connection is kept in a variable so that it can be closed
# explicitly once loading is done, instead of being left open until the
# garbage collector reclaims it.
bodyfatCon <- url("http://www-huber.embl.de/users/klaus/BasicR/bodyfat.rda")
load(bodyfatCon)
close(bodyfatCon)

# Keep a copy of the data as it was loaded, before converting it to a tibble.
bodyfatDF <- bodyfat
head(bodyfat)
str(bodyfat)
bodyfat
as_tibble(bodyfat)

# Two ways to create a tibble
bodyfat <- as_tibble(bodyfat) # coerce an existing object
tibble(bodyfatDF)             # create a new tibble from data
head(bodyfat)
bodyfat
# Interact with this tibble
bodyfat

# filter() pulls out the rows of a tibble that satisfy a condition
filter(bodyfat, age < 40)
filter(bodyfat, age > 40 & age < 60) # both conditions must hold
filter(bodyfat, age < 40 | age > 60) # either condition may hold

# arrange() puts rows in a different order
arrange(bodyfat, age)
arrange(bodyfat, age, weight) # ties in age are ordered by weight

# desc() changes the direction of the order
arrange(bodyfat, desc(age), weight)

# select() keeps the named columns
select(bodyfat, age, weight)

# A minus sign drops the columns we don't want
select(bodyfat, -age, -weight)

# Another way to pull out a column is by name
bodyfat$age

# Base R [] also pulls out data, as [rows, columns]
bodyfat[1, 5]
# Create new data: mutate() adds a column computed from existing ones.
# NOTE(review): 0.454 is presumably the pounds-to-kilograms factor; confirm
# the unit of `weight` in the data set.
mutate(bodyfat, weight_kg = weight * 0.454)
bodyfatWithKG <- mutate(bodyfat, weight_kg = weight * 0.454)
oneCol <- select(bodyfatWithKG, weight_kg)

# Chaining, a.k.a. piping, avoids creating intermediate objects:
# %>% passes the result on its left as the first argument of the call on
# its right.
oneCol <- bodyfat %>%
  mutate(weight_kg = weight * 0.454) %>%
  select(weight_kg)

# The same idea with a plain vector
x
sort(x)
head(x, n = 2)
x %>% sort()
x %>% sort(decreasing = TRUE)
x %>%
  sort() %>%
  head(n = 2) %>%
  mean()

# The alternative without pipes has to be read inside out, which is hard
mean(head(sort(x), n = 2))
# Challenge
#   1. Create a new column in the bodyfat tibble called height_m
#      (conversion: inches to metres = x * 0.0254).
#   2. Save that as bodyfat.
#   3. Show just the two height columns for age > 40.
# Put up your yellow sticky when you're done.

# Solution. Note that it stores the result under a new name,
# bodyfat_converted, and leaves bodyfat itself unchanged.
bodyfat_converted <- bodyfat %>%
  mutate(height_m = height * 0.0254)

bodyfat_converted %>%
  filter(age > 40) %>%
  select(height, height_m)
# summarise() collapses a tibble into summary values
summarise(bodyfat, meanAge = mean(age))
summarise(
  bodyfat,
  meanAge = mean(age),
  medianAge = median(age)
)
# The result contains only the columns that we calculated.

# Summaries per group: flag who is older than 40, group by that flag and
# summarise each group separately.
bodyfat %>%
  mutate(olderThan40 = age > 40) %>%
  group_by(olderThan40) %>%
  summarise(
    meanAge = mean(age),
    meanWeight = mean(weight)
  )
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment