Commit 8c82bce8 by Thomas Schwarzl

parent 107a3768
 # Introductory R walkthrough: base R vectors, data types, matrices and lists,
# followed by tibble manipulation with dplyr verbs and pipes.
# Intended to be stepped through interactively; statements that are expected
# to fail are wrapped in try() so the script can also be sourced end to end.

# basic R usage ----
4 + 6
x <- 6
y <- 4
x + y

# list all variables in your environment
ls()
sqrt(16)

# to remove a variable
rm(x)

z <- c(5, 9, 1, 0)
x <- c(5, 9)

# combine two vectors
combinedVector <- c(z, x)

# another way to create a vector is to use seq()
seq(1, 9, by = 2)
# spell out length.out instead of relying on partial argument matching
seq(8, 20, length.out = 6)

# R will handle vectors for vector arithmetic
x <- seq(1, 5, by = 1)
y <- 1:5

# adding two vectors will return a vector of sums
x + y

# add a vector and a numeric value
x + 3
x

shortVector <- c(1, 3)
longVector <- 1:6
# the shorter vector is recycled along the longer one
shortVector + longVector
longVector

midVector <- 1:5
midVector
# lengths 5 and 6: recycling still happens, but R emits a warning
midVector + longVector

# some useful functions that work with vectors
length(midVector)
mean(midVector)
summary(longVector)
min(longVector)
max(longVector, shortVector)
?seq()

# how to subset vectors ----
x <- c(7.5, 8.2, 3.1, 5.6, 8.2, 9.3, 6.5, 7.0, 9.3, 1.2, 14.5, 6.2)
mean(x)
x[1]
x[c(1, 5, 8)]
x[1:5]
head(x)

# arrange values in a vector
sort(x)
sort(x = x, decreasing = TRUE)
sort(x, TRUE)
sort(longVector, decreasing = TRUE)
# the next two calls fail on purpose; try() reports the error and carries on
try(sort(x, decreasing)) # this will cause error
try(sort(TRUE, x)) # this will cause confusing error
# this is why it's safer to use named parameters
sort(x = x, decreasing = TRUE) # best way to call this function
sort(x = longVector, decreasing = TRUE)

# Data types ----
# numerics
9
a <- 9

# is. functions to test for the data type
is.numeric(a)

# character
myChar <- "t"
is.numeric(myChar)
is.character(myChar)

# logicals
TRUE
FALSE
myLgl <- TRUE
is.logical(myLgl)

# save numbers as characters
myCharNum <- "9"
is.numeric(myCharNum)

# change the type of an object
# called "coercion"
as.numeric(myCharNum)
# no quotes means that an object is a number
# quotes means that it is a character
myCoercedNum <- as.numeric(myCharNum)

# function to find classes or data types
class(myChar)
typeof(myChar)
str(myChar)

# Matrix ----
myMatrix <- matrix(data = c(5, 7, 9, 3, 4, 6), nrow = 3)
myVec1 <- 3:9
myVec2 <- 13:19
cbind(myVec1, myVec2)
myMatrix * 2
myMatrix

# Pull data out of matrix similarly to vectors
myMatrix[1, 1]
myMatrix[1, c(1, 2)]
# short cut
# myMatrix is 3 x 2, so asking for column 3 is out of bounds and errors
try(myMatrix[, 3])
myMatrix[, 2]
# to pull out values from a matrix
# use [rows,columns]
myMatrix[-1, ] # this will print everything except the first row

# c() coerces everything to one type, so the numbers become characters here
myMixedVector <- c(1, 2, 4, "a")
# what if we want to hold different data types in the same
# object
# we can use a list
myList <- list(1, 2, 4, "a")
# named list
myNamedList <- list(myFirstElem = 1, mySecond = 3, myCharElem = "a")
myNamedList
# $ sign notation can pull out named elements
myNamedList$mySecond
myNamedList[2]
# single [] will always return a list (from a list)
mySub1 <- myNamedList[2]
# double [[]] will return value at the position
myNamedList[[2]]

# review of accessing elements in a vector ----
# accessing by name
namedVector <- c(Alice = 5.5, Bob = 6.4, Steve = 5.9)
namedVector
namedVector["Alice"]
# access by position
namedVector[1]
# access using logicals
namedVector[c(TRUE, TRUE, FALSE)]
# compares the values (not the names) against "Alice"
namedVector == "Alice"
namedVector == 5.5
namedVector[namedVector == 5.5]
# > >= != <=
myNamedList$myFirstElem
namedVector["Alice"]

# coercion
# change the data type of an object
as.numeric("9")
as.numeric("a") # cannot be converted: returns NA with a warning

# tibbles and dplyr ----
library(tidyverse)
# open the connection explicitly so it can be closed once the data is loaded
bodyfatCon <- url("http://www-huber.embl.de/users/klaus/BasicR/bodyfat.rda")
load(bodyfatCon)
close(bodyfatCon)
bodyfatDF <- bodyfat
head(bodyfat)
str(bodyfat)
bodyfat
as_tibble(bodyfat)
# two ways to create a tibble
bodyfat <- as_tibble(bodyfat) # coerce
tibble(bodyfatDF) # create new from data
head(bodyfat)
bodyfat

# interact with this tibble
# filter() will let you pull out rows from a tibble
bodyfat
filter(.data = bodyfat, age < 40)
filter(.data = bodyfat, age > 40 & age < 60)
filter(bodyfat, age < 40 | age > 60)

# arrange rows in different orders
arrange(bodyfat, age)
arrange(bodyfat, age, weight)
# change the direction of the order
arrange(bodyfat, desc(age), weight)

# select columns
select(bodyfat, age, weight)
# select columns we don't want
select(bodyfat, -age, -weight)
# another way to pull out a column is by name
bodyfat$age
# another using baseR to pull out data []
bodyfat[1, 5]

# create new data
mutate(bodyfat, weight_kg = weight * 0.454)
bodyfatWithKG <- mutate(bodyfat, weight_kg = weight * 0.454)
oneCol <- select(bodyfatWithKG, weight_kg)

# chaining aka piping
# avoids creating intermediate objects
oneCol <- mutate(bodyfat, weight_kg = weight * 0.454) %>%
  select(weight_kg)

x
sort(x)
head(x, n = 2)
x %>% sort()
x %>% sort(decreasing = TRUE)
x %>%
  sort() %>%
  head(n = 2) %>%
  mean()
# alternative to write this without pipes
mean(head(sort(x), n = 2)) # this is hard

# Challenge ----
# create a new column in the bodyfat tibble
# called height_m
# conversion : in to m = x*0.0254
# save that as bodyfat
# show just the two height columns for age > 40
# yellow sticky when you're done
bodyfat_converted <- mutate(bodyfat, height_m = height * 0.0254)
bodyfat_converted %>%
  filter(age > 40) %>%
  select(height, height_m)

# summarise ----
summarise(bodyfat, meanAge = mean(age))
summarise(bodyfat, meanAge = mean(age), medianAge = median(age))
# this only returns the columns that we calculated

bodyfat %>%
  mutate(olderThan40 = age > 40) %>%
  group_by(olderThan40) %>%
  summarise(meanAge = mean(age), meanWeight = mean(weight))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!