Commit c575961e authored by Hugo Carlos's avatar Hugo Carlos
Browse files

R code

parent d5cf05f6
download.file("https://git.embl.de/hsanchez/BTM2016_RDatasets/raw/master/bodyfat.csv", "bodyfat.csv")
bodyfat<-read.csv("bodyfat.csv")
head(bodyfat)
# Names of the columns
names(bodyfat)
# Dimensions
dim(bodyfat)
# Checking only one column, try the TAB key
bodyfat$age
head( bodyfat[ ,4] )
head( bodyfat[ ,4], 12 )
# Numeric values describing age
summary(bodyfat$age)
# Getting the mean of the age values
summary(bodyfat$age)[4]
mean(bodyfat$age)
# How many are younger than 31 years old?
younger_than_31 <- bodyfat[ (bodyfat$age < 31), ]
length(younger_than_31)
# Another way
not_older_than_30 <- bodyfat[ !(bodyfat$age > 30), ]
length(not_older_than_30)
# Watch out!
head( bodyfat[ !(bodyfat$age > 30), ] )
head( bodyfat[ !which(bodyfat$age > 30), ] )
## not equivalent!
number_of_columns <- length(names(bodyfat))
number_of_columns <- dim(bodyfat)[2]
my_vector <- vector(length = number_of_columns)
for( i in 1:length( names(bodyfat) ) ){
one_column <- bodyfat[ ,i]
the_mean <- mean(one_column)
my_vector[i] <- the_mean
}
names(my_vector) <- names(bodyfat)
my_vector
apply(bodyfat, MARGIN = 2, FUN = mean)
usingFor <- function(){
my_vector <- vector(length = number_of_columns)
for( i in 1:length( names(bodyfat) ) ){
one_column <- bodyfat[ ,i]
the_mean <- mean(one_column)
my_vector[i] <- the_mean
}
names(my_vector) <- names(bodyfat)
}
usingApply <- function(){
temp <- apply(bodyfat, MARGIN = 2, FUN = mean)
}
system.time(replicate(1000000, usingFor))
system.time(replicate(1000000, usingApply))
library(pryr)
mem_change(v <- 1:1e6)
mem_change(rm(v))
try(rm(new_variable))
mem_change(new_variable <- bodyfat$age)
# Going back!
apply(bodyfat, MARGIN = 2, FUN = mean)
Subset_1_indexes<-which(bodyfat$age < 31)
Subset_1<-bodyfat[Subset_1_indexes, ]
# Getting the 30+
temp_indexes<-which(bodyfat$age > 30)
Subset30plus<-bodyfat[temp_indexes, ]
Subset_2_indexes<-which(Subset30plus$age < 51)
Subset_2<-Subset30plus[Subset_2_indexes, ]
Subset_3_indexes<-which(Subset30plus$age > 50)
Subset_3<-Subset30plus[Subset_3_indexes, ]
summary(Subset_2$age)
summary(Subset_3$age)
# Finally, getting the weight per Subset:
mean(Subset_1$percent.fat)
mean(Subset_2$percent.fat)
mean(Subset_3$percent.fat)
# Young people
mean( bodyfat[ bodyfat$age < 31, ]$percent.fat )
# Not so young people
mean( bodyfat[ bodyfat$age > 30 & bodyfat$age < 51, ]$percent.fat )
# Definitely not young people
mean( bodyfat[ bodyfat$age > 50, ]$percent.fat )
Option1<-function(){
Subset_1_indexes<-which(bodyfat$age < 31)
Subset_1<-bodyfat[Subset_1_indexes, ]
temp_indexes<-which(bodyfat$age > 30)
Subset30plus<-bodyfat[temp_indexes, ]
Subset_2_indexes<-which(Subset30plus$age < 51)
Subset_2<-Subset30plus[Subset_2_indexes, ]
Subset_3_indexes<-which(Subset30plus$age > 50)
Subset_3<-Subset30plus[Subset_3_indexes, ]
summary(Subset_2$age)
summary(Subset_3$age)
mean(Subset_1$percent.fat)
mean(Subset_2$percent.fat)
mean(Subset_3$percent.fat)
}
Option2<-function(){
mean( bodyfat[ bodyfat$age < 31, ]$percent.fat )
mean( bodyfat[ bodyfat$age > 30 & bodyfat$age < 51, ]$percent.fat )
mean( bodyfat[ bodyfat$age > 50, ]$percent.fat )
}
system.time(replicate(1000000, Option1))
system.time(replicate(1000000, Option2))
# Which columns are not the index, nor the age?
I_do_not_want_these_ones<-which(names(bodyfat)==c("X", "age"))
names(bodyfat[-I_do_not_want_these_ones])
# I calculate the pearson correlations of age and all the other values
sapply(bodyfat[-I_do_not_want_these_ones], function(x){
cor(bodyfat$age, x)
})
cor(bodyfat$percent.fat, bodyfat$density)
plot(bodyfat$age, bodyfat$percent.fat, main="Age vs Percent fat")
plot(bodyfat$age, bodyfat$density, main="Age vs Density")
plot(bodyfat$percent.fat, bodyfat$density, main="Percent fat vs Density")
pairs(bodyfat)
pairs(bodyfat[ , 1:6])
message("Your code is talking to you")
#for(i in c(2, 1, 0, -1)){
# sqrt(i)
#}
for(i in c(2, 1, 0, -1)){
if(i >= 0){
sqrt(i)
print(i)
}
}
# Trying to calculate the mean of the columns 4th, 3rd, and 17th
# Unknown vector
UnknownVector <- c(c(4, 3, 17))
#sapply( UnknonwVector, function(x){
# mean(bodyfat[, x])
#})
# bodyfat[ ,17]
# Lets prevent to have an error:
# option 1:
sapply( UnknownVector, function(x){
try(mean(bodyfat[, x]))
} )
# Option 2:
sapply( UnknownVector[ which( UnknownVector %in% 1:dim(bodyfat)[2] ) ], function(x){
mean( bodyfat[, x] )
} )
# Option 3:
options(show.error.messages = FALSE)
sapply( UnknownVector, function(x){
to_return <- NA
column <- try(bodyfat[, x])
if( class(column) == "try-error" ){
message( paste0("The column ", x, " does not exist, I'm sorry :( ") )
} else
to_return <- mean( column )
return(to_return)
} )
options(show.error.messages = TRUE)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment