Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
B
BTM2016_RDatasets
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Hugo Carlos
BTM2016_RDatasets
Commits
c575961e
Commit
c575961e
authored
Oct 20, 2016
by
Hugo Carlos
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
R code
parent
d5cf05f6
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
204 additions
and
0 deletions
+204
-0
Exploring_bodyfat.R
Exploring_bodyfat.R
+204
-0
No files found.
Exploring_bodyfat.R
0 → 100644
View file @
c575961e
# Fetch the body-fat data set and read it into a data frame.
download.file(
  url = "https://git.embl.de/hsanchez/BTM2016_RDatasets/raw/master/bodyfat.csv",
  destfile = "bodyfat.csv"
)
bodyfat <- read.csv(file = "bodyfat.csv")
head(bodyfat, n = 6L)

# Names of the columns
colnames(bodyfat)

# Dimensions (rows, columns)
dim(bodyfat)

# Checking only one column, try the TAB key
bodyfat$age
# The same kind of look at the first values, selecting a column by position
head(bodyfat[, 4], n = 6L)
head(bodyfat[, 4], n = 12)

# Numeric values describing age
summary(bodyfat$age)

# Getting the mean of the age values: either pick it out of the summary
# or compute it directly
summary(bodyfat$age)["Mean"]
mean(x = bodyfat$age)
# How many are younger than 31 years old?
# Each row is one person, so the answer is the number of rows. length() on a
# data frame returns the number of columns, which is the same for every subset.
younger_than_31 <- bodyfat[bodyfat$age < 31, ]
nrow(younger_than_31)

# Another way: negate the complementary condition
not_older_than_30 <- bodyfat[!(bodyfat$age > 30), ]
nrow(not_older_than_30)
# Watch out! Negating a logical mask and negating which() are different things.

# Logical mask: `!` flips TRUE/FALSE per row, keeping ages that are not > 30.
head(bodyfat[!(bodyfat$age > 30), ], n = 6L)

# which() returns integer row positions. `!` applied to non-zero integers gives
# FALSE for every element, so this subset ends up selecting no rows at all.
head(bodyfat[!which(bodyfat$age > 30), ], n = 6L)
## not equivalent!
# Two equivalent ways of counting the columns of the data frame.
number_of_columns <- length(names(bodyfat))
number_of_columns <- dim(bodyfat)[2]

# Mean of every column, computed with an explicit loop.
# The result vector is preallocated as numeric, and seq_len() is used so the
# loop body is skipped (instead of iterating over 1, 0) if there are no columns.
my_vector <- numeric(number_of_columns)
for (i in seq_len(number_of_columns)) {
  one_column <- bodyfat[, i]
  the_mean <- mean(one_column)
  my_vector[i] <- the_mean
}
names(my_vector) <- names(bodyfat)
my_vector

# The same computation in a single call. Note that apply() converts the data
# frame to a matrix first, so it is only meaningful when all columns are numeric.
apply(bodyfat, MARGIN = 2, FUN = mean)
# Benchmark helper: column means of the global `bodyfat` data frame computed
# with an explicit for loop.
#
# Returns (invisibly) a numeric vector of column means named after the columns.
# The number of columns is taken from `bodyfat` itself, so the function does
# not depend on any other global being defined first.
usingFor <- function() {
  n_cols <- ncol(bodyfat)
  my_vector <- numeric(n_cols)
  # seq_len() yields an empty sequence for zero columns (1:0 would not).
  for (i in seq_len(n_cols)) {
    my_vector[i] <- mean(bodyfat[, i])
  }
  names(my_vector) <- names(bodyfat)
  # Return the means; ending on the names<- assignment would return the names.
  invisible(my_vector)
}
# Benchmark helper: column means of the global `bodyfat` data frame computed
# with apply() over columns (MARGIN = 2). The result is returned invisibly.
usingApply <- function() {
  invisible(apply(X = bodyfat, MARGIN = 2, FUN = mean))
}
# Time the loop-based and the apply-based implementations.
# replicate() re-evaluates its expression, so the expression must be a *call*
# (`usingFor()`); the bare name `usingFor` only looks up the function object
# and never runs it. The repetition count is kept moderate because every
# repetition now does real work.
system.time(replicate(1000, usingFor()))
system.time(replicate(1000, usingApply()))
library(pryr)

# Memory change caused by creating, and then removing, the sequence 1..1e6.
mem_change(v <- 1:1e6)
mem_change(rm(v))

# Make sure `new_variable` does not exist before measuring its creation.
# rm() only emits a warning (not an error) for a missing object, so wrapping
# it in try() does not silence anything; test for existence instead.
if (exists("new_variable", inherits = FALSE)) {
  rm(new_variable)
}
mem_change(new_variable <- bodyfat$age)
# Going back!
apply(X = bodyfat, MARGIN = 2, FUN = mean)

# Rows of people younger than 31
Subset_1_indexes <- which(bodyfat$age < 31)
Subset_1 <- bodyfat[Subset_1_indexes, ]

# Getting the 30+
temp_indexes <- which(bodyfat$age > 30)
Subset30plus <- bodyfat[temp_indexes, ]

# Split the 30+ group: below 51 ...
Subset_2_indexes <- which(Subset30plus$age < 51)
Subset_2 <- Subset30plus[Subset_2_indexes, ]

# ... and above 50
Subset_3_indexes <- which(Subset30plus$age > 50)
Subset_3 <- Subset30plus[Subset_3_indexes, ]

# Check the age range that ended up in each of the two older groups
summary(object = Subset_2$age)
summary(object = Subset_3$age)
# Finally, getting the mean percent fat per Subset:
mean(x = Subset_1$percent.fat)
mean(x = Subset_2$percent.fat)
mean(x = Subset_3$percent.fat)

# The same three means without intermediate subsets: filter the percent.fat
# column directly with a logical condition on age.

# Young people
mean(bodyfat$percent.fat[bodyfat$age < 31])

# Not so young people
mean(bodyfat$percent.fat[bodyfat$age > 30 & bodyfat$age < 51])

# Definitely not young people
mean(bodyfat$percent.fat[bodyfat$age > 50])
# Benchmark helper: the step-by-step approach. Builds the three age groups of
# the global `bodyfat` data frame through which() indexes and intermediate
# data frames, then computes summaries and mean percent fat for them.
# Only the value of the last expression (mean percent fat of the over-50
# group) is returned; the earlier results are computed and discarded.
Option1 <- function() {
  young_rows <- which(bodyfat$age < 31)
  young <- bodyfat[young_rows, ]

  over_30_rows <- which(bodyfat$age > 30)
  over_30 <- bodyfat[over_30_rows, ]

  middle_rows <- which(over_30$age < 51)
  middle <- over_30[middle_rows, ]

  senior_rows <- which(over_30$age > 50)
  senior <- over_30[senior_rows, ]

  summary(middle$age)
  summary(senior$age)

  mean(young$percent.fat)
  mean(middle$percent.fat)
  mean(senior$percent.fat)
}
# Benchmark helper: the compact approach. Computes the mean percent fat of the
# three age groups of the global `bodyfat` data frame with logical row filters.
# Only the value of the last expression (the over-50 group) is returned.
Option2 <- function() {
  ages <- bodyfat$age
  mean(bodyfat[ages < 31, ]$percent.fat)
  mean(bodyfat[ages > 30 & ages < 51, ]$percent.fat)
  mean(bodyfat[ages > 50, ]$percent.fat)
}
# Time both ways of computing the per-group means.
# The expression given to replicate() must call the function (`Option1()`);
# the bare name only evaluates to the function object and runs nothing.
# The repetition count is kept moderate because every repetition now does
# real work.
system.time(replicate(1000, Option1()))
system.time(replicate(1000, Option2()))
# Positions of the columns to leave out: the row index ("X") and the age.
# %in% tests set membership for every name; `==` against a length-2 vector
# would recycle it element-wise and only match by coincidence of position.
I_do_not_want_these_ones <- which(names(bodyfat) %in% c("X", "age"))

# Positions to keep. setdiff() is used rather than a negative index because
# indexing with an empty negative index would select no columns at all.
columns_to_keep <- setdiff(seq_along(bodyfat), I_do_not_want_these_ones)
names(bodyfat[columns_to_keep])

# I calculate the pearson correlations of age and all the other values.
# vapply() guarantees one numeric value per column.
vapply(
  bodyfat[columns_to_keep],
  function(x) cor(bodyfat$age, x),
  numeric(1)
)
# Correlation between percent fat and density
cor(x = bodyfat$percent.fat, y = bodyfat$density)

# Pairwise scatter plots of the three variables of interest
plot(x = bodyfat$age, y = bodyfat$percent.fat, main = "Age vs Percent fat")
plot(x = bodyfat$age, y = bodyfat$density, main = "Age vs Density")
plot(
  x = bodyfat$percent.fat,
  y = bodyfat$density,
  main = "Percent fat vs Density"
)

# Scatter-plot matrix of every column, then of the first six columns only
pairs(bodyfat)
pairs(bodyfat[, 1:6])
message("Your code is talking to you")

# Unguarded version, left commented out (it takes sqrt() of a negative number):
#for(i in c(2, 1, 0, -1)){
#  sqrt(i)
#}

# Guarded version: negative values are skipped. Inside a loop the value of
# sqrt(i) is not auto-printed; only `i` itself is printed.
for (i in c(2, 1, 0, -1)) {
  if (i < 0) {
    next
  }
  sqrt(i)
  print(i)
}
# Trying to calculate the mean of the columns 4th, 3rd, and 17th
# Unknown vector
UnknownVector <- c(4, 3, 17)

# Unprotected version, left commented out (it stops at a missing column):
#sapply( UnknownVector, function(x){
#  mean(bodyfat[, x])
#})
# bodyfat[ ,17]

# Lets prevent to have an error:
# option 1: wrap the computation in try() so a failing column does not abort
# the whole sapply() call
sapply(
  X = UnknownVector,
  FUN = function(column_index) {
    try(mean(bodyfat[, column_index]))
  }
)
# Option 2: keep only the requested positions that are valid column numbers,
# then compute the means of those columns
sapply(
  X = UnknownVector[UnknownVector %in% 1:ncol(bodyfat)],
  FUN = function(column_index) {
    mean(bodyfat[, column_index])
  }
)
# Option 3: catch the failure per column, report it with a friendly message
# and return NA for that column.
# try(silent = TRUE) suppresses the error printout for this call only, so the
# global `show.error.messages` option no longer has to be switched off and back
# on (which would leave it off if anything failed in between).
sapply(UnknownVector, function(x) {
  column <- try(bodyfat[, x], silent = TRUE)
  # inherits() works for objects with any number of classes, whereas
  # `class(column) == "try-error"` yields a vector for multi-class objects.
  if (inherits(column, "try-error")) {
    message(paste0("The column ", x, " does not exist, I'm sorry :( "))
    return(NA)
  }
  mean(column)
})
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment