author: Pierre-Alexandre Balland date: 05-12-2016 autosize: true width: 1440 height: 900
# Show the current working directory; relative paths such as "data.csv"
# are resolved against this folder
getwd()
[1] "C:/Dropbox/University/2. Teaching/2016-01-USE-ORGNET"
# Change the working directory if your files live elsewhere
# (uncomment the line below and replace the example path with your own folder)
# setwd ("C:/network")
# List the files in the working directory; "data.csv" has to be among them
# for the next step to work
list.files()
[1] "data.csv"
[2] "Lab1-figure"
[3] "Lab1.html"
[4] "Lab1.pptx"
[5] "Lab1.Rpres"
[6] "ORGNET.docx"
[7] "ORGNET.pdf"
[8] "Week 1 - Introduction - USE.pdf"
[9] "Week 1 - Introduction - USE.pptx"
[10] "Week 1 - script.r"
[11] "Week 2 - Graph theory - USE.pdf"
[12] "Week 2 - Graph theory - USE.pptx"
[13] "Week 3 - Complex networks.pptx"
[14] "Week 4 - Centrality and power.pptx"
# Read the data into R.
# header = TRUE: the first line of the file holds the column names.
# stringsAsFactors = TRUE: store City and Country as factors on every R
# version (the default switched to FALSE in R 4.0.0); the str() and summary()
# output shown later in this lab relies on these columns being factors.
data <- read.csv("data.csv", header = TRUE, stringsAsFactors = TRUE)
# Print the whole data frame
data
City Pop GDP Country
1 LA 14 700 US
2 NYC 20 1300 US
3 London 13 650 UK
4 Paris 12 600 FR
# Inspect the data
# View() opens a spreadsheet-style viewer window, so it is only called in an
# interactive session; it is skipped when the script runs in batch mode.
if (interactive()) {
  View(data)
}
# Print the first rows of the data frame (head() shows at most six by default)
head(data)
City Pop GDP Country
1 LA 14 700 US
2 NYC 20 1300 US
3 London 13 650 UK
4 Paris 12 600 FR
# Inspect the structure of the data: number of rows and columns,
# the type of each column and its first values
str(data)
'data.frame': 4 obs. of 4 variables:
$ City : Factor w/ 4 levels "LA","London",..: 1 3 2 4
$ Pop : int 14 20 13 12
$ GDP : int 700 1300 650 600
$ Country: Factor w/ 3 levels "FR","UK","US": 3 3 2 1
# Summarise every column: min/quartiles/mean/max for numeric columns,
# number of rows per level for factor columns
summary(data)
City Pop GDP Country
LA :1 Min. :12.00 Min. : 600.0 FR:1
London:1 1st Qu.:12.75 1st Qu.: 637.5 UK:1
NYC :1 Median :13.50 Median : 675.0 US:2
Paris :1 Mean :14.75 Mean : 812.5
3rd Qu.:15.50 3rd Qu.: 850.0
Max. :20.00 Max. :1300.0
# Scatter plot with population on the x axis and GDP on the y axis
plot(data$Pop, data$GDP)
# Look at the correlation between population and GDP
# (cor() computes the Pearson coefficient by default)
cor(data$Pop, data$GDP)
[1] 0.9945734
# Add GDP per capita as a new column: GDP divided by population, row by row
data[["GDPcap"]] <- with(data, GDP / Pop)
# Show the data frame including the new column
data
City Pop GDP Country GDPcap
1 LA 14 700 US 50
2 NYC 20 1300 US 65
3 London 13 650 UK 50
4 Paris 12 600 FR 50
# Fit a linear regression model: GDP per capita (response) explained by
# population (predictor). The variables are passed as data$... vectors, so
# the coefficient labels in the output carry the "data$" prefix.
fit <- lm (data$GDPcap ~ data$Pop)
# Print residuals, coefficient estimates with their tests, and fit statistics
summary (fit)
Call:
lm(formula = data$GDPcap ~ data$Pop)
Residuals:
1 2 3 4
-2.2258 0.5806 -0.1935 1.8387
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 23.7742 5.0536 4.704 0.0423 *
data$Pop 2.0323 0.3352 6.062 0.0261 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.087 on 2 degrees of freedom
Multiple R-squared: 0.9484, Adjusted R-squared: 0.9226
F-statistic: 36.75 on 1 and 2 DF, p-value: 0.02615
# Plot population (x axis) against GDP (y axis)
# NOTE(review): this repeats the earlier Pop-vs-GDP plot unchanged; if the aim
# is to illustrate the model fitted above, the y variable may have been meant
# to be data$GDPcap — confirm.
plot(data$Pop, data$GDP)
# Center the population variable: subtract its mean, keep the original units
# (center = TRUE, scale = FALSE).
# scale() returns a one-column matrix carrying a "scaled:center" attribute;
# as.numeric() drops both so that c.Pop is stored as a plain numeric column
# like the other variables rather than as a matrix embedded in the data frame.
data$c.Pop <- as.numeric(scale(data$Pop, center = TRUE, scale = FALSE))
# Print the data frame including the centered column
data
City Pop GDP Country GDPcap c.Pop
1 LA 14 700 US 50 -0.75
2 NYC 20 1300 US 65 5.25
3 London 13 650 UK 50 -1.75
4 Paris 12 600 FR 50 -2.75
# Fit a new model using the centered population as predictor.
# Compared with the first model, the slope, residuals and R-squared are
# unchanged; only the intercept differs, because it now refers to a city with
# average population instead of a city with Pop = 0.
fit2 <- lm(data$GDPcap ~ data$c.Pop)
# Print residuals, coefficient estimates with their tests, and fit statistics
summary(fit2)
Call:
lm(formula = data$GDPcap ~ data$c.Pop)
Residuals:
1 2 3 4
-2.2258 0.5806 -0.1935 1.8387
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 53.7500 1.0434 51.514 0.000377 ***
data$c.Pop 2.0323 0.3352 6.062 0.026148 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.087 on 2 degrees of freedom
Multiple R-squared: 0.9484, Adjusted R-squared: 0.9226
F-statistic: 36.75 on 1 and 2 DF, p-value: 0.02615