Introduction to R and RStudio

author: Pierre-Alexandre Balland
date: 05-12-2016
autosize: true
width: 1440
height: 900

Working directory

# Show the current working directory (where R reads and writes files by default)
getwd()
[1] "C:/Dropbox/University/2. Teaching/2016-01-USE-ORGNET"
# Change the working directory (uncomment and adapt the path to your machine)
# setwd ("C:/network")
# List the files in the working directory
list.files()
 [1] "data.csv"                          
 [2] "Lab1-figure"                       
 [3] "Lab1.html"                         
 [4] "Lab1.pptx"                         
 [5] "Lab1.Rpres"                        
 [6] "ORGNET.docx"                       
 [7] "ORGNET.pdf"                        
 [8] "Week 1 - Introduction - USE.pdf"   
 [9] "Week 1 - Introduction - USE.pptx"  
[10] "Week 1 - script.r"                 
[11] "Week 2 - Graph theory - USE.pdf"   
[12] "Week 2 - Graph theory - USE.pptx"  
[13] "Week 3 - Complex networks.pptx"    
[14] "Week 4 - Centrality and power.pptx"
# Read the data into R (the first row of the CSV holds the column names)
# stringsAsFactors = TRUE keeps City and Country as factors, which is what the
# str() and summary() output in this document shows; R >= 4.0 would otherwise
# read them as character columns
data <- read.csv("data.csv", header = TRUE, stringsAsFactors = TRUE)
data
    City Pop  GDP Country
1     LA  14  700      US
2    NYC  20 1300      US
3 London  13  650      UK
4 Paris   12  600      FR
# Inspect the data
# View() opens a spreadsheet-style viewer, so only call it in an interactive
# session; head() prints the first rows to the console
if (interactive()) {
  View(data)
}
head(data)
    City Pop  GDP Country
1     LA  14  700      US
2    NYC  20 1300      US
3 London  13  650      UK
4 Paris   12  600      FR
# Inspect the structure of the data: number of rows and the type of each column
str(data)
'data.frame':   4 obs. of  4 variables:
 $ City   : Factor w/ 4 levels "LA","London",..: 1 3 2 4
 $ Pop    : int  14 20 13 12
 $ GDP    : int  700 1300 650 600
 $ Country: Factor w/ 3 levels "FR","UK","US": 3 3 2 1
# Summary statistics for every column
summary(data)
     City        Pop             GDP         Country
 LA    :1   Min.   :12.00   Min.   : 600.0   FR:1   
 London:1   1st Qu.:12.75   1st Qu.: 637.5   UK:1   
 NYC   :1   Median :13.50   Median : 675.0   US:2   
 Paris :1   Mean   :14.75   Mean   : 812.5          
            3rd Qu.:15.50   3rd Qu.: 850.0          
            Max.   :20.00   Max.   :1300.0          
# Scatter plot of GDP (y-axis) against population (x-axis)
plot(data$Pop, data$GDP)

plot of chunk unnamed-chunk-7

# Correlation between population and GDP (cor() defaults to Pearson)
cor(data$Pop, data$GDP)
[1] 0.9945734
# Add a new column: GDP divided by population
data$GDPcap <- with(data, GDP / Pop)
data
    City Pop  GDP Country GDPcap
1     LA  14  700      US     50
2    NYC  20 1300      US     65
3 London  13  650      UK     50
4 Paris   12  600      FR     50
# Fit a linear regression of GDPcap on Pop
# The formula names the columns and `data =` says where to find them, so the
# fitted model can later be used with predict(fit, newdata = ...)
fit <- lm(GDPcap ~ Pop, data = data)
summary(fit)

Call:
lm(formula = GDPcap ~ Pop, data = data)

Residuals:
      1       2       3       4 
-2.2258  0.5806 -0.1935  1.8387 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept)  23.7742     5.0536   4.704   0.0423 *
Pop           2.0323     0.3352   6.062   0.0261 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.087 on 2 degrees of freedom
Multiple R-squared:  0.9484,    Adjusted R-squared:  0.9226 
F-statistic: 36.75 on 1 and 2 DF,  p-value: 0.02615
# Scatter plot of GDP (y-axis) against population (x-axis)
# NOTE(review): this repeats the earlier Pop-vs-GDP plot; if the goal is to
# illustrate the regression fitted on GDPcap, data$GDPcap may have been
# intended on the y-axis - confirm
plot(data$Pop, data$GDP)

plot of chunk unnamed-chunk-11

# Center the population variable (subtract its mean, do not rescale)
# scale() returns a one-column matrix; as.numeric() stores a plain numeric
# vector in the data frame instead of a matrix column
data$c.Pop <- as.numeric(scale(data$Pop, center = TRUE, scale = FALSE))
data
    City Pop  GDP Country GDPcap c.Pop
1     LA  14  700      US     50 -0.75
2    NYC  20 1300      US     65  5.25
3 London  13  650      UK     50 -1.75
4 Paris   12  600      FR     50 -2.75
# Fit a new model using the centered population variable
# Centering leaves the slope unchanged; the intercept becomes the expected
# GDPcap at the mean population
# The formula names the columns and `data =` says where to find them, so the
# fitted model can later be used with predict(fit2, newdata = ...)
fit2 <- lm(GDPcap ~ c.Pop, data = data)
summary(fit2)

Call:
lm(formula = GDPcap ~ c.Pop, data = data)

Residuals:
      1       2       3       4 
-2.2258  0.5806 -0.1935  1.8387 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  53.7500     1.0434  51.514 0.000377 ***
c.Pop         2.0323     0.3352   6.062 0.026148 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.087 on 2 degrees of freedom
Multiple R-squared:  0.9484,    Adjusted R-squared:  0.9226 
F-statistic: 36.75 on 1 and 2 DF,  p-value: 0.02615