5  Panel Data Regression

library(readxl)
# Import the province-by-year panel from the Excel workbook.
datapanel <- read_excel(path = "Data/Bab 5/Data Panel.xlsx")
# Preview the first rows to check the import.
head(datapanel)
# A tibble: 6 × 6
  province  year realgdp population investment   hdi
  <chr>    <dbl>   <dbl>      <dbl>      <dbl> <dbl>
1 Aceh      2010 101545.    4523100       82.3  67.1
2 Aceh      2011 104874.    4619000      463.   67.4
3 Aceh      2012 108915.    4715100     1726.   67.8
4 Aceh      2013 111756.    4811100     4785.   68.3
5 Aceh      2014 113488.    4906800     5497.   68.8
6 Aceh      2015 112672.    5002000     4485.   69.4

5.1 Static Panel Data

library(plm)
# Static specification: log real GDP on log population, investment and HDI.
model1 <- log(realgdp) ~ log(population) + log(investment) + log(hdi)
# F test for time effects (alternative hypothesis: significant effects).
# The panel index is passed explicitly, as in the plm() calls of this
# chapter, so the test does not depend on province/year being the first
# two columns of the data.
pFtest(model1, data = datapanel,
       index = c("province", "year"), effect = "time")

    F test for time effects

data:  model1
F = 3.1305, df1 = 6, df2 = 221, p-value = 0.005777
alternative hypothesis: significant effects
# F test for individual (province) effects; the panel index is given
# explicitly instead of relying on the column order of the data.
pFtest(model1, data = datapanel,
       index = c("province", "year"), effect = "individual")

    F test for individual effects

data:  model1
F = 824.45, df1 = 32, df2 = 195, p-value < 2.2e-16
alternative hypothesis: significant effects
# Joint F test for individual and time effects; the panel index is given
# explicitly instead of relying on the column order of the data.
pFtest(model1, data = datapanel,
       index = c("province", "year"), effect = "twoways")

    F test for twoways effects

data:  model1
F = 812.75, df1 = 38, df2 = 189, p-value < 2.2e-16
alternative hypothesis: significant effects

5.1.1 Pooled OLS

# Pooled OLS benchmark estimated on the province-year panel.
# NOTE(review): `effect` presumably does not alter a pooling fit; it is kept
# so the echoed call stays the same -- confirm.
POLS <- plm(
  formula = model1,
  data = datapanel,
  effect = "twoways",
  model = "pooling",
  index = c("province", "year")
)
summary(POLS)
Pooling Model

Call:
plm(formula = model1, data = datapanel, effect = "twoways", model = "pooling", 
    index = c("province", "year"))

Balanced Panel: n = 33, T = 7, N = 231

Residuals:
     Min.   1st Qu.    Median   3rd Qu.      Max. 
-0.802950 -0.288399 -0.071922  0.204733  1.211596 

Coefficients:
                  Estimate Std. Error t-value  Pr(>|t|)    
(Intercept)     -14.998414   1.789923 -8.3794 5.578e-15 ***
log(population)   0.761682   0.034219 22.2592 < 2.2e-16 ***
log(investment)   0.200796   0.018761 10.7031 < 2.2e-16 ***
log(hdi)          3.187128   0.432136  7.3753 3.050e-12 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Total Sum of Squares:    316.63
Residual Sum of Squares: 37.477
R-Squared:      0.88164
Adj. R-Squared: 0.88008
F-statistic: 563.624 on 3 and 227 DF, p-value: < 2.22e-16

5.1.2 Fixed Effects Model

# Two-way fixed effects (within) estimator with province and year effects.
FEM <- plm(
  formula = model1,
  data = datapanel,
  effect = "twoways",
  model = "within",
  index = c("province", "year")
)
summary(FEM)
Twoways effects Within Model

Call:
plm(formula = model1, data = datapanel, effect = "twoways", model = "within", 
    index = c("province", "year"))

Balanced Panel: n = 33, T = 7, N = 231

Residuals:
      Min.    1st Qu.     Median    3rd Qu.       Max. 
-0.1065933 -0.0108718  0.0010591  0.0107126  0.1302578 

Coefficients:
                  Estimate Std. Error t-value Pr(>|t|)  
log(population) -0.4403249  0.2194092 -2.0069  0.04619 *
log(investment) -0.0046652  0.0039298 -1.1871  0.23666  
log(hdi)         1.4472500  0.7092888  2.0404  0.04270 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Total Sum of Squares:    0.24452
Residual Sum of Squares: 0.22795
R-Squared:      0.067783
Adj. R-Squared: -0.13444
F-statistic: 4.58081 on 3 and 189 DF, p-value: 0.0040297
# F test of the fixed effects model against pooled OLS
# (alternative hypothesis: the province/year effects are significant).
pFtest(x = FEM, z = POLS)

    F test for twoways effects

data:  model1
F = 812.75, df1 = 38, df2 = 189, p-value < 2.2e-16
alternative hypothesis: significant effects

5.1.3 Random Effects Model

# Two-way random effects estimator on the same specification and index.
REM <- plm(
  formula = model1,
  data = datapanel,
  effect = "twoways",
  model = "random",
  index = c("province", "year")
)
summary(REM)
Twoways effects Random Effect Model 
   (Swamy-Arora's transformation)

Call:
plm(formula = model1, data = datapanel, effect = "twoways", model = "random", 
    index = c("province", "year"))

Balanced Panel: n = 33, T = 7, N = 231

Effects:
                   var  std.dev share
idiosyncratic 0.001206 0.034728 0.008
individual    0.141041 0.375555 0.992
time          0.000000 0.000000 0.000
theta: 0.9651 (id) 0 (time) 0 (total)

Residuals:
      Min.    1st Qu.     Median    3rd Qu.       Max. 
-0.1099833 -0.0205049  0.0012902  0.0165381  0.1635731 

Coefficients:
                   Estimate  Std. Error z-value Pr(>|z|)    
(Intercept)     -2.1127e+01  1.0047e+00 -21.029   <2e-16 ***
log(population)  8.9072e-01  7.1716e-02  12.420   <2e-16 ***
log(investment)  9.0107e-04  4.1329e-03   0.218   0.8274    
log(hdi)         4.5687e+00  2.2971e-01  19.889   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Total Sum of Squares:    3.599
Residual Sum of Squares: 0.35333
R-Squared:      0.90183
Adj. R-Squared: 0.90053
Chisq: 2085.22 on 3 DF, p-value: < 2.22e-16

5.1.4 Hausman Test

# Hausman test comparing the fixed and random effects estimates
# (alternative hypothesis: one model is inconsistent).
phtest(x = FEM, x2 = REM)

    Hausman Test

data:  model1
chisq = 46.609, df = 3, p-value = 4.208e-10
alternative hypothesis: one model is inconsistent

5.1.5 Model Diagnostics

# Multicollinearity
library(car)
Loading required package: carData
# Variance inflation factors for the regressors of the pooled model.
vif(POLS)
log(population) log(investment)        log(hdi) 
       1.596289        1.641147        1.119590 
# Pairwise correlations of the regressors, selected by name so the result
# does not depend on column positions.
# NOTE(review): computed on levels, whereas the models use logs -- confirm
# this is intended.
cor(datapanel[, c("population", "investment", "hdi")])
           population investment       hdi
population  1.0000000  0.7052198 0.1212988
investment  0.7052198  1.0000000 0.3222325
hdi         0.1212988  0.3222325 1.0000000
library(lmtest)
Loading required package: zoo

Attaching package: 'zoo'
The following objects are masked from 'package:base':

    as.Date, as.Date.numeric
# Heteroscedasticity: studentized Breusch-Pagan test.
# NOTE(review): lmtest::bptest() is not panel-aware; given a plm object it
# may rebuild the regression from the untransformed data (i.e. test the
# pooled model rather than the within model) -- confirm before interpreting.
bptest(FEM) 

    studentized Breusch-Pagan test

data:  FEM
BP = 32.396, df = 3, p-value = 4.319e-07
# Autocorrelation: Breusch-Godfrey/Wooldridge test on the fixed effects model
# (alternative hypothesis: serial correlation in the idiosyncratic errors).
pbgtest(FEM)

    Breusch-Godfrey/Wooldridge test for serial correlation in panel models

data:  model1
chisq = 84.76, df = 7, p-value = 1.468e-15
alternative hypothesis: serial correlation in idiosyncratic errors
# Cluster-robust standard errors for the fixed effects model: clustered by
# group (province) with the "sss" small-sample adjustment.
library(sandwich)
coeftest(
  x = FEM,
  vcov. = vcovHC(FEM, cluster = "group", type = "sss")
)

t test of coefficients:

                  Estimate Std. Error t value Pr(>|t|)
log(population) -0.4403249  0.4093013 -1.0758   0.2834
log(investment) -0.0046652  0.0056843 -0.8207   0.4128
log(hdi)         1.4472500  1.0807171  1.3392   0.1821
# Heteroscedasticity- and autocorrelation-robust standard errors using the
# Arellano covariance estimator.
# NOTE(review): "arellano" appears to be the default method of vcovHC() for
# plm objects, so this likely differs from the clustered call only in the
# small-sample adjustment -- confirm.
coeftest(
  x = FEM,
  vcov. = vcovHC(FEM, method = "arellano")
)

t test of coefficients:

                  Estimate Std. Error t value Pr(>|t|)
log(population) -0.4403249  0.4012959 -1.0973   0.2739
log(investment) -0.0046652  0.0055732 -0.8371   0.4036
log(hdi)         1.4472500  1.0595795  1.3659   0.1736

5.2 Dynamic Panel Data

# Preview the panel again before specifying the dynamic models.
head(datapanel)
# A tibble: 6 × 6
  province  year realgdp population investment   hdi
  <chr>    <dbl>   <dbl>      <dbl>      <dbl> <dbl>
1 Aceh      2010 101545.    4523100       82.3  67.1
2 Aceh      2011 104874.    4619000      463.   67.4
3 Aceh      2012 108915.    4715100     1726.   67.8
4 Aceh      2013 111756.    4811100     4785.   68.3
5 Aceh      2014 113488.    4906800     5497.   68.8
6 Aceh      2015 112672.    5002000     4485.   69.4
# Dynamic specification for pgmm(): the part after `|` lists the GMM
# instruments, here lags 2 to 7 of log(realgdp).
modeldyn1 <- log(realgdp) ~
  lag(log(realgdp)) + log(population) + log(investment) + log(hdi) |
  lag(log(realgdp), 2:7)
# Same regressors without the instrument part, used for the dynamic
# pooled OLS and fixed effects benchmarks.
modeldyn2 <- log(realgdp) ~
  lag(log(realgdp)) + log(population) + log(investment) + log(hdi)

5.2.1 First Difference GMM

# First-difference GMM fit using pgmm()'s default settings.
# NOTE(review): the singular-matrix warning may point to too many instruments
# for 33 provinces; consider a shorter lag range -- confirm.
fd.gmm <- pgmm(modeldyn1, data = datapanel)
Warning in pgmm(modeldyn1, data = datapanel): the second-step matrix is
singular, a general inverse is used
# Report the first-difference GMM fit with its Sargan, autocorrelation and
# Wald tests.
summary(fd.gmm)
Warning in vcovHC.pgmm(object): a general inverse is used
Twoways effects One-step model Difference GMM 

Call:
pgmm(formula = modeldyn1, data = datapanel)

Balanced Panel: n = 33, T = 7, N = 231

Number of Observations Used: 165
Residuals:
      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-0.0926516 -0.0071905 -0.0006685  0.0000000  0.0051944  0.1291078 

Coefficients:
                    Estimate Std. Error z-value  Pr(>|z|)    
lag(log(realgdp))  0.7312958  0.2057587  3.5541 0.0003792 ***
log(population)   -0.1750407  0.1798151 -0.9734 0.3303305    
log(investment)    0.0013969  0.0022465  0.6218 0.5340580    
log(hdi)           2.1197608  1.3035304  1.6262 0.1039137    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Sargan test: chisq(14) = 25.4339 (p-value = 0.030518)
Autocorrelation test (1): normal = -1.836971 (p-value = 0.066214)
Autocorrelation test (2): normal = 1.555262 (p-value = 0.11988)
Wald test for coefficients: chisq(4) = 95.86464 (p-value = < 2.22e-16)
Wald test for time dummies: chisq(5) = 8.364699 (p-value = 0.13725)

5.2.2 System GMM

# System GMM: same specification, with transformation = "ld" (levels and
# differences) instead of the default first-difference transformation.
sys.gmm <- pgmm(modeldyn1, data = datapanel, transformation = "ld")
Warning in pgmm(modeldyn1, data = datapanel, transformation = "ld"): the
second-step matrix is singular, a general inverse is used
# Report the System GMM fit with its Sargan, autocorrelation and Wald tests.
summary(sys.gmm)
Warning in vcovHC.pgmm(object): a general inverse is used
Twoways effects One-step model System GMM 

Call:
pgmm(formula = modeldyn1, data = datapanel, transformation = "ld")

Balanced Panel: n = 33, T = 7, N = 231

Number of Observations Used: 363
Residuals:
      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-0.1198240 -0.0083907  0.0001403  0.0000000  0.0065018  0.1402918 

Coefficients:
                   Estimate Std. Error z-value  Pr(>|z|)    
lag(log(realgdp)) 0.9686425  0.0107842 89.8208 < 2.2e-16 ***
log(population)   0.0207419  0.0078302  2.6490  0.008074 ** 
log(investment)   0.0070513  0.0029150  2.4190  0.015565 *  
log(hdi)          0.1144280  0.0582058  1.9659  0.049308 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Sargan test: chisq(22) = 23.67029 (p-value = 0.36475)
Autocorrelation test (1): normal = -1.663193 (p-value = 0.096274)
Autocorrelation test (2): normal = 1.368565 (p-value = 0.17114)
Wald test for coefficients: chisq(4) = 314880.1 (p-value = < 2.22e-16)
Wald test for time dummies: chisq(5) = 29.57621 (p-value = 1.7869e-05)

5.2.3 Model Diagnostics

# Dynamic fixed effects (within) benchmark for the coefficient on lagged GDP.
# NOTE(review): this fit uses the default one-way individual effect, while
# the GMM fits are two-way -- confirm the comparison is intended.
FEMdyn <- plm(
  formula = modeldyn2,
  data = datapanel,
  model = "within",
  index = c("province", "year")
)
summary(FEMdyn)
Oneway (individual) effect Within Model

Call:
plm(formula = modeldyn2, data = datapanel, model = "within", 
    index = c("province", "year"))

Balanced Panel: n = 33, T = 6, N = 198

Residuals:
       Min.     1st Qu.      Median     3rd Qu.        Max. 
-5.8397e-02 -6.5483e-03  1.4102e-05  5.9536e-03  1.1759e-01 

Coefficients:
                    Estimate Std. Error t-value  Pr(>|t|)    
lag(log(realgdp))  0.7670629  0.0397433 19.3004 < 2.2e-16 ***
log(population)   -0.2488432  0.1100829 -2.2605   0.02513 *  
log(investment)   -0.0014210  0.0024243 -0.5861   0.55862    
log(hdi)           1.8786968  0.2913007  6.4493 1.253e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Total Sum of Squares:    1.9728
Residual Sum of Squares: 0.050149
R-Squared:      0.97458
Adj. R-Squared: 0.96889
F-statistic: 1543.1 on 4 and 161 DF, p-value: < 2.22e-16
# Dynamic pooled OLS benchmark for the coefficient on lagged GDP.
OLSdyn <- plm(
  formula = modeldyn2,
  data = datapanel,
  model = "pooling",
  index = c("province", "year")
)
summary(OLSdyn)
Pooling Model

Call:
plm(formula = modeldyn2, data = datapanel, model = "pooling", 
    index = c("province", "year"))

Balanced Panel: n = 33, T = 6, N = 198

Residuals:
       Min.     1st Qu.      Median     3rd Qu.        Max. 
-0.10036444 -0.00975944 -0.00044271  0.00946369  0.12897120 

Coefficients:
                    Estimate Std. Error  t-value Pr(>|t|)    
(Intercept)       -0.1070843  0.1261711  -0.8487  0.39709    
lag(log(realgdp))  0.9868710  0.0041424 238.2382  < 2e-16 ***
log(population)    0.0088980  0.0036806   2.4175  0.01656 *  
log(investment)    0.0016145  0.0016016   1.0080  0.31470    
log(hdi)           0.0397307  0.0297085   1.3374  0.18268    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Total Sum of Squares:    269.87
Residual Sum of Squares: 0.09922
R-Squared:      0.99963
Adj. R-Squared: 0.99962
F-statistic: 131187 on 4 and 193 DF, p-value: < 2.22e-16

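Coefficient on lag(log(realgdp)): FD-GMM = 0.731, System GMM = 0.969, FEM = 0.767, pooled OLS = 0.987

A credible GMM estimate of the lagged dependent variable should lie between the FEM estimate (biased downward) and the pooled OLS estimate (biased upward): FEM < GMM < OLS. System GMM satisfies this (0.767 < 0.969 < 0.987), whereas FD-GMM falls below the FEM bound (0.731 < 0.767). Best Model: System GMM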

# Show the System GMM summary again as the selected model.
summary(sys.gmm)
Warning in vcovHC.pgmm(object): a general inverse is used
Twoways effects One-step model System GMM 

Call:
pgmm(formula = modeldyn1, data = datapanel, transformation = "ld")

Balanced Panel: n = 33, T = 7, N = 231

Number of Observations Used: 363
Residuals:
      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-0.1198240 -0.0083907  0.0001403  0.0000000  0.0065018  0.1402918 

Coefficients:
                   Estimate Std. Error z-value  Pr(>|z|)    
lag(log(realgdp)) 0.9686425  0.0107842 89.8208 < 2.2e-16 ***
log(population)   0.0207419  0.0078302  2.6490  0.008074 ** 
log(investment)   0.0070513  0.0029150  2.4190  0.015565 *  
log(hdi)          0.1144280  0.0582058  1.9659  0.049308 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Sargan test: chisq(22) = 23.67029 (p-value = 0.36475)
Autocorrelation test (1): normal = -1.663193 (p-value = 0.096274)
Autocorrelation test (2): normal = 1.368565 (p-value = 0.17114)
Wald test for coefficients: chisq(4) = 314880.1 (p-value = < 2.22e-16)
Wald test for time dummies: chisq(5) = 29.57621 (p-value = 1.7869e-05)

5.2.4 Speed of Adjustment

# Coefficient on lagged log real GDP from the System GMM fit. Extracted with
# coef() and by name rather than `$coef[1]`, which relied on partial matching
# of `coefficients` and on the coefficient order.
alpha1 <- coef(sys.gmm)["lag(log(realgdp))"]
# Speed of adjustment: share of the gap closed per period.
1 - alpha1
lag(log(realgdp)) 
       0.03135749 

5.2.5 Half-Life

# Half-life: number of periods until half of a deviation has died out,
# log(0.5) / log(coefficient on lagged log real GDP). The coefficient is
# extracted with coef() and by name instead of the partially matched `$coef`.
log(0.5) / log(coef(sys.gmm)["lag(log(realgdp))"])
lag(log(realgdp)) 
         21.75626 

5.2.6 Short Run and Long Run Coefficients

# Short-run coefficient of log(population)
coef(sys.gmm)["log(population)"]
log(population) 
     0.02074192 
# Long-run coefficient of log(population): the short-run coefficient
# divided by the speed of adjustment (1 - alpha1)
coef(sys.gmm)["log(population)"] / (1 - alpha1)
log(population) 
      0.6614661