Example with Varying Occasions
This example builds on the tutorial about college GPA, in Analysing Longitudinal Data: Fixed Occasions. Here, instead of analyzing data in which subjects are measured at the same time points, we have data where the subjects were measured at varying time points.
Here is the description from the Multilevel Analysis website for the Hox, Moerbeek, & van de Schoot (2018) text:
The data are a sample of 405 children who were within the first two years of entry to elementary school. The data consist of four repeated measures of both the child’s antisocial behavior and the child’s reading recognition skills. In addition, on the first measurement occasion, measures were collected of emotional support and cognitive stimulation provided by the mother. The data were collected using face-to-face interviews of both the child and the mother at two-year intervals between 1986 and 1992."
Variables
id |
Child identification number |
anti1-4 |
antisocial behavior time 1,2,3,4 |
read1-4 |
reading ability time 1,2,3,4 |
kidgen |
gender child (0 = girl, 1 = boy) |
momage |
age mother at time 1 |
kidage |
age child at time 1 |
homecog |
cognitive stimulation |
homeemo |
emotional support |
nmis |
number of missing values |
Import and Clean Data
curran <- read_sav("data/CurranData.sav")
curran <- within(curran, {
id <- factor(id)
kidgen <- factor(kidgen, labels = c("girl", "boy"))
})
curranlong <- gather(curran, key = key, value = val, anti1:read4)
curranlong <- separate(curranlong,
col = key,
into = c("variable", "occasion"),
sep = 4)
curranlong <- spread(curranlong, key = variable, value = val)
curranlong <- arrange(curranlong, id, occasion)
curranlong <- within(curranlong, {
occasion <- as.numeric(occasion)-1
kidagetv <- kidage + 2*occasion
})
curranlong <- subset(curranlong, !is.na(read) | !is.na(anti))
# or with dplyr
# curranlong <- filter(!is.na(read) | !is.na(anti))
tapply(curranlong$read, curranlong$occasion, function(x) sum(!is.na(x)))
0 1 2 3
405 375 275 270
ggplot(curranlong, aes(y = read, x = occasion)) + geom_point()
ggplot(curranlong, aes(y = read, x = kidage)) + geom_point()
ggplot(curranlong, aes(y = read, x = kidagetv)) + geom_point()
ggplot(curranlong, aes(y = read, x = kidagetv, group = id)) + geom_point() +
geom_line(alpha = .3)
Missing Data Analysis
library(VIM)
curran <- read_sav("data/CurranData.sav")
curran <- within(curran, {
id <- factor(id)
kidgen <- factor(kidgen, labels = c("girl", "boy"))
})
# Create long version of data
curranlong <- curran %>%
gather( key = key, value = val, anti1:read4) %>%
separate(col = key, into = c("variable", "occasion"), sep = 4) %>%
spread(key = variable, value = val) %>%
arrange(id, occasion) %>%
mutate(time = as.numeric(occasion)-1,
kidagetv = kidage + 2*time,
kidage6 = kidagetv - 6,
kidagesqr = kidage6^2,
c_momage = momage - mean(momage),
c_homecog = homecog - mean(homecog),
c_homeemo = homeemo - mean(homeemo)) # %>%
# filter(!is.na(read) | !is.na(anti))
aggr(curranlong, numbers = TRUE, prop = TRUE)
curranlong <- filter(curranlong, !is.na(read) | !is.na(anti))
aggr(curranlong, numbers = TRUE)
Models
curran <- read_sav("data/CurranData.sav")
curran <- within(curran, {
id <- factor(id)
kidgen <- factor(kidgen, labels = c("girl", "boy"))
})
# Create long version of data
curranlong <- curran %>%
gather( key = key, value = val, anti1:read4) %>%
separate(col = key, into = c("variable", "occasion"), sep = 4) %>%
spread(key = variable, value = val) %>%
arrange(id, occasion) %>%
mutate(time = as.numeric(occasion)-1,
kidagetv = kidage + 2*time,
kidage6 = kidagetv - 6,
kidagesqr = kidage6^2,
c_momage = momage - mean(momage),
c_homecog = homecog - mean(homecog),
c_homeemo = homeemo - mean(homeemo)) %>%
filter(!is.na(read) | !is.na(anti))
mod0 <- lmer(read ~ 1 + (1 | id), curranlong, REML = FALSE)
modage <- lmer(read ~ kidage6 + (1 | id), curranlong, REML = FALSE)
modage2 <- update(modage, . ~ . + kidagesqr)
modage2_2 <- update(modage2, . ~ . + c_momage + c_homecog + c_homeemo)
modage2_2rs <- update(modage2_2, . ~ . - (1 | id) + (1 + kidage6 | id))
modage2_2rsx <- update(modage2_2rs, . ~ . + kidage6:c_momage +
kidage6:c_homeemo +
kidage6:c_homecog)
modage2_2rsx2 <- update(modage2_2rsx, . ~ . - kidage6:c_homecog)
Baseline Models: Modeling Child Age
htmlreg(list(mod0, modage, modage2),
custom.model.names = c("Null", "Linear Age", "Quadratic Age"))
Statistical models
Â
|
Null
|
Linear Age
|
Quadratic Age
|
(Intercept)
|
4.11***
|
2.19***
|
1.74***
|
Â
|
(0.05)
|
(0.05)
|
(0.06)
|
kidage6
|
Â
|
0.55***
|
0.93***
|
Â
|
Â
|
(0.01)
|
(0.03)
|
kidagesqr
|
Â
|
Â
|
-0.05***
|
Â
|
Â
|
Â
|
(0.00)
|
AIC
|
5057.83
|
3421.92
|
3255.05
|
BIC
|
5073.40
|
3442.68
|
3280.99
|
Log Likelihood
|
-2525.91
|
-1706.96
|
-1622.52
|
Num. obs.
|
1325
|
1325
|
1325
|
Num. groups: id
|
405
|
405
|
405
|
Var: id (Intercept)
|
0.30
|
0.65
|
0.66
|
Var: Residual
|
2.39
|
0.46
|
0.39
|
p < 0.001; p < 0.01; p < 0.05
|
anova(modage, modage2)
Data: curranlong
Models:
modage: read ~ kidage6 + (1 | id)
modage2: read ~ kidage6 + (1 | id) + kidagesqr
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
modage 4 3421.9 3442.7 -1707.0 3413.9
modage2 5 3255.0 3281.0 -1622.5 3245.0 168.87 1 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Models with Level 2 Predictors
Note, we are not considering the antisocial behavior in these models.
htmlreg(list(modage2, modage2_2),
custom.model.names = c("Quadratic", "Quadratic+L2 pred"))
Statistical models
Â
|
Quadratic
|
Quadratic+L2 pred
|
(Intercept)
|
1.74***
|
1.74***
|
Â
|
(0.06)
|
(0.06)
|
kidage6
|
0.93***
|
0.92***
|
Â
|
(0.03)
|
(0.03)
|
kidagesqr
|
-0.05***
|
-0.05***
|
Â
|
(0.00)
|
(0.00)
|
c_momage
|
Â
|
0.05*
|
Â
|
Â
|
(0.02)
|
c_homecog
|
Â
|
0.05**
|
Â
|
Â
|
(0.02)
|
c_homeemo
|
Â
|
0.04*
|
Â
|
Â
|
(0.02)
|
AIC
|
3255.05
|
3232.20
|
BIC
|
3280.99
|
3273.71
|
Log Likelihood
|
-1622.52
|
-1608.10
|
Num. obs.
|
1325
|
1325
|
Num. groups: id
|
405
|
405
|
Var: id (Intercept)
|
0.66
|
0.60
|
Var: Residual
|
0.39
|
0.39
|
p < 0.001; p < 0.01; p < 0.05
|
anova(modage2, modage2_2)
Data: curranlong
Models:
modage2: read ~ kidage6 + (1 | id) + kidagesqr
modage2_2: read ~ kidage6 + (1 | id) + kidagesqr + c_momage + c_homecog +
modage2_2: c_homeemo
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
modage2 5 3255.0 3281.0 -1622.5 3245.0
modage2_2 8 3232.2 3273.7 -1608.1 3216.2 28.852 3 2.406e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model with Random Slope of age
htmlreg(list(modage2_2, modage2_2rs),
custom.model.names = c("Fixed Age Curve", "Random Age Curve"))
Statistical models
Â
|
Fixed Age Curve
|
Random Age Curve
|
(Intercept)
|
1.74***
|
1.75***
|
Â
|
(0.06)
|
(0.05)
|
kidage6
|
0.92***
|
0.92***
|
Â
|
(0.03)
|
(0.03)
|
kidagesqr
|
-0.05***
|
-0.05***
|
Â
|
(0.00)
|
(0.00)
|
c_momage
|
0.05*
|
0.03
|
Â
|
(0.02)
|
(0.02)
|
c_homecog
|
0.05**
|
0.04*
|
Â
|
(0.02)
|
(0.01)
|
c_homeemo
|
0.04*
|
0.00
|
Â
|
(0.02)
|
(0.02)
|
AIC
|
3232.20
|
3035.44
|
BIC
|
3273.71
|
3087.33
|
Log Likelihood
|
-1608.10
|
-1507.72
|
Num. obs.
|
1325
|
1325
|
Num. groups: id
|
405
|
405
|
Var: id (Intercept)
|
0.60
|
0.21
|
Var: Residual
|
0.39
|
0.28
|
Var: id kidage6
|
Â
|
0.02
|
Cov: id (Intercept) kidage6
|
Â
|
0.04
|
p < 0.001; p < 0.01; p < 0.05
|
ranova(modage2_2rs)
ANOVA-like table for random-effects: Single term deletions
Model:
read ~ kidage6 + kidagesqr + c_momage + c_homecog + c_homeemo +
(1 + kidage6 | id)
npar logLik AIC LRT Df Pr(>Chisq)
<none> 10 -1507.7 3035.4
kidage6 in (1 + kidage6 | id) 8 -1608.1 3232.2 200.76 2 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model with Cross-level Interactions
htmlreg(list(modage2_2rs, modage2_2rsx, modage2_2rsx2),
custom.model.names = c("No Interaction", "3 Interactions", "2 Interactions"))
Statistical models
Â
|
No Interaction
|
3 Interactions
|
2 Interactions
|
(Intercept)
|
1.75***
|
1.75***
|
1.75***
|
Â
|
(0.05)
|
(0.05)
|
(0.05)
|
kidage6
|
0.92***
|
0.92***
|
0.92***
|
Â
|
(0.03)
|
(0.03)
|
(0.03)
|
kidagesqr
|
-0.05***
|
-0.05***
|
-0.05***
|
Â
|
(0.00)
|
(0.00)
|
(0.00)
|
c_momage
|
0.03
|
0.02
|
0.02
|
Â
|
(0.02)
|
(0.02)
|
(0.02)
|
c_homecog
|
0.04*
|
0.03*
|
0.04*
|
Â
|
(0.01)
|
(0.02)
|
(0.01)
|
c_homeemo
|
0.00
|
-0.01
|
-0.01
|
Â
|
(0.02)
|
(0.02)
|
(0.02)
|
kidage6:c_momage
|
Â
|
0.01
|
0.01*
|
Â
|
Â
|
(0.01)
|
(0.01)
|
kidage6:c_homeemo
|
Â
|
0.01**
|
0.01***
|
Â
|
Â
|
(0.00)
|
(0.00)
|
kidage6:c_homecog
|
Â
|
0.01
|
Â
|
Â
|
Â
|
(0.00)
|
Â
|
AIC
|
3035.44
|
3018.90
|
3019.29
|
BIC
|
3087.33
|
3086.36
|
3081.56
|
Log Likelihood
|
-1507.72
|
-1496.45
|
-1497.64
|
Num. obs.
|
1325
|
1325
|
1325
|
Num. groups: id
|
405
|
405
|
405
|
Var: id (Intercept)
|
0.21
|
0.21
|
0.21
|
Var: id kidage6
|
0.02
|
0.02
|
0.02
|
Cov: id (Intercept) kidage6
|
0.04
|
0.04
|
0.04
|
Var: Residual
|
0.28
|
0.28
|
0.28
|
p < 0.001; p < 0.01; p < 0.05
|
anova(modage2_2rs, modage2_2rsx, modage2_2rsx2)
Data: curranlong
Models:
modage2_2rs: read ~ kidage6 + kidagesqr + c_momage + c_homecog + c_homeemo +
modage2_2rs: (1 + kidage6 | id)
modage2_2rsx2: read ~ kidage6 + kidagesqr + c_momage + c_homecog + c_homeemo +
modage2_2rsx2: (1 + kidage6 | id) + kidage6:c_momage + kidage6:c_homeemo
modage2_2rsx: read ~ kidage6 + kidagesqr + c_momage + c_homecog + c_homeemo +
modage2_2rsx: (1 + kidage6 | id) + kidage6:c_momage + kidage6:c_homeemo +
modage2_2rsx: kidage6:c_homecog
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
modage2_2rs 10 3035.4 3087.3 -1507.7 3015.4
modage2_2rsx2 12 3019.3 3081.6 -1497.6 2995.3 20.1558 2 4.2e-05 ***
modage2_2rsx 13 3018.9 3086.4 -1496.5 2992.9 2.3873 1 0.1223
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Respecify model without centering for plots
modfull <- lmer(read ~ kidagetv + I(kidagetv^2) + momage + homecog + homeemo +
kidagetv:momage + kidagetv:homeemo +
(kidagetv | id), curranlong, REML = FALSE)
# Interactions for final model
interact_plot(modfull, kidagetv, momage, modx.values = c(21, 25, 29))
interact_plot(modfull, kidagetv, homeemo, modx.values = c(0, 9.2, 13))
sim_slopes(modfull, kidagetv, momage, johnson_neyman = TRUE, jnplot = TRUE)
JOHNSON-NEYMAN INTERVAL
When momage is OUTSIDE the interval [-430.47, -0.34], the slope of kidagetv
is p < .05.
Note: The range of observed values of momage is [21.00, 29.00]
SIMPLE SLOPES ANALYSIS
Slope of kidagetv when momage = 23.68 (- 1 SD):
Est. S.E. t val. p
------ ------ -------- ------
0.56 0.01 40.03 0.00
Slope of kidagetv when momage = 25.54 (Mean):
Est. S.E. t val. p
------ ------ -------- ------
0.58 0.01 58.86 0.00
Slope of kidagetv when momage = 27.41 (+ 1 SD):
Est. S.E. t val. p
------ ------ -------- ------
0.60 0.01 42.32 0.00
sim_slopes(modfull, kidagetv, momage, johnson_neyman = TRUE, jnplot = TRUE)
JOHNSON-NEYMAN INTERVAL
When momage is OUTSIDE the interval [-430.47, -0.34], the slope of kidagetv
is p < .05.
Note: The range of observed values of momage is [21.00, 29.00]
SIMPLE SLOPES ANALYSIS
Slope of kidagetv when momage = 23.68 (- 1 SD):
Est. S.E. t val. p
------ ------ -------- ------
0.56 0.01 40.03 0.00
Slope of kidagetv when momage = 25.54 (Mean):
Est. S.E. t val. p
------ ------ -------- ------
0.58 0.01 58.86 0.00
Slope of kidagetv when momage = 27.41 (+ 1 SD):
Est. S.E. t val. p
------ ------ -------- ------
0.60 0.01 42.32 0.00