Example with Varying Occasions

This example builds on the tutorial about college GPA, in Analysing Longitudinal Data: Fixed Occasions. Here, instead of analyzing data in which subjects are measured at the same time points, we have data where the subjects were measured at varying time points.

Here is the description from the Multilevel Analysis website for the Hox, Moerbeek, & van de Schoot (2018) text:

The data are a sample of 405 children who were within the first two years of entry to elementary school. The data consist of four repeated measures of both the child’s antisocial behavior and the child’s reading recognition skills. In addition, on the first measurement occasion, measures were collected of emotional support and cognitive stimulation provided by the mother. The data were collected using face-to-face interviews of both the child and the mother at two-year intervals between 1986 and 1992."

Variables

variable name variable label
id Child identification number
anti1-4 antisocial behavior time 1,2,3,4
read1-4 reading ability time 1,2,3,4
kidgen gender child (0 = girl, 1 = boy)
momage age mother at time 1
kidage age child at time 1
homecog cognitive stimulation
homeemo emotional support
nmis number of missing values

Import and Clean Data

curran <- read_sav("data/CurranData.sav")
curran <- within(curran, {
  id <- factor(id)
  kidgen <- factor(kidgen, labels = c("girl", "boy"))
})

curranlong <- gather(curran, key = key, value = val, anti1:read4)
curranlong <- separate(curranlong, 
                       col = key, 
                       into = c("variable", "occasion"), 
                       sep = 4)
curranlong <- spread(curranlong, key = variable, value = val)
curranlong <- arrange(curranlong, id, occasion)
curranlong <- within(curranlong, {
  occasion <- as.numeric(occasion)-1
  kidagetv <- kidage + 2*occasion
})
curranlong <- subset(curranlong, !is.na(read) | !is.na(anti))
# or with dplyr
# curranlong <- filter(!is.na(read) | !is.na(anti))
tapply(curranlong$read, curranlong$occasion, function(x) sum(!is.na(x)))
  0   1   2   3 
405 375 275 270 
ggplot(curranlong, aes(y = read, x = occasion)) + geom_point()

ggplot(curranlong, aes(y = read, x = kidage)) + geom_point()

ggplot(curranlong, aes(y = read, x = kidagetv)) + geom_point()

ggplot(curranlong, aes(y = read, x = kidagetv, group = id)) + geom_point() +
  geom_line(alpha = .3)

Missing Data Analysis

library(VIM)

curran <- read_sav("data/CurranData.sav")

curran <- within(curran, {
  id <- factor(id)
  kidgen <- factor(kidgen, labels = c("girl", "boy"))
})

# Create long version of data
curranlong <- curran %>% 
  gather( key = key, value = val, anti1:read4) %>% 
  separate(col = key, into = c("variable", "occasion"), sep = 4) %>% 
  spread(key = variable, value = val) %>% 
  arrange(id, occasion) %>% 
  mutate(time = as.numeric(occasion)-1,
         kidagetv = kidage + 2*time,
         kidage6 = kidagetv - 6,
         kidagesqr = kidage6^2,
         c_momage = momage - mean(momage),
         c_homecog = homecog - mean(homecog),
         c_homeemo = homeemo - mean(homeemo)) # %>% 
#  filter(!is.na(read) | !is.na(anti))
aggr(curranlong, numbers = TRUE, prop = TRUE)

curranlong <- filter(curranlong, !is.na(read) | !is.na(anti))
aggr(curranlong, numbers = TRUE)

Models

curran <- read_sav("data/CurranData.sav")

curran <- within(curran, {
  id <- factor(id)
  kidgen <- factor(kidgen, labels = c("girl", "boy"))
})
# Create long version of data
curranlong <- curran %>% 
  gather( key = key, value = val, anti1:read4) %>% 
  separate(col = key, into = c("variable", "occasion"), sep = 4) %>% 
  spread(key = variable, value = val) %>% 
  arrange(id, occasion) %>% 
  mutate(time = as.numeric(occasion)-1,
         kidagetv = kidage + 2*time,
         kidage6 = kidagetv - 6,
         kidagesqr = kidage6^2,
         c_momage = momage - mean(momage),
         c_homecog = homecog - mean(homecog),
         c_homeemo = homeemo - mean(homeemo)) %>% 
  filter(!is.na(read) | !is.na(anti))
mod0 <- lmer(read ~ 1 + (1 | id), curranlong, REML = FALSE)
modage <- lmer(read ~ kidage6 + (1 | id), curranlong, REML = FALSE)

modage2 <-             update(modage, . ~ . + kidagesqr)
modage2_2 <-          update(modage2, . ~ . + c_momage + c_homecog + c_homeemo)
modage2_2rs <-      update(modage2_2, . ~ . - (1 | id) + (1 + kidage6 | id))
modage2_2rsx <-   update(modage2_2rs, . ~ . + kidage6:c_momage + 
                                              kidage6:c_homeemo +
                                              kidage6:c_homecog)

modage2_2rsx2 <- update(modage2_2rsx, . ~ . - kidage6:c_homecog)

Baseline Models: Modeling Child Age

htmlreg(list(mod0, modage, modage2), 
          custom.model.names = c("Null", "Linear Age", "Quadratic Age"))
Statistical models
  Null Linear Age Quadratic Age
(Intercept) 4.11*** 2.19*** 1.74***
  (0.05) (0.05) (0.06)
kidage6   0.55*** 0.93***
    (0.01) (0.03)
kidagesqr     -0.05***
      (0.00)
AIC 5057.83 3421.92 3255.05
BIC 5073.40 3442.68 3280.99
Log Likelihood -2525.91 -1706.96 -1622.52
Num. obs. 1325 1325 1325
Num. groups: id 405 405 405
Var: id (Intercept) 0.30 0.65 0.66
Var: Residual 2.39 0.46 0.39
p < 0.001; p < 0.01; p < 0.05
anova(modage, modage2)
Data: curranlong
Models:
modage: read ~ kidage6 + (1 | id)
modage2: read ~ kidage6 + (1 | id) + kidagesqr
        npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
modage     4 3421.9 3442.7 -1707.0   3413.9                         
modage2    5 3255.0 3281.0 -1622.5   3245.0 168.87  1  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Models with Level 2 Predictors

Note, we are not considering the antisocial behavior in these models.

htmlreg(list(modage2, modage2_2),
          custom.model.names = c("Quadratic", "Quadratic+L2 pred"))
Statistical models
  Quadratic Quadratic+L2 pred
(Intercept) 1.74*** 1.74***
  (0.06) (0.06)
kidage6 0.93*** 0.92***
  (0.03) (0.03)
kidagesqr -0.05*** -0.05***
  (0.00) (0.00)
c_momage   0.05*
    (0.02)
c_homecog   0.05**
    (0.02)
c_homeemo   0.04*
    (0.02)
AIC 3255.05 3232.20
BIC 3280.99 3273.71
Log Likelihood -1622.52 -1608.10
Num. obs. 1325 1325
Num. groups: id 405 405
Var: id (Intercept) 0.66 0.60
Var: Residual 0.39 0.39
p < 0.001; p < 0.01; p < 0.05
anova(modage2, modage2_2)
Data: curranlong
Models:
modage2: read ~ kidage6 + (1 | id) + kidagesqr
modage2_2: read ~ kidage6 + (1 | id) + kidagesqr + c_momage + c_homecog + 
modage2_2:     c_homeemo
          npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
modage2      5 3255.0 3281.0 -1622.5   3245.0                         
modage2_2    8 3232.2 3273.7 -1608.1   3216.2 28.852  3  2.406e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model with Random Slope of age

htmlreg(list(modage2_2, modage2_2rs), 
          custom.model.names = c("Fixed Age Curve", "Random Age Curve"))
Statistical models
  Fixed Age Curve Random Age Curve
(Intercept) 1.74*** 1.75***
  (0.06) (0.05)
kidage6 0.92*** 0.92***
  (0.03) (0.03)
kidagesqr -0.05*** -0.05***
  (0.00) (0.00)
c_momage 0.05* 0.03
  (0.02) (0.02)
c_homecog 0.05** 0.04*
  (0.02) (0.01)
c_homeemo 0.04* 0.00
  (0.02) (0.02)
AIC 3232.20 3035.44
BIC 3273.71 3087.33
Log Likelihood -1608.10 -1507.72
Num. obs. 1325 1325
Num. groups: id 405 405
Var: id (Intercept) 0.60 0.21
Var: Residual 0.39 0.28
Var: id kidage6   0.02
Cov: id (Intercept) kidage6   0.04
p < 0.001; p < 0.01; p < 0.05
ranova(modage2_2rs)
ANOVA-like table for random-effects: Single term deletions

Model:
read ~ kidage6 + kidagesqr + c_momage + c_homecog + c_homeemo + 
    (1 + kidage6 | id)
                              npar  logLik    AIC    LRT Df Pr(>Chisq)    
<none>                          10 -1507.7 3035.4                         
kidage6 in (1 + kidage6 | id)    8 -1608.1 3232.2 200.76  2  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model with Cross-level Interactions

htmlreg(list(modage2_2rs, modage2_2rsx, modage2_2rsx2),
          custom.model.names = c("No Interaction", "3 Interactions", "2 Interactions"))
Statistical models
  No Interaction 3 Interactions 2 Interactions
(Intercept) 1.75*** 1.75*** 1.75***
  (0.05) (0.05) (0.05)
kidage6 0.92*** 0.92*** 0.92***
  (0.03) (0.03) (0.03)
kidagesqr -0.05*** -0.05*** -0.05***
  (0.00) (0.00) (0.00)
c_momage 0.03 0.02 0.02
  (0.02) (0.02) (0.02)
c_homecog 0.04* 0.03* 0.04*
  (0.01) (0.02) (0.01)
c_homeemo 0.00 -0.01 -0.01
  (0.02) (0.02) (0.02)
kidage6:c_momage   0.01 0.01*
    (0.01) (0.01)
kidage6:c_homeemo   0.01** 0.01***
    (0.00) (0.00)
kidage6:c_homecog   0.01  
    (0.00)  
AIC 3035.44 3018.90 3019.29
BIC 3087.33 3086.36 3081.56
Log Likelihood -1507.72 -1496.45 -1497.64
Num. obs. 1325 1325 1325
Num. groups: id 405 405 405
Var: id (Intercept) 0.21 0.21 0.21
Var: id kidage6 0.02 0.02 0.02
Cov: id (Intercept) kidage6 0.04 0.04 0.04
Var: Residual 0.28 0.28 0.28
p < 0.001; p < 0.01; p < 0.05
anova(modage2_2rs, modage2_2rsx, modage2_2rsx2)
Data: curranlong
Models:
modage2_2rs: read ~ kidage6 + kidagesqr + c_momage + c_homecog + c_homeemo + 
modage2_2rs:     (1 + kidage6 | id)
modage2_2rsx2: read ~ kidage6 + kidagesqr + c_momage + c_homecog + c_homeemo + 
modage2_2rsx2:     (1 + kidage6 | id) + kidage6:c_momage + kidage6:c_homeemo
modage2_2rsx: read ~ kidage6 + kidagesqr + c_momage + c_homecog + c_homeemo + 
modage2_2rsx:     (1 + kidage6 | id) + kidage6:c_momage + kidage6:c_homeemo + 
modage2_2rsx:     kidage6:c_homecog
              npar    AIC    BIC  logLik deviance   Chisq Df Pr(>Chisq)    
modage2_2rs     10 3035.4 3087.3 -1507.7   3015.4                          
modage2_2rsx2   12 3019.3 3081.6 -1497.6   2995.3 20.1558  2    4.2e-05 ***
modage2_2rsx    13 3018.9 3086.4 -1496.5   2992.9  2.3873  1     0.1223    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Respecify model without centering for plots
modfull <- lmer(read ~ kidagetv + I(kidagetv^2) + momage + homecog + homeemo + 
                  kidagetv:momage + kidagetv:homeemo +
                  (kidagetv | id), curranlong, REML = FALSE)
# Interactions for final model
interact_plot(modfull, kidagetv, momage, modx.values = c(21, 25, 29))

interact_plot(modfull, kidagetv, homeemo, modx.values = c(0, 9.2, 13))

sim_slopes(modfull, kidagetv, momage, johnson_neyman = TRUE, jnplot = TRUE)
JOHNSON-NEYMAN INTERVAL 

When momage is OUTSIDE the interval [-430.47, -0.34], the slope of kidagetv
is p < .05.

Note: The range of observed values of momage is [21.00, 29.00]

SIMPLE SLOPES ANALYSIS 

Slope of kidagetv when momage = 23.68 (- 1 SD): 

  Est.   S.E.   t val.      p
------ ------ -------- ------
  0.56   0.01    40.03   0.00

Slope of kidagetv when momage = 25.54 (Mean): 

  Est.   S.E.   t val.      p
------ ------ -------- ------
  0.58   0.01    58.86   0.00

Slope of kidagetv when momage = 27.41 (+ 1 SD): 

  Est.   S.E.   t val.      p
------ ------ -------- ------
  0.60   0.01    42.32   0.00
sim_slopes(modfull, kidagetv, momage, johnson_neyman = TRUE, jnplot = TRUE)
JOHNSON-NEYMAN INTERVAL 

When momage is OUTSIDE the interval [-430.47, -0.34], the slope of kidagetv
is p < .05.

Note: The range of observed values of momage is [21.00, 29.00]

SIMPLE SLOPES ANALYSIS 

Slope of kidagetv when momage = 23.68 (- 1 SD): 

  Est.   S.E.   t val.      p
------ ------ -------- ------
  0.56   0.01    40.03   0.00

Slope of kidagetv when momage = 25.54 (Mean): 

  Est.   S.E.   t val.      p
------ ------ -------- ------
  0.58   0.01    58.86   0.00

Slope of kidagetv when momage = 27.41 (+ 1 SD): 

  Est.   S.E.   t val.      p
------ ------ -------- ------
  0.60   0.01    42.32   0.00