20 GLMM, Binary Outcome: Muscatine Obesity

20.1 Packages

20.1.1 CRAN

library(tidyverse)    # all things tidy
library(pander)       # nice looking genderal tabulations
library(furniture)    # nice table1() descriptives
library(texreg)       # Convert Regression Output to LaTeX or HTML Tables
library(psych)        # contains some useful functions, like headTail

library(lme4)         # Linear, generalized linear, & nonlinear mixed models
library(gee)          # Generalized Estimating Equations
library(effects)      # Plotting estimated marginal means
library(performance)
library(interactions)

library(patchwork)    # combining graphics

20.1.2 GitHub

Helper extract functions for exponentiating parameters form generalized regression models within a texreg table of model parameters.

# install.packages("devtools")
# library(devtools)
# install_github("SarBearSchwartz/texreghelpr")

library(texreghelpr)

20.2 Data Prep

Data on Obesity from the Muscatine Coronary Risk Factor Study.

Source:

Table 10 (page 96) in Woolson and Clarke (1984). With permission of Blackwell Publishing.

Reference:

Woolson, R.F. and Clarke, W.R. (1984). Analysis of categorical incompletel longitudinal data. Journal of the Royal Statistical Society, Series A, 147, 87-99.

Description:

The Muscatine Coronary Risk Factor Study (MCRFS) was a longitudinal study of coronary risk factors in school children in Muscatine, Iowa (Woolson and Clarke 1984; Ekholm and Skinner 1998). Five cohorts of children were measured for height and weight in 1977, 1979, and 1981. Relative weight was calculated as the ratio of a child’s observed weight to the median weight for their age-sex-height group. Children with a relative weight greater than 110% of the median weight for their respective stratum were classified as obese. The analysis of this study involves binary data (1 = obese, 0 = not obese) collected at successive time points.

This data was also using in an article title “Missing data methods in longitudinal studies: a review” (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3016756/).

Variable List:

  • Indicators

    • id Child’s unique identification number
    • occas Occasion number: 1, 2, 3
  • Outcome or dependent variable

    • obesity Obesity Status, 0 = no, 1 = yes
  • Main predictor or independent variable of interest

    • gender 0 = Male, 1 = Female
    • baseage Baseline Age, mid-point of age-cohort
    • currage Current Age, mid-point of age-cohort

20.2.1 Import

data_raw <- read.table("https://raw.githubusercontent.com/CEHS-research/data/master/MLM/Muscatine.txt", header=TRUE)
str(data_raw)
'data.frame':   14568 obs. of  6 variables:
 $ id     : int  1 1 1 2 2 2 3 3 3 4 ...
 $ gender : int  0 0 0 0 0 0 0 0 0 0 ...
 $ baseage: int  6 6 6 6 6 6 6 6 6 6 ...
 $ currage: int  6 8 10 6 8 10 6 8 10 6 ...
 $ occas  : int  1 2 3 1 2 3 1 2 3 1 ...
 $ obesity: chr  "1" "1" "1" "1" ...
psych::headTail(data_raw, top = 10)
        id gender baseage currage occas obesity
1        1      0       6       6     1       1
2        1      0       6       8     2       1
3        1      0       6      10     3       1
4        2      0       6       6     1       1
5        2      0       6       8     2       1
6        2      0       6      10     3       1
7        3      0       6       6     1       1
8        3      0       6       8     2       1
9        3      0       6      10     3       1
10       4      0       6       6     1       1
...    ...    ...     ...     ...   ...    <NA>
14565 4855      1      14      18     3       0
14566 4856      1      14      14     1       .
14567 4856      1      14      16     2       .
14568 4856      1      14      18     3       0

20.2.2 Restrict to 350ID’s of children with complete data for Class Demonstration

Dealing with missing-ness and its implications are beyond the score of this class. Instead we are going to restrict our class analysis to a subset of 350 children who have complete data

I am using the set.seed() function so that I can replicate the restults later.

complete_ids <- data_raw %>% 
  dplyr::filter(obesity %in% c("0", "1")) %>%
  dplyr::group_by(id) %>% 
  dplyr::summarise(n = n()) %>% 
  dplyr::filter(n == 3) %>% 
  dplyr::pull(id)

set.seed(8892)  # needed?

use_ids <- complete_ids %>% sample(350)

head(use_ids)
[1] 3574  805 3458 3537  679  655

20.2.3 Long Format

data_long <- data_raw %>%  
  dplyr::filter(id %in% use_ids) %>% 
  mutate(id       = id     %>% factor) %>% 
  mutate(gender   = gender %>% factor(levels = 0:1, 
                                      labels = c("Male", "Female"))) %>% 
  mutate(age_base = baseage %>% factor) %>% 
  mutate(age_curr = currage %>% factor) %>% 
  mutate(occation = occas   %>% factor) %>% 
  mutate(obesity  = obesity %>% factor(levels = 0:1, 
                                       labels = c("No", "Yes"))) %>% 
  select(id, gender, age_base, age_curr, occation, obesity)
str(data_long)
'data.frame':   1050 obs. of  6 variables:
 $ id      : Factor w/ 350 levels "1","5","10","16",..: 1 1 1 2 2 2 3 3 3 4 ...
 $ gender  : Factor w/ 2 levels "Male","Female": 1 1 1 1 1 1 1 1 1 1 ...
 $ age_base: Factor w/ 5 levels "6","8","10","12",..: 1 1 1 1 1 1 2 2 2 2 ...
 $ age_curr: Factor w/ 7 levels "6","8","10","12",..: 1 2 3 1 2 3 2 3 4 2 ...
 $ occation: Factor w/ 3 levels "1","2","3": 1 2 3 1 2 3 1 2 3 1 ...
 $ obesity : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
psych::headTail(data_long, top = 10)
       id gender age_base age_curr occation obesity
1       1   Male        6        6        1     Yes
2       1   Male        6        8        2     Yes
3       1   Male        6       10        3     Yes
4       5   Male        6        6        1     Yes
5       5   Male        6        8        2     Yes
6       5   Male        6       10        3     Yes
7      10   Male        8        8        1     Yes
8      10   Male        8       10        2     Yes
9      10   Male        8       12        3     Yes
10     16   Male        8        8        1     Yes
...  <NA>   <NA>     <NA>     <NA>     <NA>    <NA>
1047 3582 Female       14       18        3      No
1048 3584 Female       14       14        1      No
1049 3584 Female       14       16        2      No
1050 3584 Female       14       18        3      No

20.2.4 Wide Format

data_wide <- data_long %>% 
  tidyr::pivot_wider(names_from = occation,
                     names_sep = "_",
                     values_from = c(obesity, age_curr)) %>% 
  mutate_if(is.character, factor)%>% 
  group_by(id) %>% 
  mutate(num_miss = sum(is.na(c(obesity_1, obesity_2, obesity_3)))) %>% 
  ungroup() %>% 
  mutate(num_miss = as.factor(num_miss))
str(data_wide)
tibble [350 × 10] (S3: tbl_df/tbl/data.frame)
 $ id        : Factor w/ 350 levels "1","5","10","16",..: 1 2 3 4 5 6 7 8 9 10 ...
 $ gender    : Factor w/ 2 levels "Male","Female": 1 1 1 1 1 1 1 1 1 1 ...
 $ age_base  : Factor w/ 5 levels "6","8","10","12",..: 1 1 2 2 2 3 3 3 4 4 ...
 $ obesity_1 : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
 $ obesity_2 : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
 $ obesity_3 : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
 $ age_curr_1: Factor w/ 7 levels "6","8","10","12",..: 1 1 2 2 2 3 3 3 4 4 ...
 $ age_curr_2: Factor w/ 7 levels "6","8","10","12",..: 2 2 3 3 3 4 4 4 5 5 ...
 $ age_curr_3: Factor w/ 7 levels "6","8","10","12",..: 3 3 4 4 4 5 5 5 6 6 ...
 $ num_miss  : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
psych::headTail(data_wide, top = 10)
     id gender age_base obesity_1 obesity_2 obesity_3 age_curr_1 age_curr_2
1     1   Male        6       Yes       Yes       Yes          6          8
2     5   Male        6       Yes       Yes       Yes          6          8
3    10   Male        8       Yes       Yes       Yes          8         10
4    16   Male        8       Yes       Yes       Yes          8         10
5    21   Male        8       Yes       Yes       Yes          8         10
6    30   Male       10       Yes       Yes       Yes         10         12
7    44   Male       10       Yes       Yes       Yes         10         12
8    50   Male       10       Yes       Yes       Yes         10         12
9    60   Male       12       Yes       Yes       Yes         12         14
10   61   Male       12       Yes       Yes       Yes         12         14
11 <NA>   <NA>     <NA>      <NA>      <NA>      <NA>       <NA>       <NA>
12 3580 Female       14        No        No        No         14         16
13 3581 Female       14        No        No        No         14         16
14 3582 Female       14        No        No        No         14         16
15 3584 Female       14        No        No        No         14         16
   age_curr_3 num_miss
1          10        0
2          10        0
3          12        0
4          12        0
5          12        0
6          14        0
7          14        0
8          14        0
9          16        0
10         16        0
11       <NA>     <NA>
12         18        0
13         18        0
14         18        0
15         18        0

20.3 Exploratory Data Analysis

20.3.1 Summary Statistics

20.3.1.1 Demographics and Baseline

data_wide %>% 
  dplyr::group_by(gender) %>% 
  furniture::table1("Baseline Age" = age_base, 
                    "Baseline Obesity" = obesity_1,
                    total   = TRUE,
                    test    = TRUE,
                    na.rm   = FALSE,
                    output  = "markdown")
Total Male Female P-Value
n = 350 n = 166 n = 184
Baseline Age 0.759
6 42 (12%) 17 (10.2%) 25 (13.6%)
8 91 (26%) 47 (28.3%) 44 (23.9%)
10 92 (26.3%) 41 (24.7%) 51 (27.7%)
12 67 (19.1%) 33 (19.9%) 34 (18.5%)
14 58 (16.6%) 28 (16.9%) 30 (16.3%)
NA 0 (0%) 0 (0%) 0 (0%)
Baseline Obesity 0.608
No 286 (81.7%) 138 (83.1%) 148 (80.4%)
Yes 64 (18.3%) 28 (16.9%) 36 (19.6%)
NA 0 (0%) 0 (0%) 0 (0%)

20.3.1.2 Status over Time

data_summary <- data_long %>% 
  dplyr::group_by(gender, age_curr) %>% 
  dplyr::mutate(obesityN = case_when(obesity == "Yes" ~ 1,
                                     obesity == "No"  ~ 0)) %>% 
  dplyr::filter(complete.cases(gender, age_curr, obesityN)) %>% 
  dplyr::summarise(n = n(),
                   prob_est = mean(obesityN),
                   prob_SD  = sd(obesityN),
                   prob_SE  = prob_SD/sqrt(n))

data_summary
# A tibble: 14 × 6
# Groups:   gender [2]
   gender age_curr     n prob_est prob_SD prob_SE
   <fct>  <fct>    <int>    <dbl>   <dbl>   <dbl>
 1 Male   6           17    0.118   0.332  0.0805
 2 Male   8           64    0.172   0.380  0.0475
 3 Male   10         105    0.143   0.352  0.0343
 4 Male   12         121    0.198   0.400  0.0364
 5 Male   14         102    0.225   0.420  0.0416
 6 Male   16          61    0.213   0.413  0.0529
 7 Male   18          28    0.143   0.356  0.0673
 8 Female 6           25    0.16    0.374  0.0748
 9 Female 8           69    0.203   0.405  0.0488
10 Female 10         120    0.275   0.448  0.0409
11 Female 12         129    0.256   0.438  0.0386
12 Female 14         115    0.243   0.431  0.0402
13 Female 16          64    0.281   0.453  0.0566
14 Female 18          30    0.267   0.450  0.0821

20.3.2 Visualize

20.3.2.1 By cohort and gender

data_long %>% 
  dplyr::group_by(gender, age_base, age_curr) %>% 
  dplyr::mutate(obesityN = case_when(obesity == "Yes" ~ 1,
                                     obesity == "No"  ~ 0)) %>% 
  dplyr::filter(complete.cases(gender, age_curr, obesityN)) %>% 
  dplyr::summarise(n = n(),
                   prob_est = mean(obesityN),
                   prob_SD  = sd(obesityN),
                   prob_SE  = prob_SD/sqrt(n)) %>% 
  ggplot(aes(x = age_curr,
             y = prob_est,
             group = age_base,
             color = age_base)) +
  geom_point() +
  geom_line() +
  theme_bw() + 
  labs(x = "Child's Age, years",
       y = "Proportion Obese") +
  facet_grid(. ~ gender)

20.3.2.2 BY only gender

data_summary %>% 
  ggplot(aes(x = age_curr,
             y = prob_est,
             group = gender)) +
  geom_ribbon(aes(ymin = prob_est - prob_SE,
                  ymax = prob_est + prob_SE,
                  fill = gender),
              alpha = .3) +
  geom_point(aes(color = gender,
                 shape = gender)) +
  geom_line(aes(linetype = gender,
                color = gender)) +
  theme_bw() + 
  scale_color_manual(values = c("dodger blue", "hot pink")) +
  scale_fill_manual(values = c("dodger blue", "hot pink")) +
  labs(x = "Child's Age, years",
       y = "Proportion Obese")

Smooth out the trends

data_summary %>% 
  ggplot(aes(x = age_curr,
             y = prob_est,
             group = gender,
             color = gender)) +
  geom_smooth(method = "lm",
              formula = y ~ poly(x, 2),
              se = FALSE) +
  theme_bw() + 
  scale_color_manual(values = c("dodger blue", "hot pink")) +
  scale_fill_manual(values = c("dodger blue", "hot pink")) +
  labs(x = "Child's Age, years",
       y = "Proportion Obese")

20.4 Analysis Goal

Does risk of obesity increase with age and are patterns of change similar for both sexes?

There are 5 age cohorts that were measured each for 3 years, baseage and currage are age midpoints of those cohort groups. Which to include, current age or occasion? Assume no cohort effects. If you do think this is an issue, include baseline age (age_base) and current age minus baseline age (time) in model.

data_long %>% 
  group_by(gender, age_base, occation) %>% 
  summarise(n       = n(),
            count   = sum(obesity == "Yes"),
            prop    = mean(obesity == "Yes"),
            se      = sd(obesity == "Yes")/sqrt(n)) %>%
  mutate(time = (occation %>% as.numeric) * 2 - 2) %>% 
  ggplot(aes(x = time,
             y = prop,
             fill = gender))  +
  geom_ribbon(aes(ymin = prop - se,
                  ymax = prop + se),
              alpha = 0.2) +
  geom_point(aes(color = gender)) +
  geom_line(aes(color = gender)) +
  theme_bw() +
  facet_wrap(~ age_base, labeller = label_both) +
  labs(title    = "Observed Obesity Rates, by Gender within Cohort",
       subtitle = "Subset of 350 children with complete data",
       x        = "Time, years from 1977",
       y        = "Proportion of Children Characterized as Obese") +
  scale_fill_manual(values = c("dodgerblue3", "red")) +
  scale_color_manual(values = c("dodgerblue3", "red")) +
  scale_x_continuous(breaks = seq(from = 0, to = 4, by = 2)) +
  theme(legend.position = c(1, 0),
        legend.justification = c(1, 0),
        legend.background = element_rect(color = "black"))

data_long %>% 
  group_by(gender, age_curr) %>% 
  summarise(n       = n(),
            count   = sum(obesity == "Yes"),
            prop    = mean(obesity == "Yes"),
            se      = sd(obesity == "Yes")/sqrt(n)) %>%
  ggplot(aes(x = age_curr %>% as.character %>% as.numeric,
             y = prop,
             group = gender,
             fill = gender))  +
  geom_ribbon(aes(ymin = prop - se,
                  ymax = prop + se),
              alpha = 0.2) +
  geom_point(aes(color = gender)) +
  geom_line(aes(color = gender)) +
  theme_bw() +
  geom_vline(xintercept = 12, 
             linetype   = "dashed", 
             size       = 1, 
             color      = "navyblue") +
  labs(title    = "Observed Obesity Rates, by Gender (collapsing cohorts)",
       subtitle = "Subset of 350 children with complete data",
       x        = "Age of Child, years",
       y        = "Proportion of Children Characterized as Obese") +
  scale_fill_manual(values = c("dodgerblue3", "red")) +
  scale_color_manual(values = c("dodgerblue3", "red")) +
  scale_x_continuous(breaks = seq(from = 6, to = 18, by = 2)) +
  theme(legend.position = c(0, 1),
        legend.justification = c(-0.05, 1.05),
        legend.background = element_rect(color = "black"))

20.4.1 Center time at twelve years old

data_long <- data_long %>% 
  dplyr::mutate(age_center = age_curr %>% as.character %>% as.numeric -12) %>% 
  dplyr::mutate(obesity_num = obesity %>% as.numeric - 1)

psych::headTail(data_long)
       id gender age_base age_curr occation obesity age_center obesity_num
1       1   Male        6        6        1     Yes         -6           1
2       1   Male        6        8        2     Yes         -4           1
3       1   Male        6       10        3     Yes         -2           1
4       5   Male        6        6        1     Yes         -6           1
...  <NA>   <NA>     <NA>     <NA>     <NA>    <NA>        ...         ...
1047 3582 Female       14       18        3      No          6           0
1048 3584 Female       14       14        1      No          2           0
1049 3584 Female       14       16        2      No          4           0
1050 3584 Female       14       18        3      No          6           0

20.5 GLM Analysis

20.5.1 Standard logistic regression

fit_glm_1 <- glm(obesity_num ~ gender*age_center + gender*I(age_center^2), 
                 data   = data_long,
                 family = binomial(link = "logit"))

fit_glm_2 <- glm(obesity_num ~ gender + age_center + I(age_center^2), 
                 data   = data_long,
                 family = binomial(link = "logit"))
texreg::knitreg(list(extract_glm_exp(fit_glm_1), 
                     extract_glm_exp(fit_glm_2)),
                  custom.model.names = c("Interaction",
                                         "Main Effects"),
                  caption = "GLM: Parameter EStimates",
                  single.row = TRUE,
                  ci.test = 1)
GLM: Parameter EStimates
  Interaction Main Effects
(Intercept) 0.25 [0.18; 0.33]* 0.24 [0.19; 0.31]*
genderFemale 1.43 [0.97; 2.12] 1.48 [1.10; 2.00]*
age_center 1.05 [0.97; 1.14] 1.04 [0.99; 1.10]
age_center^2 0.99 [0.96; 1.01] 0.99 [0.98; 1.01]
genderFemale:age_center 0.99 [0.89; 1.09]  
genderFemale:age_center^2 1.00 [0.97; 1.04]  
* Null hypothesis value outside the confidence interval.
plot_pred_glm <- Effect(c("gender", "age_center"), 
       fit_glm_2,
       xlevels = list(age_center = seq(from = -6, to = 6, by = 0.25))) %>% 
   data.frame %>%
   mutate(age = age_center + 12) %>% 
  dplyr::mutate(gender = forcats::fct_rev(gender)) %>% 
   ggplot(aes(x        = age,
              y        = fit,
              group    = gender,
              linetype = gender,
              fill     = gender)) +
  geom_ribbon(aes(ymin = fit - se,
                  ymax = fit + se),
              alpha = .3) +
   geom_line(aes(color = gender),
             size = 1.5) +
   theme_bw() +
   labs(title    = "Generalized Linear Model: Model #2",
        x        = "Child's Age, years",
        y        = "Predicted Probability of Obesity",
        linetype = "Gender",
        fill     = "Gender",
        color    = "Gender") +
   theme(legend.position = c(0, 1),
         legend.justification = c(-0.05, 1.05),
         legend.background = element_rect(color = "black"),
         legend.key.width = unit(2, "cm")) +
  scale_x_continuous(breaks = seq(from = 6, to = 18, by = 3)) 

plot_pred_glm 

20.6 GEE Analysis

ALWAYS: fix the scale parameter to 1 with binomial data!!!

fit_gee_1in <- gee::gee(obesity_num ~ gender*age_center + gender*I(age_center^2), 
                        id          = id, 
                        data        = data_long,
                        family      = binomial(link = "logit"),
                        corstr      = 'independence', 
                        scale.fix   = TRUE,
                        scale.value = 1)
                 (Intercept)                 genderFemale 
                   -1.398469                     0.361111 
                  age_center              I(age_center^2) 
                    0.048638                    -0.011231 
     genderFemale:age_center genderFemale:I(age_center^2) 
                   -0.014399                     0.004094 
fit_gee_1ex <- gee::gee(obesity_num ~ gender*age_center + gender*I(age_center^2), 
                        id          = id, 
                        data        = data_long,
                        family      = binomial(link = "logit"),
                        corstr      = 'exchangeable', 
                        scale.fix   = TRUE,
                        scale.value = 1)
                 (Intercept)                 genderFemale 
                   -1.398469                     0.361111 
                  age_center              I(age_center^2) 
                    0.048638                    -0.011231 
     genderFemale:age_center genderFemale:I(age_center^2) 
                   -0.014399                     0.004094 
fit_gee_1un <- gee::gee(obesity_num ~ gender*age_center + gender*I(age_center^2), 
                        id          = id, 
                        data        = data_long,
                        family      = binomial(link = "logit"),
                        corstr      = 'unstructured', 
                        scale.fix   = TRUE,
                        scale.value = 1)
                 (Intercept)                 genderFemale 
                   -1.398469                     0.361111 
                  age_center              I(age_center^2) 
                    0.048638                    -0.011231 
     genderFemale:age_center genderFemale:I(age_center^2) 
                   -0.014399                     0.004094 
texreg::knitreg(list(extract_gee_exp(fit_gee_1in), 
                     extract_gee_exp(fit_gee_1ex),
                     extract_gee_exp(fit_gee_1un)),
                custom.model.names = c("Independent",
                                       "Exchangable",
                                       "Unstructured"),
                caption = "Gee Model Parameters: With Interactions",
                single.row = TRUE,
                ci.test = 1)
Gee Model Parameters: With Interactions
  Independent Exchangable Unstructured
(Intercept) 0.25 [0.17; 0.35]* 0.24 [0.17; 0.35]* 0.25 [0.17; 0.35]*
genderFemale 1.43 [0.87; 2.35] 1.40 [0.87; 2.24] 1.39 [0.86; 2.23]
age_center 1.05 [0.94; 1.17] 1.07 [0.97; 1.17] 1.06 [0.97; 1.16]
age_center^2 0.99 [0.96; 1.01] 0.99 [0.97; 1.01] 0.99 [0.97; 1.01]
genderFemale:age_center 0.99 [0.86; 1.13] 1.02 [0.91; 1.14] 1.02 [0.91; 1.14]
genderFemale:age_center^2 1.00 [0.97; 1.04] 1.01 [0.98; 1.03] 1.01 [0.98; 1.03]
Dispersion 1.00 1.00 1.00
* Null hypothesis value outside the confidence interval.

20.6.1 Drop the interaction with gender.

fit_gee_2in <- gee::gee(obesity_num ~ gender + age_center + I(age_center^2), 
                        id          = id, 
                        data        = data_long,
                        family      = binomial(link = "logit"),
                        corstr      = 'independence', 
                        scale.fix   = TRUE,
                        scale.value = 1)
    (Intercept)    genderFemale      age_center I(age_center^2) 
      -1.416998        0.392184        0.039733       -0.008682 
fit_gee_2ex <- gee::gee(obesity_num ~ gender + age_center + I(age_center^2), 
                        id          = id, 
                        data        = data_long,
                        family      = binomial(link = "logit"),
                        corstr      = 'exchangeable', 
                        scale.fix   = TRUE,
                        scale.value = 1)
    (Intercept)    genderFemale      age_center I(age_center^2) 
      -1.416998        0.392184        0.039733       -0.008682 
fit_gee_2un <- gee::gee(obesity_num ~ gender + age_center + I(age_center^2),  
                        id          = id, 
                        data        = data_long,
                        family      = binomial(link = "logit"),
                        corstr      = 'unstructured', 
                        scale.fix   = TRUE,
                        scale.value = 1)
    (Intercept)    genderFemale      age_center I(age_center^2) 
      -1.416998        0.392184        0.039733       -0.008682 
texreg::knitreg(list(extract_gee_exp(fit_gee_2in), 
                       extract_gee_exp(fit_gee_2ex),
                       extract_gee_exp(fit_gee_2un)),
                  custom.model.names = c("Independent",
                                         "Exchangable",
                                         "Unstructured"),
                  caption = "Gee Model Parameters: Main Effects Only",
                  single.row = TRUE,
                  ci.test = 1)
Gee Model Parameters: Main Effects Only
  Independent Exchangable Unstructured
(Intercept) 0.24 [0.17; 0.34]* 0.23 [0.17; 0.33]* 0.24 [0.17; 0.33]*
genderFemale 1.48 [0.97; 2.27] 1.49 [0.97; 2.29] 1.48 [0.96; 2.27]
age_center 1.04 [0.98; 1.11] 1.07 [1.02; 1.13]* 1.07 [1.02; 1.13]*
age_center^2 0.99 [0.98; 1.01] 0.99 [0.98; 1.01] 0.99 [0.98; 1.01]
Dispersion 1.00 1.00 1.00
* Null hypothesis value outside the confidence interval.

20.6.2 Select the “final” model.

fit_geeglm_2un <- geepack::geeglm(obesity_num ~ gender + age_center + I(age_center^2),  
                                  id          = id, 
                                  data        = data_long,
                                  family      = binomial(link = "logit"),
                                  corstr      = 'unstructured')
interactions::interact_plot(model = fit_geeglm_2un,
                            pred = age_center,
                            modx = gender)

plot_pred_gee <- fit_geeglm_2un %>% 
  emmeans::emmeans(~ gender*age_center,
                   at = list(age_center = seq(from = -6, to = 6, by = .1)),
                   type = "response",
                   level = .685) %>% 
  data.frame() %>% 
  mutate(gender = fct_rev(gender)) %>% 
  mutate(age = age_center + 12) %>% 
  ggplot(aes(x        = age,
             y        = prob,
             group    = gender)) +
  geom_ribbon(aes(ymin = asymp.LCL,
                  ymax = asymp.UCL,
                  fill = gender),
              alpha = .2) +
  geom_line(aes(linetype = gender,
                color    = gender),
            size = 1.5) +
  theme_bw() +
  labs(title = "Generalized Estimating Equation: Model #2, unstructured",
       x     = "Child's Age, years",
       y     = "Predicted Proportion with Obesity",
       linetype = "Gender",
       fill = "Gender",
       color = "Gender") +
  theme(legend.position = c(0, 1),
        legend.justification = c(-0.05, 1.05),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(2, "cm")) +
  scale_x_continuous(breaks = seq(from = 6, to = 18, by = 3)) 

  
plot_pred_gee

20.7 GLMM Analysis

IT IS GENERALLY NOT RECOMMENDED THAT RANDOM-SLOPES BE ESTIMATED FOR BINOMIAL GLMMS

fit_glmm_1 <- lme4::glmer(obesity_num ~ age_center*gender + I(age_center^2)*gender + (1|id), 
                          data = data_long,
                          family      = binomial(link = "logit")) 

fit_glmm_2 <- lme4::glmer(obesity_num ~ gender + age_center + I(age_center^2) + (1|id), 
                          data = data_long,
                          family      = binomial(link = "logit")) 

Indicates smaller model is better, no interaction at higher level necessary

anova(fit_glmm_1, fit_glmm_2)
Data: data_long
Models:
fit_glmm_2: obesity_num ~ gender + age_center + I(age_center^2) + (1 | id)
fit_glmm_1: obesity_num ~ age_center * gender + I(age_center^2) * gender + (1 | id)
           npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
fit_glmm_2    5 883 908   -436      873                    
fit_glmm_1    7 885 920   -436      871  1.54  2       0.46
texreg::knitreg(list(extract_glmer_exp(fit_glmm_1), 
                     extract_glmer_exp(fit_glmm_2)),
                custom.model.names = c("Interaction",
                                       "Main Effects"),
                caption = "GLMM: Parameter EStimates",
                single.row = TRUE,
                ci.test = 1)
GLMM: Parameter EStimates
  Interaction Main Effects
(Intercept) 0.00 [0.00; 0.01]* 0.00 [0.00; 0.01]*
age_center 1.21 [0.97; 1.50] 1.25 [1.09; 1.44]*
genderFemale 1.54 [0.41; 5.74] 2.05 [0.61; 6.91]
age_center^2 0.97 [0.92; 1.02] 0.98 [0.95; 1.01]
age_center:genderFemale 1.08 [0.81; 1.43]  
genderFemale:age_center^2 1.03 [0.97; 1.10]  
* Null hypothesis value outside the confidence interval.
interactions::interact_plot(model = fit_glmm_2,
                            pred = age_center,
                            modx = gender,
                            int.width = .685,
                            interval = TRUE)

Effect(c("gender", "age_center"),fit_glmm_2) %>% 
  data.frame %>% 
  mutate(fit_exp = exp(fit))
   gender age_center       fit        se     lower    upper fit_exp
1    Male         -6 0.0002128 0.0002429 2.271e-05 0.001991   1.000
2  Female         -6 0.0004365 0.0005150 4.321e-05 0.004394   1.000
3    Male         -3 0.0006428 0.0005657 1.145e-04 0.003601   1.001
4  Female         -3 0.0013179 0.0012250 2.129e-04 0.008112   1.001
5    Male          0 0.0014512 0.0011546 3.048e-04 0.006880   1.001
6  Female          0 0.0029726 0.0025307 5.590e-04 0.015646   1.003
7    Male          3 0.0024489 0.0018764 5.445e-04 0.010941   1.002
8  Female          3 0.0050111 0.0041414 9.878e-04 0.025010   1.005
9    Male          6 0.0030909 0.0027490 5.393e-04 0.017504   1.003
10 Female          6 0.0063206 0.0059705 9.861e-04 0.039377   1.006
plot_pred_glmm <- fit_glmm_2 %>% 
  emmeans::emmeans(~ gender*age_center,
                   at = list(age_center = seq(from = -6, to = 6, by = .1)),
                   type = "response",
                   level = .685) %>% 
  data.frame() %>% 
  mutate(gender = fct_rev(gender)) %>% 
  mutate(age = age_center + 12) %>% 
  ggplot(aes(x        = age,
             y        = prob,
             group    = gender)) +
  geom_ribbon(aes(ymin = asymp.LCL,
                  ymax = asymp.UCL,
                  fill = gender),
              alpha = .2) +
  geom_line(aes(linetype = gender,
                color    = gender),
            size = 1.5) +
  theme_bw() +
  labs(title = "Generalized Linear Mixed Effects: Model #2, Random Interepts",
       x     = "Child's Age, years",
       y     = "Predicted Probability of Obesity",
       linetype = "Gender",
       fill = "Gender",
       color = "Gender") +
  theme(legend.position = c(0, 1),
        legend.justification = c(-0.05, 1.05),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(2, "cm")) +
  scale_x_continuous(breaks = seq(from = 6, to = 18, by = 3)) 

plot_pred_glmm 

20.8 Compare Methods

texreg::knitreg(list(extract_glm_exp(fit_glm_2),
                       extract_gee_exp(fit_gee_2un),
                       extract_glmer_exp(fit_glmm_2)),
                  custom.model.names = c("GLM",
                                         "GEE",
                                         "GLMM"),
                  caption = "Compare Methods: Parameter EStimates",
                  single.row = TRUE,
                  ci.test = 1)
Compare Methods: Parameter EStimates
  GLM GEE GLMM
(Intercept) 0.24 [0.19; 0.31]* 0.24 [0.17; 0.33]* 0.00 [0.00; 0.01]*
genderFemale 1.48 [1.10; 2.00]* 1.48 [0.96; 2.27] 2.05 [0.61; 6.91]
age_center 1.04 [0.99; 1.10] 1.07 [1.02; 1.13]* 1.25 [1.09; 1.44]*
age_center^2 0.99 [0.98; 1.01] 0.99 [0.98; 1.01] 0.98 [0.95; 1.01]
Dispersion   1.00  
* Null hypothesis value outside the confidence interval.
plot_pred_glm / plot_pred_gee / plot_pred_glmm

(plot_pred_glm + theme(legend.position = "none")) / 
  (plot_pred_gee + theme(legend.position = "none")) / 
  plot_pred_glmm +
  plot_annotation(tag_levels = "A") +
  plot_layout(guides = "auto")  

data_long %>% 
  dplyr::mutate(pred = predict(fit_glmm_2, re.form = ~ (1|id))) %>% 
  dplyr::mutate(odds = exp(pred)) %>% 
  dplyr::mutate(prob = odds/(1 + odds)) %>% 
  ggplot(aes(x = age_curr,
             y = prob,
             group = id)) +
  geom_line(aes(color = gender)) +
  theme_bw()