20 GLMM, Binary Outcome: Muscatine Obesity
20.1 Packages
20.1.1 CRAN
library(tidyverse) # all things tidy
library(pander) # nice looking genderal tabulations
library(furniture) # nice table1() descriptives
library(texreg) # Convert Regression Output to LaTeX or HTML Tables
library(psych) # contains some useful functions, like headTail
library(lme4) # Linear, generalized linear, & nonlinear mixed models
library(gee) # Generalized Estimating Equations
library(effects) # Plotting estimated marginal means
library(performance)
library(interactions)
library(patchwork) # combining graphics
20.2 Data Prep
Data on Obesity from the Muscatine Coronary Risk Factor Study.
Source:
Table 10 (page 96) in Woolson and Clarke (1984). With permission of Blackwell Publishing.
Reference:
Woolson, R.F. and Clarke, W.R. (1984). Analysis of categorical incompletel longitudinal data. Journal of the Royal Statistical Society, Series A, 147, 87-99.
Description:
The Muscatine Coronary Risk Factor Study (MCRFS) was a longitudinal study of coronary risk factors in school children in Muscatine, Iowa (Woolson and Clarke 1984; Ekholm and Skinner 1998). Five cohorts of children were measured for height
and weight
in 1977, 1979, and 1981. Relative weight
was calculated as the ratio of a child’s observed weight to the median weight for their age-sex-height group. Children with a relative weight greater than 110% of the median weight for their respective stratum were classified as obese
. The analysis of this study involves binary data (1 = obese, 0 = not obese) collected at successive time points.
This data was also using in an article title “Missing data methods in longitudinal studies: a review” (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3016756/).
Variable List:
Indicators
id
Child’s unique identification numberoccas
Occasion number: 1, 2, 3
Outcome or dependent variable
obesity
Obesity Status, 0 = no, 1 = yes
Main predictor or independent variable of interest
gender
0 = Male, 1 = Femalebaseage
Baseline Age, mid-point of age-cohortcurrage
Current Age, mid-point of age-cohort
20.2.1 Import
<- read.table("https://raw.githubusercontent.com/CEHS-research/data/master/MLM/Muscatine.txt", header=TRUE) data_raw
str(data_raw)
'data.frame': 14568 obs. of 6 variables:
$ id : int 1 1 1 2 2 2 3 3 3 4 ...
$ gender : int 0 0 0 0 0 0 0 0 0 0 ...
$ baseage: int 6 6 6 6 6 6 6 6 6 6 ...
$ currage: int 6 8 10 6 8 10 6 8 10 6 ...
$ occas : int 1 2 3 1 2 3 1 2 3 1 ...
$ obesity: chr "1" "1" "1" "1" ...
::headTail(data_raw, top = 10) psych
id gender baseage currage occas obesity
1 1 0 6 6 1 1
2 1 0 6 8 2 1
3 1 0 6 10 3 1
4 2 0 6 6 1 1
5 2 0 6 8 2 1
6 2 0 6 10 3 1
7 3 0 6 6 1 1
8 3 0 6 8 2 1
9 3 0 6 10 3 1
10 4 0 6 6 1 1
... ... ... ... ... ... <NA>
14565 4855 1 14 18 3 0
14566 4856 1 14 14 1 .
14567 4856 1 14 16 2 .
14568 4856 1 14 18 3 0
20.2.2 Restrict to 350ID’s of children with complete data for Class Demonstration
Dealing with missing-ness and its implications are beyond the score of this class. Instead we are going to restrict our class analysis to a subset of 350 children who have complete data
I am using the
set.seed()
function so that I can replicate the restults later.
<- data_raw %>%
complete_ids ::filter(obesity %in% c("0", "1")) %>%
dplyr::group_by(id) %>%
dplyr::summarise(n = n()) %>%
dplyr::filter(n == 3) %>%
dplyr::pull(id)
dplyr
set.seed(8892) # needed?
<- complete_ids %>% sample(350)
use_ids
head(use_ids)
[1] 3574 805 3458 3537 679 655
20.2.3 Long Format
<- data_raw %>%
data_long ::filter(id %in% use_ids) %>%
dplyrmutate(id = id %>% factor) %>%
mutate(gender = gender %>% factor(levels = 0:1,
labels = c("Male", "Female"))) %>%
mutate(age_base = baseage %>% factor) %>%
mutate(age_curr = currage %>% factor) %>%
mutate(occation = occas %>% factor) %>%
mutate(obesity = obesity %>% factor(levels = 0:1,
labels = c("No", "Yes"))) %>%
select(id, gender, age_base, age_curr, occation, obesity)
str(data_long)
'data.frame': 1050 obs. of 6 variables:
$ id : Factor w/ 350 levels "1","5","10","16",..: 1 1 1 2 2 2 3 3 3 4 ...
$ gender : Factor w/ 2 levels "Male","Female": 1 1 1 1 1 1 1 1 1 1 ...
$ age_base: Factor w/ 5 levels "6","8","10","12",..: 1 1 1 1 1 1 2 2 2 2 ...
$ age_curr: Factor w/ 7 levels "6","8","10","12",..: 1 2 3 1 2 3 2 3 4 2 ...
$ occation: Factor w/ 3 levels "1","2","3": 1 2 3 1 2 3 1 2 3 1 ...
$ obesity : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
::headTail(data_long, top = 10) psych
id gender age_base age_curr occation obesity
1 1 Male 6 6 1 Yes
2 1 Male 6 8 2 Yes
3 1 Male 6 10 3 Yes
4 5 Male 6 6 1 Yes
5 5 Male 6 8 2 Yes
6 5 Male 6 10 3 Yes
7 10 Male 8 8 1 Yes
8 10 Male 8 10 2 Yes
9 10 Male 8 12 3 Yes
10 16 Male 8 8 1 Yes
... <NA> <NA> <NA> <NA> <NA> <NA>
1047 3582 Female 14 18 3 No
1048 3584 Female 14 14 1 No
1049 3584 Female 14 16 2 No
1050 3584 Female 14 18 3 No
20.2.4 Wide Format
<- data_long %>%
data_wide ::pivot_wider(names_from = occation,
tidyrnames_sep = "_",
values_from = c(obesity, age_curr)) %>%
mutate_if(is.character, factor)%>%
group_by(id) %>%
mutate(num_miss = sum(is.na(c(obesity_1, obesity_2, obesity_3)))) %>%
ungroup() %>%
mutate(num_miss = as.factor(num_miss))
str(data_wide)
tibble [350 × 10] (S3: tbl_df/tbl/data.frame)
$ id : Factor w/ 350 levels "1","5","10","16",..: 1 2 3 4 5 6 7 8 9 10 ...
$ gender : Factor w/ 2 levels "Male","Female": 1 1 1 1 1 1 1 1 1 1 ...
$ age_base : Factor w/ 5 levels "6","8","10","12",..: 1 1 2 2 2 3 3 3 4 4 ...
$ obesity_1 : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
$ obesity_2 : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
$ obesity_3 : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
$ age_curr_1: Factor w/ 7 levels "6","8","10","12",..: 1 1 2 2 2 3 3 3 4 4 ...
$ age_curr_2: Factor w/ 7 levels "6","8","10","12",..: 2 2 3 3 3 4 4 4 5 5 ...
$ age_curr_3: Factor w/ 7 levels "6","8","10","12",..: 3 3 4 4 4 5 5 5 6 6 ...
$ num_miss : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
::headTail(data_wide, top = 10) psych
id gender age_base obesity_1 obesity_2 obesity_3 age_curr_1 age_curr_2
1 1 Male 6 Yes Yes Yes 6 8
2 5 Male 6 Yes Yes Yes 6 8
3 10 Male 8 Yes Yes Yes 8 10
4 16 Male 8 Yes Yes Yes 8 10
5 21 Male 8 Yes Yes Yes 8 10
6 30 Male 10 Yes Yes Yes 10 12
7 44 Male 10 Yes Yes Yes 10 12
8 50 Male 10 Yes Yes Yes 10 12
9 60 Male 12 Yes Yes Yes 12 14
10 61 Male 12 Yes Yes Yes 12 14
11 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
12 3580 Female 14 No No No 14 16
13 3581 Female 14 No No No 14 16
14 3582 Female 14 No No No 14 16
15 3584 Female 14 No No No 14 16
age_curr_3 num_miss
1 10 0
2 10 0
3 12 0
4 12 0
5 12 0
6 14 0
7 14 0
8 14 0
9 16 0
10 16 0
11 <NA> <NA>
12 18 0
13 18 0
14 18 0
15 18 0
20.3 Exploratory Data Analysis
20.3.1 Summary Statistics
20.3.1.1 Demographics and Baseline
%>%
data_wide ::group_by(gender) %>%
dplyr::table1("Baseline Age" = age_base,
furniture"Baseline Obesity" = obesity_1,
total = TRUE,
test = TRUE,
na.rm = FALSE,
output = "markdown")
Total | Male | Female | P-Value | |
---|---|---|---|---|
n = 350 | n = 166 | n = 184 | ||
Baseline Age | 0.759 | |||
6 | 42 (12%) | 17 (10.2%) | 25 (13.6%) | |
8 | 91 (26%) | 47 (28.3%) | 44 (23.9%) | |
10 | 92 (26.3%) | 41 (24.7%) | 51 (27.7%) | |
12 | 67 (19.1%) | 33 (19.9%) | 34 (18.5%) | |
14 | 58 (16.6%) | 28 (16.9%) | 30 (16.3%) | |
NA | 0 (0%) | 0 (0%) | 0 (0%) | |
Baseline Obesity | 0.608 | |||
No | 286 (81.7%) | 138 (83.1%) | 148 (80.4%) | |
Yes | 64 (18.3%) | 28 (16.9%) | 36 (19.6%) | |
NA | 0 (0%) | 0 (0%) | 0 (0%) |
20.3.1.2 Status over Time
<- data_long %>%
data_summary ::group_by(gender, age_curr) %>%
dplyr::mutate(obesityN = case_when(obesity == "Yes" ~ 1,
dplyr== "No" ~ 0)) %>%
obesity ::filter(complete.cases(gender, age_curr, obesityN)) %>%
dplyr::summarise(n = n(),
dplyrprob_est = mean(obesityN),
prob_SD = sd(obesityN),
prob_SE = prob_SD/sqrt(n))
data_summary
# A tibble: 14 × 6
# Groups: gender [2]
gender age_curr n prob_est prob_SD prob_SE
<fct> <fct> <int> <dbl> <dbl> <dbl>
1 Male 6 17 0.118 0.332 0.0805
2 Male 8 64 0.172 0.380 0.0475
3 Male 10 105 0.143 0.352 0.0343
4 Male 12 121 0.198 0.400 0.0364
5 Male 14 102 0.225 0.420 0.0416
6 Male 16 61 0.213 0.413 0.0529
7 Male 18 28 0.143 0.356 0.0673
8 Female 6 25 0.16 0.374 0.0748
9 Female 8 69 0.203 0.405 0.0488
10 Female 10 120 0.275 0.448 0.0409
11 Female 12 129 0.256 0.438 0.0386
12 Female 14 115 0.243 0.431 0.0402
13 Female 16 64 0.281 0.453 0.0566
14 Female 18 30 0.267 0.450 0.0821
20.3.2 Visualize
20.3.2.1 By cohort and gender
%>%
data_long ::group_by(gender, age_base, age_curr) %>%
dplyr::mutate(obesityN = case_when(obesity == "Yes" ~ 1,
dplyr== "No" ~ 0)) %>%
obesity ::filter(complete.cases(gender, age_curr, obesityN)) %>%
dplyr::summarise(n = n(),
dplyrprob_est = mean(obesityN),
prob_SD = sd(obesityN),
prob_SE = prob_SD/sqrt(n)) %>%
ggplot(aes(x = age_curr,
y = prob_est,
group = age_base,
color = age_base)) +
geom_point() +
geom_line() +
theme_bw() +
labs(x = "Child's Age, years",
y = "Proportion Obese") +
facet_grid(. ~ gender)
20.3.2.2 BY only gender
%>%
data_summary ggplot(aes(x = age_curr,
y = prob_est,
group = gender)) +
geom_ribbon(aes(ymin = prob_est - prob_SE,
ymax = prob_est + prob_SE,
fill = gender),
alpha = .3) +
geom_point(aes(color = gender,
shape = gender)) +
geom_line(aes(linetype = gender,
color = gender)) +
theme_bw() +
scale_color_manual(values = c("dodger blue", "hot pink")) +
scale_fill_manual(values = c("dodger blue", "hot pink")) +
labs(x = "Child's Age, years",
y = "Proportion Obese")
Smooth out the trends
%>%
data_summary ggplot(aes(x = age_curr,
y = prob_est,
group = gender,
color = gender)) +
geom_smooth(method = "lm",
formula = y ~ poly(x, 2),
se = FALSE) +
theme_bw() +
scale_color_manual(values = c("dodger blue", "hot pink")) +
scale_fill_manual(values = c("dodger blue", "hot pink")) +
labs(x = "Child's Age, years",
y = "Proportion Obese")
20.4 Analysis Goal
Does risk of obesity increase with age and are patterns of change similar for both sexes?
There are 5 age cohorts that were measured each for 3 years, baseage and currage are age midpoints of those cohort groups. Which to include, current age or occasion? Assume no cohort effects. If you do think this is an issue, include baseline age (age_base
) and current age minus baseline age (time
) in model.
%>%
data_long group_by(gender, age_base, occation) %>%
summarise(n = n(),
count = sum(obesity == "Yes"),
prop = mean(obesity == "Yes"),
se = sd(obesity == "Yes")/sqrt(n)) %>%
mutate(time = (occation %>% as.numeric) * 2 - 2) %>%
ggplot(aes(x = time,
y = prop,
fill = gender)) +
geom_ribbon(aes(ymin = prop - se,
ymax = prop + se),
alpha = 0.2) +
geom_point(aes(color = gender)) +
geom_line(aes(color = gender)) +
theme_bw() +
facet_wrap(~ age_base, labeller = label_both) +
labs(title = "Observed Obesity Rates, by Gender within Cohort",
subtitle = "Subset of 350 children with complete data",
x = "Time, years from 1977",
y = "Proportion of Children Characterized as Obese") +
scale_fill_manual(values = c("dodgerblue3", "red")) +
scale_color_manual(values = c("dodgerblue3", "red")) +
scale_x_continuous(breaks = seq(from = 0, to = 4, by = 2)) +
theme(legend.position = c(1, 0),
legend.justification = c(1, 0),
legend.background = element_rect(color = "black"))
%>%
data_long group_by(gender, age_curr) %>%
summarise(n = n(),
count = sum(obesity == "Yes"),
prop = mean(obesity == "Yes"),
se = sd(obesity == "Yes")/sqrt(n)) %>%
ggplot(aes(x = age_curr %>% as.character %>% as.numeric,
y = prop,
group = gender,
fill = gender)) +
geom_ribbon(aes(ymin = prop - se,
ymax = prop + se),
alpha = 0.2) +
geom_point(aes(color = gender)) +
geom_line(aes(color = gender)) +
theme_bw() +
geom_vline(xintercept = 12,
linetype = "dashed",
size = 1,
color = "navyblue") +
labs(title = "Observed Obesity Rates, by Gender (collapsing cohorts)",
subtitle = "Subset of 350 children with complete data",
x = "Age of Child, years",
y = "Proportion of Children Characterized as Obese") +
scale_fill_manual(values = c("dodgerblue3", "red")) +
scale_color_manual(values = c("dodgerblue3", "red")) +
scale_x_continuous(breaks = seq(from = 6, to = 18, by = 2)) +
theme(legend.position = c(0, 1),
legend.justification = c(-0.05, 1.05),
legend.background = element_rect(color = "black"))
20.4.1 Center time at twelve years old
<- data_long %>%
data_long ::mutate(age_center = age_curr %>% as.character %>% as.numeric -12) %>%
dplyr::mutate(obesity_num = obesity %>% as.numeric - 1)
dplyr
::headTail(data_long) psych
id gender age_base age_curr occation obesity age_center obesity_num
1 1 Male 6 6 1 Yes -6 1
2 1 Male 6 8 2 Yes -4 1
3 1 Male 6 10 3 Yes -2 1
4 5 Male 6 6 1 Yes -6 1
... <NA> <NA> <NA> <NA> <NA> <NA> ... ...
1047 3582 Female 14 18 3 No 6 0
1048 3584 Female 14 14 1 No 2 0
1049 3584 Female 14 16 2 No 4 0
1050 3584 Female 14 18 3 No 6 0
20.5 GLM Analysis
20.5.1 Standard logistic regression
<- glm(obesity_num ~ gender*age_center + gender*I(age_center^2),
fit_glm_1 data = data_long,
family = binomial(link = "logit"))
<- glm(obesity_num ~ gender + age_center + I(age_center^2),
fit_glm_2 data = data_long,
family = binomial(link = "logit"))
::knitreg(list(extract_glm_exp(fit_glm_1),
texregextract_glm_exp(fit_glm_2)),
custom.model.names = c("Interaction",
"Main Effects"),
caption = "GLM: Parameter EStimates",
single.row = TRUE,
ci.test = 1)
Interaction | Main Effects | |
---|---|---|
(Intercept) | 0.25 [0.18; 0.33]* | 0.24 [0.19; 0.31]* |
genderFemale | 1.43 [0.97; 2.12] | 1.48 [1.10; 2.00]* |
age_center | 1.05 [0.97; 1.14] | 1.04 [0.99; 1.10] |
age_center^2 | 0.99 [0.96; 1.01] | 0.99 [0.98; 1.01] |
genderFemale:age_center | 0.99 [0.89; 1.09] | |
genderFemale:age_center^2 | 1.00 [0.97; 1.04] | |
* Null hypothesis value outside the confidence interval. |
<- Effect(c("gender", "age_center"),
plot_pred_glm
fit_glm_2,xlevels = list(age_center = seq(from = -6, to = 6, by = 0.25))) %>%
%>%
data.frame mutate(age = age_center + 12) %>%
::mutate(gender = forcats::fct_rev(gender)) %>%
dplyrggplot(aes(x = age,
y = fit,
group = gender,
linetype = gender,
fill = gender)) +
geom_ribbon(aes(ymin = fit - se,
ymax = fit + se),
alpha = .3) +
geom_line(aes(color = gender),
size = 1.5) +
theme_bw() +
labs(title = "Generalized Linear Model: Model #2",
x = "Child's Age, years",
y = "Predicted Probability of Obesity",
linetype = "Gender",
fill = "Gender",
color = "Gender") +
theme(legend.position = c(0, 1),
legend.justification = c(-0.05, 1.05),
legend.background = element_rect(color = "black"),
legend.key.width = unit(2, "cm")) +
scale_x_continuous(breaks = seq(from = 6, to = 18, by = 3))
plot_pred_glm
20.6 GEE Analysis
ALWAYS: fix the scale parameter to 1 with binomial data!!!
<- gee::gee(obesity_num ~ gender*age_center + gender*I(age_center^2),
fit_gee_1in id = id,
data = data_long,
family = binomial(link = "logit"),
corstr = 'independence',
scale.fix = TRUE,
scale.value = 1)
(Intercept) genderFemale
-1.398469 0.361111
age_center I(age_center^2)
0.048638 -0.011231
genderFemale:age_center genderFemale:I(age_center^2)
-0.014399 0.004094
<- gee::gee(obesity_num ~ gender*age_center + gender*I(age_center^2),
fit_gee_1ex id = id,
data = data_long,
family = binomial(link = "logit"),
corstr = 'exchangeable',
scale.fix = TRUE,
scale.value = 1)
(Intercept) genderFemale
-1.398469 0.361111
age_center I(age_center^2)
0.048638 -0.011231
genderFemale:age_center genderFemale:I(age_center^2)
-0.014399 0.004094
<- gee::gee(obesity_num ~ gender*age_center + gender*I(age_center^2),
fit_gee_1un id = id,
data = data_long,
family = binomial(link = "logit"),
corstr = 'unstructured',
scale.fix = TRUE,
scale.value = 1)
(Intercept) genderFemale
-1.398469 0.361111
age_center I(age_center^2)
0.048638 -0.011231
genderFemale:age_center genderFemale:I(age_center^2)
-0.014399 0.004094
::knitreg(list(extract_gee_exp(fit_gee_1in),
texregextract_gee_exp(fit_gee_1ex),
extract_gee_exp(fit_gee_1un)),
custom.model.names = c("Independent",
"Exchangable",
"Unstructured"),
caption = "Gee Model Parameters: With Interactions",
single.row = TRUE,
ci.test = 1)
Independent | Exchangable | Unstructured | |
---|---|---|---|
(Intercept) | 0.25 [0.17; 0.35]* | 0.24 [0.17; 0.35]* | 0.25 [0.17; 0.35]* |
genderFemale | 1.43 [0.87; 2.35] | 1.40 [0.87; 2.24] | 1.39 [0.86; 2.23] |
age_center | 1.05 [0.94; 1.17] | 1.07 [0.97; 1.17] | 1.06 [0.97; 1.16] |
age_center^2 | 0.99 [0.96; 1.01] | 0.99 [0.97; 1.01] | 0.99 [0.97; 1.01] |
genderFemale:age_center | 0.99 [0.86; 1.13] | 1.02 [0.91; 1.14] | 1.02 [0.91; 1.14] |
genderFemale:age_center^2 | 1.00 [0.97; 1.04] | 1.01 [0.98; 1.03] | 1.01 [0.98; 1.03] |
Dispersion | 1.00 | 1.00 | 1.00 |
* Null hypothesis value outside the confidence interval. |
20.6.1 Drop the interaction with gender
.
<- gee::gee(obesity_num ~ gender + age_center + I(age_center^2),
fit_gee_2in id = id,
data = data_long,
family = binomial(link = "logit"),
corstr = 'independence',
scale.fix = TRUE,
scale.value = 1)
(Intercept) genderFemale age_center I(age_center^2)
-1.416998 0.392184 0.039733 -0.008682
<- gee::gee(obesity_num ~ gender + age_center + I(age_center^2),
fit_gee_2ex id = id,
data = data_long,
family = binomial(link = "logit"),
corstr = 'exchangeable',
scale.fix = TRUE,
scale.value = 1)
(Intercept) genderFemale age_center I(age_center^2)
-1.416998 0.392184 0.039733 -0.008682
<- gee::gee(obesity_num ~ gender + age_center + I(age_center^2),
fit_gee_2un id = id,
data = data_long,
family = binomial(link = "logit"),
corstr = 'unstructured',
scale.fix = TRUE,
scale.value = 1)
(Intercept) genderFemale age_center I(age_center^2)
-1.416998 0.392184 0.039733 -0.008682
::knitreg(list(extract_gee_exp(fit_gee_2in),
texregextract_gee_exp(fit_gee_2ex),
extract_gee_exp(fit_gee_2un)),
custom.model.names = c("Independent",
"Exchangable",
"Unstructured"),
caption = "Gee Model Parameters: Main Effects Only",
single.row = TRUE,
ci.test = 1)
Independent | Exchangable | Unstructured | |
---|---|---|---|
(Intercept) | 0.24 [0.17; 0.34]* | 0.23 [0.17; 0.33]* | 0.24 [0.17; 0.33]* |
genderFemale | 1.48 [0.97; 2.27] | 1.49 [0.97; 2.29] | 1.48 [0.96; 2.27] |
age_center | 1.04 [0.98; 1.11] | 1.07 [1.02; 1.13]* | 1.07 [1.02; 1.13]* |
age_center^2 | 0.99 [0.98; 1.01] | 0.99 [0.98; 1.01] | 0.99 [0.98; 1.01] |
Dispersion | 1.00 | 1.00 | 1.00 |
* Null hypothesis value outside the confidence interval. |
20.6.2 Select the “final” model.
<- geepack::geeglm(obesity_num ~ gender + age_center + I(age_center^2),
fit_geeglm_2un id = id,
data = data_long,
family = binomial(link = "logit"),
corstr = 'unstructured')
::interact_plot(model = fit_geeglm_2un,
interactionspred = age_center,
modx = gender)
<- fit_geeglm_2un %>%
plot_pred_gee ::emmeans(~ gender*age_center,
emmeansat = list(age_center = seq(from = -6, to = 6, by = .1)),
type = "response",
level = .685) %>%
data.frame() %>%
mutate(gender = fct_rev(gender)) %>%
mutate(age = age_center + 12) %>%
ggplot(aes(x = age,
y = prob,
group = gender)) +
geom_ribbon(aes(ymin = asymp.LCL,
ymax = asymp.UCL,
fill = gender),
alpha = .2) +
geom_line(aes(linetype = gender,
color = gender),
size = 1.5) +
theme_bw() +
labs(title = "Generalized Estimating Equation: Model #2, unstructured",
x = "Child's Age, years",
y = "Predicted Proportion with Obesity",
linetype = "Gender",
fill = "Gender",
color = "Gender") +
theme(legend.position = c(0, 1),
legend.justification = c(-0.05, 1.05),
legend.background = element_rect(color = "black"),
legend.key.width = unit(2, "cm")) +
scale_x_continuous(breaks = seq(from = 6, to = 18, by = 3))
plot_pred_gee
20.7 GLMM Analysis
IT IS GENERALLY NOT RECOMMENDED THAT RANDOM-SLOPES BE ESTIMATED FOR BINOMIAL GLMMS
<- lme4::glmer(obesity_num ~ age_center*gender + I(age_center^2)*gender + (1|id),
fit_glmm_1 data = data_long,
family = binomial(link = "logit"))
<- lme4::glmer(obesity_num ~ gender + age_center + I(age_center^2) + (1|id),
fit_glmm_2 data = data_long,
family = binomial(link = "logit"))
Indicates smaller model is better, no interaction at higher level necessary
anova(fit_glmm_1, fit_glmm_2)
Data: data_long
Models:
fit_glmm_2: obesity_num ~ gender + age_center + I(age_center^2) + (1 | id)
fit_glmm_1: obesity_num ~ age_center * gender + I(age_center^2) * gender + (1 | id)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
fit_glmm_2 5 883 908 -436 873
fit_glmm_1 7 885 920 -436 871 1.54 2 0.46
::knitreg(list(extract_glmer_exp(fit_glmm_1),
texregextract_glmer_exp(fit_glmm_2)),
custom.model.names = c("Interaction",
"Main Effects"),
caption = "GLMM: Parameter EStimates",
single.row = TRUE,
ci.test = 1)
Interaction | Main Effects | |
---|---|---|
(Intercept) | 0.00 [0.00; 0.01]* | 0.00 [0.00; 0.01]* |
age_center | 1.21 [0.97; 1.50] | 1.25 [1.09; 1.44]* |
genderFemale | 1.54 [0.41; 5.74] | 2.05 [0.61; 6.91] |
age_center^2 | 0.97 [0.92; 1.02] | 0.98 [0.95; 1.01] |
age_center:genderFemale | 1.08 [0.81; 1.43] | |
genderFemale:age_center^2 | 1.03 [0.97; 1.10] | |
* Null hypothesis value outside the confidence interval. |
::interact_plot(model = fit_glmm_2,
interactionspred = age_center,
modx = gender,
int.width = .685,
interval = TRUE)
Effect(c("gender", "age_center"),fit_glmm_2) %>%
%>%
data.frame mutate(fit_exp = exp(fit))
gender age_center fit se lower upper fit_exp
1 Male -6 0.0002128 0.0002429 2.271e-05 0.001991 1.000
2 Female -6 0.0004365 0.0005150 4.321e-05 0.004394 1.000
3 Male -3 0.0006428 0.0005657 1.145e-04 0.003601 1.001
4 Female -3 0.0013179 0.0012250 2.129e-04 0.008112 1.001
5 Male 0 0.0014512 0.0011546 3.048e-04 0.006880 1.001
6 Female 0 0.0029726 0.0025307 5.590e-04 0.015646 1.003
7 Male 3 0.0024489 0.0018764 5.445e-04 0.010941 1.002
8 Female 3 0.0050111 0.0041414 9.878e-04 0.025010 1.005
9 Male 6 0.0030909 0.0027490 5.393e-04 0.017504 1.003
10 Female 6 0.0063206 0.0059705 9.861e-04 0.039377 1.006
<- fit_glmm_2 %>%
plot_pred_glmm ::emmeans(~ gender*age_center,
emmeansat = list(age_center = seq(from = -6, to = 6, by = .1)),
type = "response",
level = .685) %>%
data.frame() %>%
mutate(gender = fct_rev(gender)) %>%
mutate(age = age_center + 12) %>%
ggplot(aes(x = age,
y = prob,
group = gender)) +
geom_ribbon(aes(ymin = asymp.LCL,
ymax = asymp.UCL,
fill = gender),
alpha = .2) +
geom_line(aes(linetype = gender,
color = gender),
size = 1.5) +
theme_bw() +
labs(title = "Generalized Linear Mixed Effects: Model #2, Random Interepts",
x = "Child's Age, years",
y = "Predicted Probability of Obesity",
linetype = "Gender",
fill = "Gender",
color = "Gender") +
theme(legend.position = c(0, 1),
legend.justification = c(-0.05, 1.05),
legend.background = element_rect(color = "black"),
legend.key.width = unit(2, "cm")) +
scale_x_continuous(breaks = seq(from = 6, to = 18, by = 3))
plot_pred_glmm
20.8 Compare Methods
::knitreg(list(extract_glm_exp(fit_glm_2),
texregextract_gee_exp(fit_gee_2un),
extract_glmer_exp(fit_glmm_2)),
custom.model.names = c("GLM",
"GEE",
"GLMM"),
caption = "Compare Methods: Parameter EStimates",
single.row = TRUE,
ci.test = 1)
GLM | GEE | GLMM | |
---|---|---|---|
(Intercept) | 0.24 [0.19; 0.31]* | 0.24 [0.17; 0.33]* | 0.00 [0.00; 0.01]* |
genderFemale | 1.48 [1.10; 2.00]* | 1.48 [0.96; 2.27] | 2.05 [0.61; 6.91] |
age_center | 1.04 [0.99; 1.10] | 1.07 [1.02; 1.13]* | 1.25 [1.09; 1.44]* |
age_center^2 | 0.99 [0.98; 1.01] | 0.99 [0.98; 1.01] | 0.98 [0.95; 1.01] |
Dispersion | 1.00 | ||
* Null hypothesis value outside the confidence interval. |
The goal of patchwork is to make it ridiculously simple to combine
separate ggplots
into the same graphic. As such it tries to
solve the same problem as gridExtra::grid.arrange()
and
cowplot::plot_grid()
but using an API that incites
exploration and iteration, and scales to arbitrarily complex
layouts.
/ plot_pred_gee / plot_pred_glmm plot_pred_glm
+ theme(legend.position = "none")) /
(plot_pred_glm + theme(legend.position = "none")) /
(plot_pred_gee +
plot_pred_glmm plot_annotation(tag_levels = "A") +
plot_layout(guides = "auto")
%>%
data_long ::mutate(pred = predict(fit_glmm_2, re.form = ~ (1|id))) %>%
dplyr::mutate(odds = exp(pred)) %>%
dplyr::mutate(prob = odds/(1 + odds)) %>%
dplyrggplot(aes(x = age_curr,
y = prob,
group = id)) +
geom_line(aes(color = gender)) +
theme_bw()