Demonstration of Generalized Linear Mixed Effect Models on microscopy data, where datapoints (cells) on a microscopy slide might not be independent.
# insert data
dat <- data.frame(experiment = rep(c("day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day2", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1", "day1"),2),
RNAi = rep(c("Control", "Target"), each = 100),
counts = c(6, 6, 4, 6, 3, 5, 10, 9, 5, 4, 8, 4, 9, 7, 7, 2, 6, 9, 4, 3, 5, 4, 4, 2, 8, 4, 5, 6, 3, 7, 7, 4, 6, 3, 3, 3, 7, 5, 5, 4, 5, 5, 5, 6, 5, 3, 3, 4, 5, 5, 2, 3, 4, 4, 4, 7, 5, 4, 6, 4, 5, 4, 4, 5, 4, 4, 5, 2, 5, 4, 5, 4, 6, 10, 5, 6, 4, 11, 3, 13, 3, 6, 3, 5, 5, 4, 4, 4, 4, 6, 8, 6, 4, 11, 5, 4, 5, 1, 2, 4, 16, 6, 9, 7, 11, 12, 6, 3, 7, 8, 8, 12, 7, 9, 5, 9, 5, 10, 7, 6, 7, 19, 3, 13, 7, 6, 13, 8, 8, 11, 7, 13, 10, 6, 4, 13, 3, 4, 2, 10, 6, 8, 6, 9, 9, 5, 4, 20, 2, 5, 7, 5, 10, 4, 7, 5, 6, 3, 4, 7, 9, 8, 16, 6, 10, 8, 5, 6, 4, 11, 6, 2, 6, 8, 11, 6, 5, 8, 8, 7, 10, 9, 7, 4, 6, 8, 2, 10, 7, 8, 3, 4, 10, 13, 6, 12, 8, 1, 5, 6))
# group for each slide
dat$slide <- factor(paste(dat$RNAi, dat$experiment, sep = "_"))
# relevel RNAi
dat$RNAi <- relevel(dat$RNAi, ref = "Control")
# check factor levels
str(dat)
'data.frame': 200 obs. of 4 variables:
$ experiment: Factor w/ 2 levels "day1","day2": 2 2 2 2 2 2 2 2 2 2 ...
$ RNAi : Factor w/ 2 levels "Control","Target": 1 1 1 1 1 1 1 1 1 1 ...
$ counts : num 6 6 4 6 3 5 10 9 5 4 ...
$ slide : Factor w/ 4 levels "Control_day1",..: 2 2 2 2 2 2 2 2 2 2 ...
# simple summary
summary(dat)
experiment RNAi counts slide
day1:100 Control:100 Min. : 1.000 Control_day1:50
day2:100 Target :100 1st Qu.: 4.000 Control_day2:50
Median : 6.000 Target_day1 :50
Mean : 6.275 Target_day2 :50
3rd Qu.: 8.000
Max. :20.000
library(beeswarm)
boxplot(counts ~ slide, data = dat, ylim = c(0,20))
beeswarm(counts ~ slide, data = dat, pch=19, col="#00000088", add = TRUE)
library(car)
library(MASS)
par(mfrow = c(1,4), cex=1.1)
attach(dat)
# normal distribution (it does not look "normal")
qqp(counts, "norm", pch = 20)
# lognormal distribution
qqp(counts, "lnorm", pch = 20)
# Poisson distribution
poisson <- fitdistr(counts, "Poisson")
qqp(counts, "pois", lambda = poisson$estimate, pch = 20)
# negative binomial distribution (Poisson + overdispersion)
negbinom <- fitdistr(counts, "negative binomial")
qqp(counts, "nbinom", size = negbinom$estimate[1], mu = negbinom$estimate[2], pch = 20)
Wikipedia Links:
https://en.wikipedia.org/wiki/Multilevel_model
https://en.wikipedia.org/wiki/Mixed_model
# fit a fixed effect glm for estimating theta (dispersion) parameter
glm_fixed <- glm.nb(counts ~ RNAi, data = dat)
theta <- glm_fixed$theta
# use theta in glmm
glmm <- glmmPQL(counts ~ RNAi, random = ~1|slide, data = dat,
family = negative.binomial(link = "log", theta = theta))
summary(glmm)
Linear mixed-effects model fit by maximum likelihood
Data: dat
AIC BIC logLik
NA NA NA
Random effects:
Formula: ~1 | slide
(Intercept) Residual
StdDev: 0.009718551 0.9935715
Variance function:
Structure: fixed weights
Formula: ~invwt
Fixed effects: counts ~ RNAi
Value Std.Error DF t-value p-value
(Intercept) 1.6174061 0.04919156 196 32.87975 0.0000
RNAiTarget 0.3988294 0.06472175 2 6.16222 0.0253
Correlation:
(Intr)
RNAiTarget -0.76
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.0948686 -0.4920140 -0.1595450 0.3940259 4.0237412
Number of Observations: 200
Number of Groups: 4
library(nlme)
as.numeric(c(exp(fixed.effects(glmm))[1],
exp(fixed.effects(glmm))[1] * exp(fixed.effects(glmm))[2]))
[1] 5.04 7.51
pvals <- replicate(n = 100, expr = {
dat_sub <- dat[sample(1:200, size = 100),]
glmm_sub <- glmmPQL(counts ~ RNAi, random = ~1|slide, data = dat_sub,
family = negative.binomial(link = "log", theta = theta),
verbose = F)
coef(summary(glmm_sub))[2,5]})
summary(pvals)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.02691 0.04282 0.05253 0.05698 0.06512 0.13585
Many thanks to these pages:
http://www.flutterbys.com.au/stats/tut/tut9.1.html
http://www.flutterbys.com.au/stats/tut/tut11.2a.html
The dataset was provided by Natalia Kochanova (LMU, BMC, Molecular Biology, Imhof group)
If you see mistakes or want to suggest changes, please create an issue on the source repository.
For attribution, please cite this work as
Schauer (2019, Nov. 29). CompBioMethods: Generalized Linear Mixed Effect Models. Retrieved from https://tschauer.github.io/blog/posts/2020-02-12-generalized-linear-mixed-effect-models/
BibTeX citation
@misc{schauer2019generalized, author = {Schauer, Tamas}, title = {CompBioMethods: Generalized Linear Mixed Effect Models}, url = {https://tschauer.github.io/blog/posts/2020-02-12-generalized-linear-mixed-effect-models/}, year = {2019} }