import pymc as pm
import numpy as np
import arviz as az
from itertools import combinations
%load_ext lab_black
7. Coagulation*#
An example of Bayesian ANOVA.
Adapted from Unit 7: anovacoagulation.odc.
Here 24 animals are randomly allocated to 4 different diets, but the numbers allocated to different diets are not the same. The coagulation time for blood is measured for each animal. Are the diet-based differences significant?
Box, Hunter, Hunter; Statistics for Experimenters, p. 166
# cut and pasted data from .odc file
# fmt: off
times = (62, 60, 63, 59, 63, 67, 71, 64, 65, 66, 68, 66, 71, 67, 68, 68, 56, 62,
60, 61, 63, 64, 63, 59)
diets = (1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4)
# fmt: on
# create dictionary where each key is a diet and values are lists of times
data = {}
for key, val in zip(diets, times):
data.setdefault(key, []).append(val)
data
{1: [62, 60, 63, 59],
2: [63, 67, 71, 64, 65, 66],
3: [68, 66, 71, 67, 68, 68],
4: [56, 62, 60, 61, 63, 64, 63, 59]}
Simple method#
No loops! If you’re using this style, 4 treatments is probably the max before it starts to get too annoying to type out.
with pm.Model() as m:
mu0 = pm.Normal("μ0", mu=0, tau=0.0001)
tau = pm.Gamma("τ", 0.001, 0.001)
alpha4 = pm.Normal("α4", mu=0, tau=0.0001)
alpha3 = pm.Normal("α3", mu=0, tau=0.0001)
alpha2 = pm.Normal("α2", mu=0, tau=0.0001)
# sum-to-zero constraint
alpha1 = pm.Deterministic("α1", -(alpha2 + alpha3 + alpha4))
mu_1 = mu0 + alpha1
mu_2 = mu0 + alpha2
mu_3 = mu0 + alpha3
mu_4 = mu0 + alpha4
pm.Normal("lik1", mu=mu_1, tau=tau, observed=data[1])
pm.Normal("lik2", mu=mu_2, tau=tau, observed=data[2])
pm.Normal("lik3", mu=mu_3, tau=tau, observed=data[3])
pm.Normal("lik4", mu=mu_4, tau=tau, observed=data[4])
onetwo = pm.Deterministic("α1-α2", alpha1 - alpha2)
onethree = pm.Deterministic("α1-α3", alpha1 - alpha3)
onefour = pm.Deterministic("α1-α4", alpha1 - alpha4)
twothree = pm.Deterministic("α2-α3", alpha2 - alpha3)
twofour = pm.Deterministic("α2-α4", alpha2 - alpha4)
threefour = pm.Deterministic("α3-α4", alpha3 - alpha4)
trace = pm.sample(5000)
Show code cell output
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [μ0, τ, α4, α3, α2]
100.00% [24000/24000 00:03<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 5_000 draw iterations (4_000 + 20_000 draws total) took 4 seconds.
az.summary(trace, var_names=["α"], filter_vars="like", kind="stats")
mean | sd | hdi_3% | hdi_97% | |
---|---|---|---|---|
α4 | -2.995 | 0.817 | -4.569 | -1.499 |
α3 | 3.994 | 0.877 | 2.378 | 5.707 |
α2 | 2.003 | 0.885 | 0.328 | 3.661 |
α1 | -3.002 | 1.008 | -4.943 | -1.137 |
α1-α2 | -5.005 | 1.589 | -8.087 | -2.071 |
α1-α3 | -6.997 | 1.582 | -9.985 | -4.006 |
α1-α4 | -0.007 | 1.507 | -2.815 | 2.912 |
α2-α3 | -1.991 | 1.419 | -4.665 | 0.697 |
α2-α4 | 4.998 | 1.355 | 2.432 | 7.570 |
α3-α4 | 6.989 | 1.342 | 4.488 | 9.550 |
az.plot_forest(trace, var_names=["α1", "α2", "α3", "α4"], combined=True)
array([<AxesSubplot: title={'center': '94.0% HDI'}>], dtype=object)
az.plot_forest(
trace, var_names=["~τ", "~μ0", "~α1", "~α2", "~α3", "~α4"], combined=True
)
array([<AxesSubplot: title={'center': '94.0% HDI'}>], dtype=object)
A more concise method#
Not necessarily pretty, but easier to extend to more treatments. I’m interested in seeing other peoples’ methods here; I feel like this could still be a lot cleaner.
# get possible combinations
combos = list(combinations(range(4), 2))
combos
[(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
with pm.Model() as m:
mu0 = pm.Normal("μ0", mu=0, tau=0.0001)
tau = pm.Gamma("τ", 0.001, 0.001)
alphas = [pm.Normal(f"α{i}", mu=0, tau=0.0001) for i in range(2, 5)]
# sum-to-zero constraint
alphas.insert(0, pm.Deterministic("α1", -(alphas[0] + alphas[1] + alphas[2])))
mus = [
pm.Deterministic(f"mu{i + 1}", mu0 + alpha) for i, alpha in enumerate(alphas)
]
likelihoods = [
pm.Normal(f"lik{i + 1}", mu=mus[i], tau=tau, observed=data[i + 1])
for i, mu in enumerate(mus)
]
[pm.Deterministic(f"α{i + 1} - α{j + 1}", alphas[i] - alphas[j]) for i, j in combos]
trace = pm.sample(5000)
Show code cell output
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [μ0, τ, α2, α3, α4]
100.00% [24000/24000 00:03<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 5_000 draw iterations (4_000 + 20_000 draws total) took 4 seconds.
az.summary(trace, var_names=["α"], filter_vars="like", kind="stats")
mean | sd | hdi_3% | hdi_97% | |
---|---|---|---|---|
α2 | 2.004 | 0.891 | 0.320 | 3.656 |
α3 | 4.009 | 0.888 | 2.302 | 5.631 |
α4 | -3.003 | 0.805 | -4.530 | -1.489 |
α1 | -3.010 | 1.035 | -4.925 | -0.993 |
α1 - α2 | -5.014 | 1.621 | -8.095 | -2.047 |
α1 - α3 | -7.019 | 1.617 | -9.971 | -3.907 |
α1 - α4 | -0.007 | 1.532 | -2.914 | 2.883 |
α2 - α3 | -2.005 | 1.438 | -4.775 | 0.659 |
α2 - α4 | 5.007 | 1.334 | 2.431 | 7.451 |
α3 - α4 | 7.011 | 1.331 | 4.489 | 9.524 |
%load_ext watermark
%watermark -n -u -v -iv -p pytensor
Last updated: Wed Mar 22 2023
Python implementation: CPython
Python version : 3.11.0
IPython version : 8.9.0
pytensor: 2.10.1
arviz: 0.14.0
pymc : 5.1.2
numpy: 1.24.2