import pymc as pm
import numpy as np
import arviz as az
from itertools import combinations

%load_ext lab_black

7. Coagulation*#

An example of Bayesian ANOVA.

Adapted from Unit 7: anovacoagulation.odc.

Here 24 animals are randomly allocated to 4 different diets, but the numbers allocated to different diets are not the same. The coagulation time for blood is measured for each animal. Are the diet-based differences significant?

Box, Hunter, Hunter; Statistics for Experimenters, p. 166

# cut and pasted data from .odc file
# fmt: off
times = (62, 60, 63, 59, 63, 67, 71, 64, 65, 66, 68, 66, 71, 67, 68, 68, 56, 62,
         60, 61, 63, 64, 63, 59)
diets = (1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4)
# fmt: on

# create dictionary where each key is a diet and values are lists of times
data = {}
for key, val in zip(diets, times):
    data.setdefault(key, []).append(val)
data
{1: [62, 60, 63, 59],
 2: [63, 67, 71, 64, 65, 66],
 3: [68, 66, 71, 67, 68, 68],
 4: [56, 62, 60, 61, 63, 64, 63, 59]}

Simple method#

No loops! If you’re using this style, 4 treatments is probably the max before it starts to get too annoying to type out.

with pm.Model() as m:
    mu0 = pm.Normal("μ0", mu=0, tau=0.0001)
    tau = pm.Gamma("τ", 0.001, 0.001)

    alpha4 = pm.Normal("α4", mu=0, tau=0.0001)
    alpha3 = pm.Normal("α3", mu=0, tau=0.0001)
    alpha2 = pm.Normal("α2", mu=0, tau=0.0001)
    # sum-to-zero constraint
    alpha1 = pm.Deterministic("α1", -(alpha2 + alpha3 + alpha4))

    mu_1 = mu0 + alpha1
    mu_2 = mu0 + alpha2
    mu_3 = mu0 + alpha3
    mu_4 = mu0 + alpha4

    pm.Normal("lik1", mu=mu_1, tau=tau, observed=data[1])
    pm.Normal("lik2", mu=mu_2, tau=tau, observed=data[2])
    pm.Normal("lik3", mu=mu_3, tau=tau, observed=data[3])
    pm.Normal("lik4", mu=mu_4, tau=tau, observed=data[4])

    onetwo = pm.Deterministic("α1-α2", alpha1 - alpha2)
    onethree = pm.Deterministic("α1-α3", alpha1 - alpha3)
    onefour = pm.Deterministic("α1-α4", alpha1 - alpha4)
    twothree = pm.Deterministic("α2-α3", alpha2 - alpha3)
    twofour = pm.Deterministic("α2-α4", alpha2 - alpha4)
    threefour = pm.Deterministic("α3-α4", alpha3 - alpha4)

    trace = pm.sample(5000)
Hide code cell output
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [μ0, τ, α4, α3, α2]
100.00% [24000/24000 00:03<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 5_000 draw iterations (4_000 + 20_000 draws total) took 4 seconds.
az.summary(trace, var_names=["α"], filter_vars="like", kind="stats")
mean sd hdi_3% hdi_97%
α4 -2.995 0.817 -4.569 -1.499
α3 3.994 0.877 2.378 5.707
α2 2.003 0.885 0.328 3.661
α1 -3.002 1.008 -4.943 -1.137
α1-α2 -5.005 1.589 -8.087 -2.071
α1-α3 -6.997 1.582 -9.985 -4.006
α1-α4 -0.007 1.507 -2.815 2.912
α2-α3 -1.991 1.419 -4.665 0.697
α2-α4 4.998 1.355 2.432 7.570
α3-α4 6.989 1.342 4.488 9.550
az.plot_forest(trace, var_names=["α1", "α2", "α3", "α4"], combined=True)
array([<AxesSubplot: title={'center': '94.0% HDI'}>], dtype=object)
../_images/180e0cb16210041b6a5376db7663c2d9cc4d1c5a7bb50ef08c822b0bc2686532.png
az.plot_forest(
    trace, var_names=["~τ", "~μ0", "~α1", "~α2", "~α3", "~α4"], combined=True
)
array([<AxesSubplot: title={'center': '94.0% HDI'}>], dtype=object)
../_images/68112cb8c01f6cce5dfa9e9ad6f4e47a651d8cedff7e9a48e3cec4cc8aae91a3.png

A more concise method#

Not necessarily pretty, but easier to extend to more treatments. I’m interested in seeing other peoples’ methods here; I feel like this could still be a lot cleaner.

# get possible combinations
combos = list(combinations(range(4), 2))
combos
[(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
with pm.Model() as m:
    mu0 = pm.Normal("μ0", mu=0, tau=0.0001)
    tau = pm.Gamma("τ", 0.001, 0.001)

    alphas = [pm.Normal(f{i}", mu=0, tau=0.0001) for i in range(2, 5)]

    # sum-to-zero constraint
    alphas.insert(0, pm.Deterministic("α1", -(alphas[0] + alphas[1] + alphas[2])))

    mus = [
        pm.Deterministic(f"mu{i + 1}", mu0 + alpha) for i, alpha in enumerate(alphas)
    ]

    likelihoods = [
        pm.Normal(f"lik{i + 1}", mu=mus[i], tau=tau, observed=data[i + 1])
        for i, mu in enumerate(mus)
    ]

    [pm.Deterministic(f{i + 1} - α{j + 1}", alphas[i] - alphas[j]) for i, j in combos]

    trace = pm.sample(5000)
Hide code cell output
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [μ0, τ, α2, α3, α4]
100.00% [24000/24000 00:03<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 5_000 draw iterations (4_000 + 20_000 draws total) took 4 seconds.
az.summary(trace, var_names=["α"], filter_vars="like", kind="stats")
mean sd hdi_3% hdi_97%
α2 2.004 0.891 0.320 3.656
α3 4.009 0.888 2.302 5.631
α4 -3.003 0.805 -4.530 -1.489
α1 -3.010 1.035 -4.925 -0.993
α1 - α2 -5.014 1.621 -8.095 -2.047
α1 - α3 -7.019 1.617 -9.971 -3.907
α1 - α4 -0.007 1.532 -2.914 2.883
α2 - α3 -2.005 1.438 -4.775 0.659
α2 - α4 5.007 1.334 2.431 7.451
α3 - α4 7.011 1.331 4.489 9.524
%load_ext watermark
%watermark -n -u -v -iv -p pytensor
Last updated: Wed Mar 22 2023

Python implementation: CPython
Python version       : 3.11.0
IPython version      : 8.9.0

pytensor: 2.10.1

arviz: 0.14.0
pymc : 5.1.2
numpy: 1.24.2