import pymc as pm
import numpy as np
import arviz as az

%load_ext lab_black

6. Gesell#

Adapted from Unit 9: Gesell.odc.

Gesell Developmental Schedules measure child development. The scoring is devised by the American child psychologist and pediatrician Arnold Gesell who founded the Clinic of Child Development at Yale in 1911 and directed it for many years. The Gesell Developmental Schedules are a gauge of the status of a child’s motor and language development and personal-social and adaptive behaviors.

For 21 children, the age in months at which they first spoke and their Gesell Adaptive Score, which is the result of an aptitude test taken much later, were recorded. These data were originally collected by L. M. Linde of UCLA but were first published by M. R. Mickey, O. J. Dunn, and V. Clark, "Note on the use of stepwise regression in detecting outliers," Computers and Biomedical Research, 1 (1967), pp. 105–111.

The data have been used by several authors, e.g., N. R. Draper and J. A. John, "Influential observations and outliers in regression," Technometrics, 23 (1981), pp. 21–26.

Let X be the age in months at which a child speaks his/her first word and let Y be the Gesell adaptive score, a measure of a child's aptitude (observed later on).

Correlation of r = −0.640 indicates a moderately strong negative relationship between age at first word spoken and Gesell score. How does the child's aptitude change with how long it takes them to speak?

Are there any influential observations/potential outliers?

The solution follows a two-stage model:

a. Obtain mean ICPOs. Outside BUGS, take the reciprocal of the mean ICPOs (these are CPOs) and consider them as an input for model B.

b. Read in and calibrate CPOs with posterior ordinates.

# fmt: off
# Age (in months) at first spoken word for each of the 21 children.
x = np.array(
    [15, 26, 10, 9, 15, 20, 18, 11, 8, 20, 7, 9, 10, 11, 11, 10, 12, 42, 17, 11, 10]
)

# Gesell Adaptive Score for the same children, measured much later.
y = np.array(
    [95, 71, 83, 91, 102, 87, 93, 100, 104, 94, 113, 96, 83, 84, 102, 100, 105, 57, 121, 86, 100]
)
# fmt: on

Part A#

with pm.Model() as model_a:
    # Precision prior. The BUGS rate of .001 sampled poorly here; .003
    # gives answers that are very close.
    tau = pm.Gamma("tau", 1, 0.003)
    s2 = pm.Deterministic("s2", 1 / tau)

    # Vague priors on intercept (b[0]) and slope (b[1]).
    b = pm.Normal("b", 0, tau=0.00001, shape=2)

    mu = b[0] + b[1] * x
    resid = y - mu

    # Normal density of each observation at the current draw. The posterior
    # mean of its reciprocal (ICPO) gives the harmonic-mean CPO estimator,
    # which is why we track 1/density rather than the density itself.
    dens = pm.Deterministic(
        "cpo", pm.math.sqrt(tau / (2 * np.pi)) * pm.math.exp(-0.5 * tau * resid**2)
    )
    icpo = pm.Deterministic("icpo", 1 / dens)

    pm.Normal("likelihood", mu=mu, tau=tau, observed=y)

    trace = pm.sample(10000, target_accept=0.99)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [tau, b]
100.00% [44000/44000 00:12<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 10_000 draw iterations (4_000 + 40_000 draws total) took 12 seconds.
# Posterior summary of the per-observation ICPOs; round_to=5 makes the table
# directly comparable with the BUGS output.
icpo_df = az.summary(trace, var_names="icpo", round_to=5, filter_vars="like")

ICPO results match BUGS.

icpo_df
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
icpo[0] 28.41121 4.72845 20.47770 37.47112 0.03871 0.02782 15659.18205 18080.97067 1.00025
icpo[1] 47.99006 25.25222 22.45428 86.80183 0.16691 0.11803 24147.00691 27144.47489 1.00002
icpo[2] 92.74176 50.16799 36.13296 174.04703 0.34291 0.24714 20603.45565 24329.10258 1.00002
icpo[3] 40.43944 10.35050 25.06100 59.23434 0.07383 0.05275 20882.59211 24124.73361 1.00011
icpo[4] 40.79267 8.65556 26.98754 56.65844 0.04918 0.03630 35476.06045 25721.99214 1.00020
icpo[5] 28.25061 4.82800 20.19374 37.55486 0.04028 0.02886 15011.41745 16641.73097 1.00024
icpo[6] 29.57640 5.10526 21.10853 39.38356 0.03996 0.02868 17227.04521 20263.02467 1.00029
icpo[7] 28.85189 4.92518 20.58232 38.16269 0.04046 0.02918 15689.37653 17820.85901 1.00026
icpo[8] 29.73159 5.54447 20.46714 39.87133 0.04674 0.03423 15531.08248 16684.26037 1.00012
icpo[9] 34.85221 7.58887 22.65502 48.60123 0.05036 0.03646 24925.54890 25441.97958 1.00029
icpo[10] 51.98607 20.45696 26.47904 86.94689 0.15323 0.10835 18600.37825 23899.58560 1.00022
icpo[11] 30.07400 5.48479 21.34334 40.68441 0.04466 0.03232 16205.68697 19463.49205 1.00028
icpo[12] 92.74176 50.16799 36.13296 174.04703 0.34291 0.24714 20603.45565 24329.10258 1.00002
icpo[13] 66.90891 25.82705 33.93229 112.22439 0.16693 0.12028 23979.48913 25991.43371 1.00000
icpo[14] 30.81508 5.47764 21.38196 40.76289 0.04312 0.03136 17492.07762 19548.03039 1.00014
icpo[15] 28.35557 4.84480 19.98556 37.36930 0.04058 0.02926 15048.80258 17249.29806 1.00025
icpo[16] 39.67949 8.56248 26.03851 55.23164 0.05547 0.04009 26168.71713 24969.61763 1.00010
icpo[17] 69.22087 680.03534 18.49164 134.81628 4.82859 3.41438 18597.96049 21892.85609 1.00005
icpo[18] 6420.92323 41067.00099 81.53965 17103.15954 252.18793 178.32576 19089.13567 21192.00256 1.00004
icpo[19] 52.23689 15.70697 30.16757 80.84616 0.09984 0.07145 25294.32863 27260.16938 1.00003
icpo[20] 28.35557 4.84480 19.98556 37.36930 0.04058 0.02926 15048.80258 17249.29806 1.00025
# CPO_i = 1 / E[1/f_i]: the reciprocal of the posterior-mean ICPO
# (harmonic-mean estimator). These become input data for model B.
cpo_means = 1 / icpo_df["mean"].to_numpy()
cpo_means
array([0.03519737, 0.02083765, 0.01078263, 0.02472833, 0.02451421,
       0.03539747, 0.03381074, 0.03465977, 0.03363426, 0.02869259,
       0.01923592, 0.03325131, 0.01078263, 0.01494569, 0.03245164,
       0.03526644, 0.02520194, 0.01444651, 0.00015574, 0.01914356,
       0.03526644])

Part B#

# Pseudo-marginal-likelihood quantities built from stage A's CPOs.
mlcpo = -2 * np.log(cpo_means)
MPQ = mlcpo.sum()

with pm.Model() as model_b:
    tau = pm.Gamma("tau", 1, 0.002)
    s2 = pm.Deterministic("s2", 1 / tau)

    # Vague priors on intercept (b[0]) and slope (b[1]).
    b = pm.Normal("b", 0, tau=0.00001, shape=2)

    mu = b[0] + b[1] * x
    resid = y - mu

    # Normal density of each observation at the current draw.
    dens = pm.Deterministic(
        "f", pm.math.sqrt(tau / (2 * np.pi)) * pm.math.exp(-0.5 * tau * resid**2)
    )
    # Calibration ratio: stage-A CPO relative to the posterior ordinate;
    # values near 1 indicate a well-calibrated observation.
    calib = pm.Deterministic("c", cpo_means / dens)

    # Four divergence measures of the calibration ratios from 1.
    g1 = pm.Deterministic("g1", calib * pm.math.log(calib))
    g2 = -pm.math.log(calib)
    g3 = 0.5 * np.absolute((calib - 1))
    g4 = (calib - 1) ** 2

    # Track the sum of each divergence measure.
    for label, g in zip(("s_g1", "s_g2", "s_g3", "s_g4"), (g1, g2, g3, g4)):
        pm.Deterministic(label, pm.math.sum(g))

    pm.Normal("likelihood", mu=mu, tau=tau, observed=y)

    traceb = pm.sample(
        2000, target_accept=0.95, idata_kwargs=dict(log_likelihood=True)
    )
Hide code cell output
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [tau, b]
100.00% [12000/12000 00:02<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 3 seconds.
# Posterior calibration ratios c_i; observations with c far from 1 (or with a
# very wide interval) are poorly calibrated.
az.summary(traceb, var_names="c", filter_vars="like", kind="stats")
mean sd hdi_3% hdi_97%
c[0] 0.999 0.167 0.722 1.321
c[1] 1.008 0.506 0.484 1.851
c[2] 1.010 0.560 0.402 1.932
c[3] 1.002 0.264 0.617 1.478
c[4] 0.998 0.220 0.652 1.386
c[5] 1.000 0.173 0.711 1.329
c[6] 0.998 0.177 0.691 1.312
c[7] 0.998 0.169 0.710 1.309
c[8] 0.998 0.183 0.696 1.338
c[9] 0.998 0.231 0.638 1.396
c[10] 1.001 0.390 0.516 1.667
c[11] 0.999 0.182 0.698 1.346
c[12] 1.010 0.560 0.402 1.932
c[13] 1.008 0.401 0.487 1.683
c[14] 0.998 0.177 0.700 1.322
c[15] 0.998 0.169 0.706 1.304
c[16] 0.999 0.219 0.665 1.414
c[17] 0.900 3.238 0.262 1.932
c[18] 1.108 8.071 0.015 2.747
c[19] 1.006 0.313 0.573 1.563
c[20] 0.998 0.169 0.706 1.304
# Summed divergence measures: overall calibration diagnostics for the model.
az.summary(traceb, var_names="s_g", filter_vars="like", kind="stats")
mean sd hdi_3% hdi_97%
s_g1 3.623 46.797 -2.704 9.321
s_g2 2.377 1.228 0.168 3.888
s_g3 2.892 4.585 1.173 5.369
s_g4 77.429 2428.328 0.574 16.297
# Per-observation g1 = c*log(c); the much larger means/sds at g1[17] and
# g1[18] point to those two children as influential observations.
az.summary(traceb, var_names="g1", kind="stats")
mean sd hdi_3% hdi_97%
g1[0] 0.012 0.177 -0.256 0.333
g1[1] 0.105 0.779 -0.363 1.070
g1[2] 0.128 0.883 -0.368 1.223
g1[3] 0.034 0.313 -0.314 0.553
g1[4] 0.021 0.250 -0.287 0.441
g1[5] 0.014 0.185 -0.264 0.343
g1[6] 0.013 0.190 -0.258 0.353
g1[7] 0.012 0.179 -0.260 0.331
g1[8] 0.014 0.199 -0.270 0.364
g1[9] 0.022 0.270 -0.301 0.450
g1[10] 0.064 0.524 -0.352 0.813
g1[11] 0.015 0.196 -0.268 0.371
g1[12] 0.128 0.883 -0.368 1.223
g1[13] 0.075 0.544 -0.357 0.864
g1[14] 0.013 0.189 -0.254 0.364
g1[15] 0.012 0.178 -0.258 0.331
g1[16] 0.021 0.247 -0.301 0.441
g1[17] 0.528 16.013 -0.368 1.271
g1[18] 2.330 43.596 -0.368 2.773
g1[19] 0.049 0.390 -0.337 0.663
g1[20] 0.012 0.178 -0.258 0.331
# WAIC for model B; ArviZ warns that the posterior variance of the log
# predictive densities is large for some samples (consistent with outliers).
az.waic(traceb)
/Users/aaron/mambaforge/envs/pymc_env2/lib/python3.11/site-packages/arviz/stats/stats.py:1645: UserWarning: For one or more samples the posterior variance of the log predictive densities exceeds 0.4. This could be indication of WAIC starting to fail. 
See http://arxiv.org/abs/1507.04544 for details
  warnings.warn(
Computed from 8000 posterior samples and 21 observations log-likelihood matrix.

          Estimate       SE
elpd_waic   -82.67     5.08
p_waic        3.39        -

There has been a warning during the calculation. Please check the results.
%load_ext watermark
%watermark -n -u -v -iv -p pytensor
Last updated: Wed Mar 22 2023

Python implementation: CPython
Python version       : 3.11.0
IPython version      : 8.9.0

pytensor: 2.10.1

numpy: 1.24.2
arviz: 0.14.0
pymc : 5.1.2