Scenario helpers

import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from pymargins import Margins

rng = np.random.default_rng(42)
n = 2000
df = pd.DataFrame({
    "age": rng.integers(20, 75, n),
    "female": rng.binomial(1, 0.52, n),
    "treated": rng.binomial(1, 0.40, n),
    "region": rng.choice(["N", "S", "E", "W"], n),
    "group": rng.choice(["A", "B"], n),
    "preexist": rng.binomial(1, 0.4, n),
})
lp = (-1.5 + 0.04 * df["age"] - 0.3 * df["female"] + 0.8 * df["treated"]
      + 0.2 * (df["region"] == "S") + 0.4 * (df["region"] == "E")
      - 0.1 * (df["region"] == "W")
      + 0.6 * (df["group"] == "B") + 0.4 * df["preexist"])
df["y"] = rng.binomial(1, 1 / (1 + np.exp(-lp)))

fit = smf.glm("y ~ age + female + treated + C(region) + C(group) + preexist", data=df,
              family=sm.families.Binomial()).fit()
m = Margins.log_scale(fit, at="overall")

The pymargins.scenarios module ships factories for common contrast patterns so you don’t have to hand-build dicts and weight vectors.

from pymargins import pairwise, reference, at_levels, grid, did, diff, all_pairwise

pairwise(var, [a, b]) — two scenarios, [+1, -1] contrast

scen, w = pairwise("treated", [1, 0])
print(m.contrasts(scenarios=scen, contrasts=w).summary())
=============================================================
              Margins Result (delta, level=0.95)             
=============================================================
           estimate  std err       z  P>|z|  [95% Conf. Int.]
-------------------------------------------------------------
treated=1    1.2247   0.0253  8.0232  0.000    1.1656, 1.2869
=============================================================

n = 2000
Note: std err is on the inference scale; estimate and CI are on the reporting scale.
κ: 0.046
Delta-vs-sim disagreement: 0.048%

reference(var, levels, ref_level=...) — each level vs baseline

scen, W = reference("region", ["N", "S", "E", "W"], ref_level="N")
print(m.contrasts(scenarios=scen, contrasts=W).summary())
===========================================================
             Margins Result (delta, level=0.95)            
===========================================================
        estimate  std err        z  P>|z|  [95% Conf. Int.]
-----------------------------------------------------------
S_vs_N    0.9850   0.0381  -0.3969  0.691    0.9142, 1.0613
E_vs_N    1.0892   0.0341   2.5057  0.012    1.0188, 1.1646
W_vs_N    0.9357   0.0395  -1.6794  0.093    0.8660, 1.0112
===========================================================

n = 2000
Note: std err is on the inference scale; estimate and CI are on the reporting scale.
κ: max=0.052
Delta-vs-sim disagreement: 0.597%

all_pairwise(var, levels) — every level pair

scen, W = all_pairwise("region", ["N", "S", "E", "W"])

at_levels(var, levels=[...]) — predict at each level

print(m.predict(atexog={"region": ["N", "S", "E", "W"]}).summary())
==============================================================
              Margins Result (delta, level=0.95)              
==============================================================
          estimate  std err         z  P>|z|  [95% Conf. Int.]
--------------------------------------------------------------
region=N    0.7177   0.0265  -12.5068  0.000    0.6814, 0.7560
region=S    0.7069   0.0273  -12.7032  0.000    0.6701, 0.7458
region=E    0.7818   0.0214  -11.4813  0.000    0.7496, 0.8153
region=W    0.6716   0.0293  -13.5727  0.000    0.6341, 0.7113
==============================================================

n = 2000
Note: std err is on the inference scale; estimate and CI are on the reporting scale.
κ: max=0.075
Delta-vs-sim disagreement: 0.707%

grid(**vars) — Cartesian product of counterfactual values

print(m.predict(atexog={"age": [25, 45, 65], "treated": [0, 1]}).summary())
=======================================================================
                   Margins Result (delta, level=0.95)                  
=======================================================================
                   estimate  std err         z  P>|z|  [95% Conf. Int.]
-----------------------------------------------------------------------
age=25, treated=0    0.4721   0.0462  -16.2417  0.000    0.4312, 0.5168
age=25, treated=1    0.6728   0.0352  -11.2429  0.000    0.6279, 0.7209
age=45, treated=0    0.6590   0.0208  -20.0132  0.000    0.6326, 0.6865
age=45, treated=1    0.8178   0.0172  -11.6708  0.000    0.7907, 0.8459
age=65, treated=0    0.8083   0.0181  -11.7357  0.000    0.7800, 0.8375
age=65, treated=1    0.9085   0.0115   -8.3763  0.000    0.8883, 0.9291
=======================================================================

n = 2000
Note: std err is on the inference scale; estimate and CI are on the reporting scale.
κ: max=0.112
Delta-vs-sim disagreement: 1.091%

did(g, c, ...) — 2×2 DiD

scen, w = did("group", "preexist",
              treated_level="B", control_level="A",
              post_level=1, pre_level=0)
print(m.contrasts(scenarios=scen, contrasts=w).summary())
========================================================
           Margins Result (delta, level=0.95)           
========================================================
     estimate  std err        z  P>|z|  [95% Conf. Int.]
--------------------------------------------------------
did    0.9542   0.0125  -3.7399  0.000    0.9311, 0.9780
========================================================

n = 2000
Note: std err is on the inference scale; estimate and CI are on the reporting scale.
κ: max=0.140
Delta-vs-sim disagreement: 0.130%

diff(n)[-1, 0, …, 0, +1] for an ordered grid

scen = at_levels("age", levels=[25, 45, 65])
print(m.contrasts(scenarios=scen, contrasts=diff(3)).summary())   # age=65 minus age=25
===========================================================
             Margins Result (delta, level=0.95)            
===========================================================
        estimate  std err        z  P>|z|  [95% Conf. Int.]
-----------------------------------------------------------
age=25    0.7629   0.0290  -9.3229  0.000    0.7207, 0.8076
===========================================================

n = 2000
Note: std err is on the inference scale; estimate and CI are on the reporting scale.
κ: 0.048
Delta-vs-sim disagreement: 0.475%