notes/deflation.py

   1 from math import sqrt
   2 from statistics import mean, variance
   3
   4 from numpy.random import normal, seed
   5
   6 # seed the random number generator for reproducibility of given figures;
   7 # comment this out to run a new experiment
   8 seed(1)
   9
  10 def cohens_d(X, Y):
  11     return (
  12         (mean(X) - mean(Y)) /
  13         sqrt(
  14             (len(X)*variance(X) + len(Y)*variance(Y)) /
  15             (len(X) + len(Y))
  16         )
  17     )
  18
  19 def population_with_error(μ, ε, n):
  20     def trait():
  21         return normal(μ, 1)
  22     def measurement_error():
  23         return normal(0, ε)
  24     return [trait() + measurement_error() for _ in range(n)]
  25
  26
  27 # trait differs by 1 standard deviation
  28 true_f = population_with_error(1, 0, 10000)
  29 true_m = population_with_error(0, 0, 10000)
  30
  31 # as above, but with 0.5 standard units measurment error
  32 measured_f = population_with_error(1, 0.5, 10000)
  33 measured_m = population_with_error(0, 0.5, 10000)
  34
  35 true_d = cohens_d(true_f, true_m)
  36 print(true_d)  # 1.0193773432617055 — d≈1.0, as expected!
  37
  38 naïve_d = cohens_d(measured_f, measured_m)
  39 print(naïve_d)  # 0.8953395386313235 — deflated!
  40
  41
  42 def performance(μ_g, σ_g, s, n):
  43     def general_ability():
  44         return normal(μ_g, σ_g)
  45     def special_ability():
  46         return normal(s, 1)
  47     return [general_ability() + special_ability() for _ in range(n)]
  48
  49 # ♀ one standard deviation better than ♂ at the special factor
  50 population_f = performance(0, 1, 1, 10000)
  51 population_m = performance(0, 1, 0, 10000)
  52
  53 # ... but suppose we control/match for general intelligence
  54 matched_f = performance(0, 0, 1, 10000)
  55 matched_m = performance(0, 0, 0, 10000)
  56
  57 population_d = cohens_d(population_f, population_m)
  58 print(population_d)  # 0.7287587808164793 — deflated!
  59
  60 matched_d = cohens_d(matched_f, matched_m)
  61 print(matched_d)  # 1.018362581243161 — as you would expect