EffectSize.php 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. <?php
  2. namespace MathPHP\Statistics;
  3. use MathPHP\Exception;
  /**
   * Effect size is a quantitative measure of the strength of a phenomenon.
   * https://en.wikipedia.org/wiki/Effect_size
   *
   *  - η² (Eta-squared)
   *  - η²p (Partial eta-squared)
   *  - ω² (omega-squared)
   *  - Cohen's ƒ²
   *  - Cohen's q
   *  - Cohen's d
   *  - Hedges' g
   *  - Glass' Δ (glass' delta)
   *
   * All methods are static and pure: each returns a value computed only from its arguments.
   * Unless stated otherwise, no method guards against a zero denominator.
   */
  class EffectSize
  {
      /**
       * η² (Eta-squared)
       *
       * Eta-squared describes the ratio of variance explained in the dependent
       * variable by a predictor while controlling for other predictors, making
       * it analogous to the r².
       * https://en.wikipedia.org/wiki/Effect_size#Eta-squared_.28.CE.B72.29
       *
       *      SSt
       * η² = ---
       *      SST
       *
       * where:
       *  SSt = sum of squares treatment
       *  SST = sum of squares total
       *
       * @param float $SSt Sum of squares treatment
       * @param float $SST Sum of squares total (the divisor; must be non-zero)
       *
       * @return float
       */
      public static function etaSquared(float $SSt, float $SST): float
      {
          return $SSt / $SST;
      }

      /**
       * η²p (Partial eta-squared)
       *
       * https://en.wikipedia.org/wiki/Effect_size#Eta-squared_.28.CE.B72.29
       *
       *          SSt
       * η²p = ---------
       *       SSt + SSE
       *
       * where:
       *  SSt = sum of squares treatment
       *  SSE = sum of squares error
       *
       * @param float $SSt Sum of squares treatment
       * @param float $SSE Sum of squares error (SSt + SSE is the divisor; must be non-zero)
       *
       * @return float
       */
      public static function partialEtaSquared(float $SSt, float $SSE): float
      {
          return $SSt / ($SSt + $SSE);
      }

      /**
       * ω² (omega-squared)
       *
       * A less biased estimator of the variance explained in the population.
       * https://en.wikipedia.org/wiki/Effect_size#Omega-squared_.28.CF.892.29
       *
       *      SSt - dft * MSE
       * ω² = ---------------
       *         SST + MSE
       *
       * where:
       *  SSt = sum of squares treatment
       *  SST = sum of squares total
       *  dft = degrees of freedom treatment
       *  MSE = Mean squares error
       *
       * @param float $SSt Sum of squares treatment
       * @param int   $dft Degrees of freedom treatment
       * @param float $SST Sum of squares total
       * @param float $MSE Mean squares error (SST + MSE is the divisor; must be non-zero)
       *
       * @return float
       */
      public static function omegaSquared(float $SSt, int $dft, float $SST, float $MSE): float
      {
          return ($SSt - $dft * $MSE) / ($SST + $MSE);
      }

      /**
       * Cohen's ƒ²
       *
       * One of several effect size measures to use in the context of an F-test
       * for ANOVA or multiple regression. Its amount of bias (overestimation of
       * the effect size for the ANOVA) depends on the bias of its underlying
       * measurement of variance explained (R², η², ω²)
       * https://en.wikipedia.org/wiki/Effect_size#Cohen.27s_.C6.922
       *
       *        R²
       * ƒ² = ------
       *      1 - R²
       *
       *        η²
       * ƒ² = ------
       *      1 - η²
       *
       *        ω²
       * ƒ² = ------
       *      1 - ω²
       *
       * @param float $measure_of_variance_explained (R², η², ω²); a value of exactly 1 makes the divisor zero
       *
       * @return float
       */
      public static function cohensF(float $measure_of_variance_explained): float
      {
          return $measure_of_variance_explained / (1 - $measure_of_variance_explained);
      }

      /**
       * Cohen's q
       *
       * The difference between two Fisher transformed Pearson regression coefficients.
       * https://en.wikipedia.org/wiki/Effect_size#Cohen.27s_q
       *
       *     1     1 + r₁   1     1 + r₂
       * q = - log ------ - - log ------
       *     2     1 - r₁   2     1 - r₂
       *
       * where r₁ and r₂ are the regressions being compared
       *
       * The absolute value of the difference is returned, so the result is never negative.
       *
       * @param float $r₁
       * @param float $r₂
       *
       * @return float
       *
       * @throws Exception\OutOfBoundsException if an r is ≤ -1 or ≥ 1
       */
      public static function cohensQ(float $r₁, float $r₂): float
      {
          // The Fisher transformation is only defined on the open interval (-1, 1):
          // r = 1 makes the divisor zero, r = -1 takes log(0), and |r| > 1 takes the
          // log of a negative number. Reject all of these instead of returning INF/NAN.
          foreach ([$r₁, $r₂] as $r) {
              if ($r <= -1 || $r >= 1) {
                  throw new Exception\OutOfBoundsException('r must be strictly between -1 and 1');
              }
          }

          $½ = 0.5;

          // Fisher z-transform of each coefficient
          $z₁ = $½ * \log((1 + $r₁) / (1 - $r₁));
          $z₂ = $½ * \log((1 + $r₂) / (1 - $r₂));

          return \abs($z₁ - $z₂);
      }

      /**
       * Cohen's d
       *
       * The difference between two means divided by a standard deviation for the data.
       * https://en.wikipedia.org/wiki/Effect_size#Cohen.27s_d
       *
       *     μ₁ - μ₂
       * d = -------
       *        s
       *
       *         _________
       *        /s₁² + s₂²
       * s =   / ---------
       *      √      2
       *
       * where
       *  μ₁  = mean of sample population 1
       *  μ₂  = mean of sample population 2
       *  s₁² = variance of sample population 1
       *  s₂² = variance of sample population 2
       *  s   = pooled standard deviation
       *
       * This formula uses the common simplified version of the pooled standard deviation.
       *
       * @param float $μ₁ Mean of sample population 1
       * @param float $μ₂ Mean of sample population 2
       * @param float $s₁ Standard deviation of sample population 1
       * @param float $s₂ Standard deviation of sample population 2
       *
       * @return float
       */
      public static function cohensD(float $μ₁, float $μ₂, float $s₁, float $s₂): float
      {
          // Variance of each data set
          $s₁² = $s₁ * $s₁;
          $s₂² = $s₂ * $s₂;

          // Pooled standard deviation
          $s = \sqrt(($s₁² + $s₂²) / 2);

          // d
          return ($μ₁ - $μ₂) / $s;
      }

      /**
       * Hedges' g
       *
       * The difference between two means divided by a standard deviation for the data.
       * https://en.wikipedia.org/wiki/Effect_size#Hedges.27_g
       * http://www.polyu.edu.hk/mm/effectsizefaqs/effect_size_equations2.html
       *
       *     μ₁ - μ₂
       * g = -------
       *        s*
       *
       *          _________________________
       *         /(n₁ - 1)s₁² + (n₂ - 1)s₂²
       * s* =   / -------------------------
       *       √         n₁ + n₂ - 2
       *
       *
       * Then, to remove bias
       *
       *      /           3        \
       * g* ≈ | 1 - -------------- | g
       *      \     4(n₁ + n₂) - 9 /
       *
       * where
       *  μ₁  = mean of sample population 1
       *  μ₂  = mean of sample population 2
       *  s₁² = variance of sample population 1
       *  s₂² = variance of sample population 2
       *  n₁  = sample size of sample population 1
       *  n₂  = sample size of sample population 2
       *  s*  = pooled standard deviation
       *
       * n₁ + n₂ - 2 is used as a divisor, so the combined sample size must be greater than 2.
       *
       * @param float $μ₁ Mean of sample population 1
       * @param float $μ₂ Mean of sample population 2
       * @param float $s₁ Standard deviation of sample population 1
       * @param float $s₂ Standard deviation of sample population 2
       * @param int   $n₁ Sample size of sample population 1
       * @param int   $n₂ Sample size of sample population 2
       *
       * @return float The bias-corrected estimate g*
       */
      public static function hedgesG(float $μ₁, float $μ₂, float $s₁, float $s₂, int $n₁, int $n₂): float
      {
          // Variance of each data set
          $s₁² = $s₁ * $s₁;
          $s₂² = $s₂ * $s₂;

          // Pooled standard deviation: each variance is weighted by its own degrees of freedom,
          // and the whole weighted sum (not just the second term) is divided by n₁ + n₂ - 2.
          $weighted_variance_sum = (($n₁ - 1) * $s₁²) + (($n₂ - 1) * $s₂²);
          $pooled_dof            = $n₁ + $n₂ - 2;
          $pooled_sd             = \sqrt($weighted_variance_sum / $pooled_dof);

          // g
          $g = ($μ₁ - $μ₂) / $pooled_sd;

          // Unbiased g
          return (1 - (3 / (4 * ($n₁ + $n₂) - 9))) * $g;
      }

      /**
       * Glass' Δ (glass' delta)
       *
       * An estimator of the effect size that uses only the standard deviation of
       * the second group.
       * https://en.wikipedia.org/wiki/Effect_size#Glass.27_.CE.94
       *
       *     μ₁ - μ₂
       * Δ = -------
       *       s₂
       *
       * where
       *  μ₁ = mean of sample population 1
       *  μ₂ = mean of sample population 2
       *  s₂ = standard deviation of sample population 2
       *
       * @param float $μ₁ Mean of sample population 1
       * @param float $μ₂ Mean of sample population 2
       * @param float $s₂ Standard deviation of sample population 2 (the divisor; must be non-zero)
       *
       * @return float
       */
      public static function glassDelta(float $μ₁, float $μ₂, float $s₂): float
      {
          return ($μ₁ - $μ₂) / $s₂;
      }
  }