ANOVATest.php 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718
  1. <?php
  2. namespace MathPHP\Tests\Statistics;
  3. use MathPHP\Statistics\ANOVA;
  4. use MathPHP\Exception;
  5. class ANOVATest extends \PHPUnit\Framework\TestCase
  6. {
  7. /**
  8. * @test oneWay with three samples
  9. * @dataProvider dataProviderForOneWayWithThreeSamples
  10. * @param array $sample1
  11. * @param array $sample2
  12. * @param array $sample3
  13. * @param array $expected
  14. */
  15. public function testOneWayWithThreeSamples(array $sample1, array $sample2, array $sample3, array $expected)
  16. {
  17. // When
  18. $anova = ANOVA::oneWay($sample1, $sample2, $sample3);
  19. // Then
  20. $this->assertEqualsWithDelta($expected, $anova, 0.0001);
  21. }
  22. /**
  23. * @return array [sample1, sample2, sample3, expectedAnova]
  24. */
  25. public function dataProviderForOneWayWithThreeSamples(): array
  26. {
  27. return [
  28. [
  29. [1, 2, 3],
  30. [3, 4, 5],
  31. [5, 6, 7],
  32. [
  33. 'ANOVA' => [
  34. 'treatment' => [
  35. 'SS' => 24,
  36. 'df' => 2,
  37. 'MS' => 12,
  38. 'F' => 12,
  39. 'P' => 0.008,
  40. ],
  41. 'error' => [
  42. 'SS' => 6,
  43. 'df' => 6,
  44. 'MS' => 1,
  45. ],
  46. 'total' => [
  47. 'SS' => 30,
  48. 'df' => 8,
  49. ],
  50. ],
  51. 'total_summary' => [
  52. 'n' => 9,
  53. 'sum' => 36,
  54. 'mean' => 4,
  55. 'SS' => 174,
  56. 'variance' => 3.75,
  57. 'sd' => 1.9365,
  58. 'sem' => 0.6455,
  59. ],
  60. 'data_summary' => [
  61. 0 => [
  62. 'n' => 3,
  63. 'sum' => 6,
  64. 'mean' => 2,
  65. 'SS' => 14,
  66. 'variance' => 1,
  67. 'sd' => 1,
  68. 'sem' => 0.5774,
  69. ],
  70. 1 => [
  71. 'n' => 3,
  72. 'sum' => 12,
  73. 'mean' => 4,
  74. 'SS' => 50,
  75. 'variance' => 1,
  76. 'sd' => 1,
  77. 'sem' => 0.5774,
  78. ],
  79. 2 => [
  80. 'n' => 3,
  81. 'sum' => 18,
  82. 'mean' => 6,
  83. 'SS' => 110,
  84. 'variance' => 1,
  85. 'sd' => 1,
  86. 'sem' => 0.5774,
  87. ],
  88. ],
  89. ],
  90. ],
  91. [
  92. [6, 8, 4, 5, 3, 4],
  93. [8, 12, 9, 11, 6, 8],
  94. [13, 9, 11, 8, 7, 12],
  95. [
  96. 'ANOVA' => [
  97. 'treatment' => [
  98. 'SS' => 84,
  99. 'df' => 2,
  100. 'MS' => 42,
  101. 'F' => 9.26477400569122,
  102. 'P' => 0.002404,
  103. ],
  104. 'error' => [
  105. 'SS' => 68,
  106. 'df' => 15,
  107. 'MS' => 4.5333,
  108. ],
  109. 'total' => [
  110. 'SS' => 152,
  111. 'df' => 17,
  112. ],
  113. ],
  114. 'total_summary' => [
  115. 'n' => 18,
  116. 'sum' => 144,
  117. 'mean' => 8,
  118. 'SS' => 1304,
  119. 'variance' => 8.9412,
  120. 'sd' => 2.9902,
  121. 'sem' => 0.7048,
  122. ],
  123. 'data_summary' => [
  124. 0 => [
  125. 'n' => 6,
  126. 'sum' => 30,
  127. 'mean' => 5,
  128. 'SS' => 166,
  129. 'variance' => 3.2,
  130. 'sd' => 1.7889,
  131. 'sem' => 0.7303,
  132. ],
  133. 1 => [
  134. 'n' => 6,
  135. 'sum' => 54,
  136. 'mean' => 9,
  137. 'SS' => 510,
  138. 'variance' => 4.8,
  139. 'sd' => 2.1909,
  140. 'sem' => 0.8944,
  141. ],
  142. 2 => [
  143. 'n' => 6,
  144. 'sum' => 60,
  145. 'mean' => 10,
  146. 'SS' => 628,
  147. 'variance' => 5.6,
  148. 'sd' => 2.3664,
  149. 'sem' => 0.9661,
  150. ],
  151. ],
  152. ],
  153. ],
  154. ];
  155. }
  156. /**
  157. * @test oneWay with four samples
  158. * @dataProvider dataProviderForOneWayWithFourSamples
  159. * @param array $sample1
  160. * @param array $sample2
  161. * @param array $sample3
  162. * @param array $sample4
  163. * @param array $expected
  164. */
  165. public function testOneWayWithFourSamples(array $sample1, array $sample2, array $sample3, array $sample4, array $expected)
  166. {
  167. // When
  168. $anova = ANOVA::oneWay($sample1, $sample2, $sample3, $sample4);
  169. // Then
  170. $this->assertEqualsWithDelta($expected, $anova, 0.0001);
  171. }
  172. /**
  173. * @return array [sample1, sample2, sample3, sample4, expetedAnova]
  174. */
  175. public function dataProviderForOneWayWithFourSamples(): array
  176. {
  177. return [
  178. [
  179. [0.28551035, 0.338524035, 0.088313218, 0.205930807, 0.363240102],
  180. [0.52173913, 0.763358779, 0.32546786, 0.425305688, 0.378071834],
  181. [0.989119683, 1.192718142, 0.788288288, 0.549176236, 0.544588155],
  182. [1.26705653, 1.625320787, 1.266108976, 1.154187629, 1.268498943],
  183. [
  184. 'ANOVA' => [
  185. 'treatment' => [
  186. 'SS' => 3.176758,
  187. 'df' => 3,
  188. 'MS' => 1.058919,
  189. 'F' => 27.5254,
  190. 'P' => 1.4876e-06,
  191. ],
  192. 'error' => [
  193. 'SS' => 0.615529,
  194. 'df' => 16,
  195. 'MS' => 0.038471,
  196. ],
  197. 'total' => [
  198. 'SS' => 3.792287,
  199. 'df' => 19,
  200. ],
  201. ],
  202. 'total_summary' => [
  203. 'n' => 20,
  204. 'sum' => 14.340525,
  205. 'mean' => 0.717026,
  206. 'SS' => 14.07482,
  207. 'variance' => 0.199594,
  208. 'sd' => 0.446759,
  209. 'sem' => 0.099898,
  210. ],
  211. 'data_summary' => [
  212. 0 => [
  213. 'n' => 5,
  214. 'sum' => 1.281519,
  215. 'mean' => 0.256304,
  216. 'SS' => 0.378265,
  217. 'variance' => 0.012452,
  218. 'sd' => 0.111587,
  219. 'sem' => 0.049903,
  220. ],
  221. 1 => [
  222. 'n' => 5,
  223. 'sum' => 2.413943,
  224. 'mean' => 0.482789,
  225. 'SS' => 1.284681,
  226. 'variance' => 0.029814,
  227. 'sd' => 0.172668,
  228. 'sem' => 0.077219,
  229. ],
  230. 2 => [
  231. 'n' => 5,
  232. 'sum' => 4.063891,
  233. 'mean' => 0.812778,
  234. 'SS' => 3.620504,
  235. 'variance' => 0.079366,
  236. 'sd' => 0.281719,
  237. 'sem' => 0.125989,
  238. ],
  239. 3 => [
  240. 'n' => 5,
  241. 'sum' => 6.581173,
  242. 'mean' => 1.316235,
  243. 'SS' => 8.791371,
  244. 'variance' => 0.032251,
  245. 'sd' => 0.179585,
  246. 'sem' => 0.080313,
  247. ],
  248. ],
  249. ],
  250. ],
  251. ];
  252. }
  253. /**
  254. * @test oneWay throws a BadDataException if there are fewer than three samples
  255. */
  256. public function testOneWayExceptionLessThanThreeSamples()
  257. {
  258. // Given
  259. $sample1 = [1, 2, 3];
  260. $sample2 = [3, 4, 5];
  261. // Then
  262. $this->expectException(Exception\BadDataException::class);
  263. // When
  264. ANOVA::oneWay($sample1, $sample2);
  265. }
  266. /**
  267. * @test oneWay throws a BadDataException if the samples of different sample sizes
  268. */
  269. public function testOneWayExceptionDifferentSampleSizes()
  270. {
  271. // Given
  272. $sample1 = [1, 2, 3];
  273. $sample2 = [3, 4, 5, 6];
  274. $sample3 = [5, 6, 7, 8, 9];
  275. // Then
  276. $this->expectException(Exception\BadDataException::class);
  277. // When
  278. ANOVA::oneWay($sample1, $sample2, $sample3);
  279. }
  280. /**
  281. * @test Axioms of one-way ANOVA results using three samples
  282. * @dataProvider dataProviderForOneWayAxiomsThreeSamples
  283. * @param array $sample1
  284. * @param array $sample2
  285. * @param array $sample3
  286. */
  287. public function testOneWayAxiomsThreeSamples(array $sample1, array $sample2, array $sample3)
  288. {
  289. // When
  290. $anova = ANOVA::oneWay($sample1, $sample2, $sample3);
  291. // Then SST = SSB + SSW
  292. $SST = $anova['ANOVA']['total']['SS'];
  293. $SSB = $anova['ANOVA']['treatment']['SS'];
  294. $SSW = $anova['ANOVA']['error']['SS'];
  295. $this->assertEqualsWithDelta($SST, $SSB + $SSW, 0.00001);
  296. // And dfT = dfB + dfW
  297. $dfT = $anova['ANOVA']['total']['df'];
  298. $dfB = $anova['ANOVA']['treatment']['df'];
  299. $dfW = $anova['ANOVA']['error']['df'];
  300. $this->assertEqualsWithDelta($dfT, $dfB + $dfW, 0.00001);
  301. }
  302. /**
  303. * @return array [sample1, sample2, sample3
  304. */
  305. public function dataProviderForOneWayAxiomsThreeSamples(): array
  306. {
  307. return [
  308. [
  309. [1, 2, 3],
  310. [3, 4, 5],
  311. [5, 6, 7],
  312. ],
  313. [
  314. [4, 5, 3, 6, 5],
  315. [5, 4, 3, 4, 4],
  316. [7, 6, 6, 5, 6],
  317. ],
  318. [
  319. [-4, 4, 5, 6, 7],
  320. [-5, 4, 6, 6, 7],
  321. [0, 1, 2, 3, 4],
  322. ],
  323. ];
  324. }
  325. /**
  326. * @test Axioms of one-way ANOVA results using five samples
  327. * @dataProvider dataProviderForOneWayAxiomsFiveSamples
  328. * @param array $sample1
  329. * @param array $sample2
  330. * @param array $sample3
  331. * @param array $sample4
  332. * @param array $sample5
  333. */
  334. public function testOneWayAxiomsFiveSamples(array $sample1, array $sample2, array $sample3, array $sample4, array $sample5)
  335. {
  336. // When
  337. $anova = ANOVA::oneWay($sample1, $sample2, $sample3, $sample4, $sample5);
  338. // Then SST = SSB + SSW
  339. $SST = $anova['ANOVA']['total']['SS'];
  340. $SSB = $anova['ANOVA']['treatment']['SS'];
  341. $SSW = $anova['ANOVA']['error']['SS'];
  342. $this->assertEqualsWithDelta($SST, $SSB + $SSW, 0.00001);
  343. // And dfT = dfB + dfW
  344. $dfT = $anova['ANOVA']['total']['df'];
  345. $dfB = $anova['ANOVA']['treatment']['df'];
  346. $dfW = $anova['ANOVA']['error']['df'];
  347. $this->assertEqualsWithDelta($dfT, $dfB + $dfW, 0.00001);
  348. }
  349. /**
  350. * @return array [sample1, sample2, sample3, sample4, sample5]
  351. */
  352. public function dataProviderForOneWayAxiomsFiveSamples(): array
  353. {
  354. return [
  355. [
  356. [1, 2, 3],
  357. [3, 4, 5],
  358. [5, 6, 7],
  359. [7, 8, 9],
  360. [9, 10, 11],
  361. ],
  362. [
  363. [4, 5, 3, 6, 5],
  364. [5, 4, 3, 4, 4],
  365. [7, 6, 6, 5, 6],
  366. [5, 6, 6, 5, 4],
  367. [8, 7, 7, 6, 7],
  368. ],
  369. [
  370. [-4, 4, 5, 6, 7],
  371. [-5, 4, 6, 6, 7],
  372. [0, 1, 2, 3, 4],
  373. [-2, -1, -1, 4, 5],
  374. [5, 5, 5, 5, 5],
  375. ],
  376. ];
  377. }
  378. /**
  379. * @test twoWay using two sample sets
  380. * @dataProvider dataProviderForTwoWayTwoAs
  381. * @param array $A₁
  382. * @param array $A₂
  383. * @param array $expected
  384. */
  385. public function testTwoWayTwoAs(array $A₁, array $A₂, array $expected)
  386. {
  387. // When
  388. $anova = ANOVA::twoWay($A₁, $A₂);
  389. // Then
  390. $this->assertEqualsWithDelta($expected, $anova['ANOVA'], 0.001);
  391. }
  392. /**
  393. * @return array [A₁, $A₂, expectedAnova]
  394. */
  395. public function dataProviderForTwoWayTwoAs(): array
  396. {
  397. return [
  398. [
  399. // Factor A₁
  400. [
  401. [4, 6, 8], // Factor B₁
  402. [6, 6, 9], // Factor B₂
  403. [8, 9, 13], // Factor B₃
  404. ],
  405. // Factor A₂
  406. [
  407. [4, 8, 9], // Factor B₁
  408. [7, 10, 13], // Factor B₂
  409. [12, 14, 16], // Factor B₃
  410. ],
  411. // ANOVA result
  412. [
  413. 'factorA' => [
  414. 'SS' => 32,
  415. 'df' => 1,
  416. 'MS' => 32,
  417. 'F' => 5.647059,
  418. 'P' => 0.03499435
  419. ],
  420. 'factorB' => [
  421. 'SS' => 93,
  422. 'df' => 2,
  423. 'MS' => 46.5,
  424. 'F' => 8.205882,
  425. 'P' => 0.005676730,
  426. ],
  427. 'interaction' => [
  428. 'SS' => 7,
  429. 'df' => 2,
  430. 'MS' => 3.5,
  431. 'F' => 0.617647,
  432. 'P' => 0.5555023,
  433. ],
  434. 'error' => [
  435. 'SS' => 68,
  436. 'df' => 12,
  437. 'MS' => 5.6667,
  438. ],
  439. 'total' => [
  440. 'SS' => 200,
  441. 'df' => 17,
  442. ],
  443. ],
  444. ],
  445. // Calculations: http://scistatcalc.blogspot.com/2013/11/two-factor-anova-test-calculator.html
  446. [
  447. // Factor A₁
  448. [
  449. [4.1, 3.1, 3.5], // Factor B₁
  450. [3.9, 2.8, 3.2], // Factor B₂
  451. [4.3, 3.3, 3.6], // Factor B₃
  452. ],
  453. // Factor A₂
  454. [
  455. [2.7, 1.9, 2.7], // Factor B₁
  456. [3.1, 2.2, 2.3], // Factor B₂
  457. [2.6, 2.3, 2.5], // Factor B₃
  458. ],
  459. // ANOVA result
  460. [
  461. 'factorA' => [
  462. 'SS' => 5.013889,
  463. 'df' => 1,
  464. 'MS' => 5.013889,
  465. 'F' => 23.022959,
  466. 'P' => 4.348485e-4
  467. ],
  468. 'factorB' => [
  469. 'SS' => 0.101111,
  470. 'df' => 2,
  471. 'MS' => 0.050556,
  472. 'F' => 0.232143,
  473. 'P' => 7.963117e-1,
  474. ],
  475. 'interaction' => [
  476. 'SS' => 0.201111,
  477. 'df' => 2,
  478. 'MS' => 0.100556,
  479. 'F' => 0.461735,
  480. 'P' => 6.409332e-1,
  481. ],
  482. 'error' => [
  483. 'SS' => 2.613333,
  484. 'df' => 12,
  485. 'MS' => 0.217778,
  486. ],
  487. 'total' => [
  488. 'SS' => 7.9294,
  489. 'df' => 17,
  490. ],
  491. ],
  492. ],
  493. ];
  494. }
  495. /**
  496. * @test twoWay using three sample sets
  497. * @dataProvider dataProviderForTwoWayThreeAs
  498. * @param array $A₁
  499. * @param array $A₂
  500. * @param array $A₃
  501. * @param array $expected
  502. */
  503. public function testTwoWayThreeAs(array $A₁, array $A₂, array $A₃, array $expected)
  504. {
  505. // When
  506. $anova = ANOVA::twoWay($A₁, $A₂, $A₃);
  507. // Then
  508. $this->assertEqualsWithDelta($expected, $anova['ANOVA'], 0.001);
  509. }
  510. /**
  511. * @return array [A₁, A₂, A₃, expectedAnova]
  512. */
  513. public function dataProviderForTwoWayThreeAs(): array
  514. {
  515. return [
  516. // Example data from: https://people.richland.edu/james/lecture/m170/ch13-2wy.html
  517. [
  518. // Factor A₁
  519. [
  520. [106, 110], // Factor B₁
  521. [95, 100], // Factor B₂
  522. [94, 107], // Factor B₃
  523. [103, 104], // Factor B₄
  524. [100, 102], // Factor B₅
  525. ],
  526. // Factor A₂
  527. [
  528. [110, 112], // Factor B₁
  529. [98, 99], // Factor B₂
  530. [100, 101], // Factor B₃
  531. [108, 112], // Factor B₄
  532. [105, 107], // Factor B₅
  533. ],
  534. // Factor A₃
  535. [
  536. [94, 97], // Factor B₁
  537. [86, 87], // Factor B₂
  538. [98, 99], // Factor B₃
  539. [99, 101], // Factor B₄
  540. [94, 98], // Factor B₅
  541. ],
  542. // ANOVA result
  543. [
  544. 'factorA' => [
  545. 'SS' => 512.8667,
  546. 'df' => 2,
  547. 'MS' => 256.4333,
  548. 'F' => 28.283,
  549. 'P' => 0.000008
  550. ],
  551. 'factorB' => [
  552. 'SS' => 449.4667,
  553. 'df' => 4,
  554. 'MS' => 112.3667,
  555. 'F' => 12.393,
  556. 'P' => 0.000119,
  557. ],
  558. 'interaction' => [
  559. 'SS' => 143.1333,
  560. 'df' => 8,
  561. 'MS' => 17.8917,
  562. 'F' => 1.973,
  563. 'P' => 0.122090,
  564. ],
  565. 'error' => [
  566. 'SS' => 136.0000,
  567. 'df' => 15,
  568. 'MS' => 9.0667,
  569. ],
  570. 'total' => [
  571. 'SS' => 1241.4667,
  572. 'df' => 29,
  573. ],
  574. ],
  575. ],
  576. // Example data from: https://people.richland.edu/james/ictcm/2004/twoway.html
  577. // Calculations: http://scistatcalc.blogspot.com/2013/11/two-factor-anova-test-calculator.html
  578. [
  579. // Factor A₁
  580. [
  581. [54, 49, 59, 39, 55], // Factor B₁
  582. [25, 29, 47, 26, 28], // Factor B₂
  583. ],
  584. // Factor A₂
  585. [
  586. [53, 72, 43, 56, 52], // Factor B₁
  587. [46, 51, 33, 47, 41], // Factor B₂
  588. ],
  589. // Factor A₃
  590. [
  591. [33, 30, 26, 25, 29], // Factor B₁
  592. [18, 21, 34, 40, 24], // Factor B₂
  593. ],
  594. // ANOVA result
  595. [
  596. 'factorA' => [
  597. 'SS' => 2328.2,
  598. 'df' => 2,
  599. 'MS' => 1164.10,
  600. 'F' => 17.580166,
  601. 'P' => 1.986862e-5
  602. ],
  603. 'factorB' => [
  604. 'SS' => 907.5,
  605. 'df' => 1,
  606. 'MS' => 907.50,
  607. 'F' => 13.705009,
  608. 'P' => 1.114639e-3,
  609. ],
  610. 'interaction' => [
  611. 'SS' => 452.6,
  612. 'df' => 2,
  613. 'MS' => 226.30,
  614. 'F' => 3.417569,
  615. 'P' => 4.942928e-2,
  616. ],
  617. 'error' => [
  618. 'SS' => 1589.2,
  619. 'df' => 24,
  620. 'MS' => 66.21666666666667,
  621. ],
  622. 'total' => [
  623. 'SS' => 5277.5,
  624. 'df' => 29,
  625. ],
  626. ],
  627. ],
  628. ];
  629. }
  630. /**
  631. * @test twoWay throws a BadDataException if there are fewer than two sample sets
  632. */
  633. public function testTwoWayExceptionLessThanTwoAs()
  634. {
  635. // Given
  636. $A₁ = [1, 2, 3];
  637. // Then
  638. $this->expectException(Exception\BadDataException::class);
  639. // When
  640. ANOVA::twoWay($A₁);
  641. }
  642. /**
  643. * @test twoWay throws a BadDataException if the sample sets have unequal factors
  644. */
  645. public function testTwoWAyExceptionDifferentNumbersOfFactorBs()
  646. {
  647. // Given
  648. $A₁ = [
  649. [106, 110], // Factor B₁
  650. [95, 100], // Factor B₂
  651. ];
  652. $A₂ = [
  653. [106, 110], // Factor B₁
  654. [95, 100], // Factor B₂
  655. [95, 100], // Factor B₃!
  656. ];
  657. // Then
  658. $this->expectException(Exception\BadDataException::class);
  659. // Then
  660. ANOVA::twoWay($A₁, $A₂);
  661. }
  662. /**
  663. * @test twoWay throws a BadDataException if the sample sets have factors with unequal elements
  664. */
  665. public function testTwoWAyExceptionDifferentNumbersOfFactorElements()
  666. {
  667. // Given
  668. $A₁ = [
  669. [106, 110], // Factor B₁
  670. [95, 100], // Factor B₂
  671. ];
  672. $A₂ = [
  673. [106, 110, 200], // Factor B₁ has 3 elements!
  674. [95, 100], // Factor B₂
  675. ];
  676. // Then
  677. $this->expectException(Exception\BadDataException::class);
  678. // When
  679. ANOVA::twoWay($A₁, $A₂);
  680. }
  681. }