DistributionTest.php 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637
  1. <?php
  2. namespace MathPHP\Tests\Statistics;
  3. use MathPHP\Statistics\Distribution;
  /**
   * Unit tests for MathPHP\Statistics\Distribution.
   *
   * Covers the frequency tables (absolute, relative, cumulative, cumulative relative),
   * the four ranking schemes (fractional, standard competition, modified competition,
   * ordinal) and the stem-and-leaf plot.
   *
   * Every data provider is static: PHPUnit 10+ requires static providers, and static
   * providers are also accepted by the older PHPUnit versions.
   */
  class DistributionTest extends \PHPUnit\Framework\TestCase
  {
      /**
       * @test         frequency
       * @dataProvider dataProviderForFrequency
       * @param        array $values   Raw observations
       * @param        array $expected Map of distinct value => number of occurrences
       */
      public function testFrequency(array $values, array $expected): void
      {
          // When
          $frequencies = Distribution::frequency($values);

          // Then
          $this->assertEquals($expected, $frequencies);
      }

      /**
       * @return array [values, frequencies]
       */
      public static function dataProviderForFrequency(): array
      {
          return [
              [
                  ['A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'D', 'F'],
                  ['A' => 2, 'B' => 4, 'C' => 2, 'D' => 1, 'F' => 1],
              ],
              [
                  [1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
                  [1 => 5, 2 => 3, 3 => 9, 4 => 14],
              ],
              [
                  ['yes', 'yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'yes', 'no'],
                  ['yes' => 6, 'no' => 4],
              ],
              [
                  ['agree', 'disagree', 'agree', 'agree', 'no opinion', 'agree', 'disagree'],
                  ['agree' => 4, 'disagree' => 2, 'no opinion' => 1],
              ],
          ];
      }

      /**
       * @test         relativeFrequency
       * @dataProvider dataProviderForRelativeFrequency
       * @param        array $values   Raw observations
       * @param        array $expected Map of distinct value => share of all observations
       */
      public function testRelativeFrequency(array $values, array $expected): void
      {
          // When
          $frequencies = Distribution::relativeFrequency($values);

          // Then (expected values are rounded to 8 decimals, hence the tolerance)
          $this->assertEqualsWithDelta($expected, $frequencies, 0.0001);
      }

      /**
       * @return array [values, frequencies]
       */
      public static function dataProviderForRelativeFrequency(): array
      {
          return [
              [
                  ['A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'D', 'F'],
                  ['A' => 0.2, 'B' => 0.4, 'C' => 0.2, 'D' => 0.1, 'F' => 0.1],
              ],
              [
                  [1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
                  [1 => 0.16129032, 2 => 0.09677419, 3 => 0.29032258, 4 => 0.45161290],
              ],
              [
                  ['yes', 'yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'yes', 'no'],
                  ['yes' => 0.6, 'no' => 0.4],
              ],
              [
                  ['agree', 'disagree', 'agree', 'agree', 'no opinion', 'agree', 'disagree'],
                  ['agree' => 0.57142857, 'disagree' => 0.28571429, 'no opinion' => 0.14285714],
              ],
          ];
      }

      /**
       * @test         cumulativeFrequency
       * @dataProvider dataProviderForCumulativeFrequency
       * @param        array $values   Raw observations
       * @param        array $expected Map of distinct value => running total of occurrences
       */
      public function testCumulativeFrequency(array $values, array $expected): void
      {
          // When
          $frequencies = Distribution::cumulativeFrequency($values);

          // Then
          $this->assertEqualsWithDelta($expected, $frequencies, 0.0001);
      }

      /**
       * @return array [values, frequencies]
       */
      public static function dataProviderForCumulativeFrequency(): array
      {
          return [
              [
                  ['A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'D', 'F'],
                  ['A' => 2, 'B' => 6, 'C' => 8, 'D' => 9, 'F' => 10],
              ],
              [
                  [1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
                  [1 => 5, 2 => 8, 3 => 17, 4 => 31],
              ],
              [
                  ['yes', 'yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'yes', 'no'],
                  ['yes' => 6, 'no' => 10],
              ],
              [
                  ['agree', 'disagree', 'agree', 'agree', 'no opinion', 'agree', 'disagree'],
                  ['agree' => 4, 'disagree' => 6, 'no opinion' => 7],
              ],
          ];
      }

      /**
       * @test         cumulativeRelativeFrequency
       * @dataProvider dataProviderForCumulativeRelativeFrequency
       * @param        array $values   Raw observations
       * @param        array $expected Map of distinct value => running share of all observations
       */
      public function testCumulativeRelativeFrequency(array $values, array $expected): void
      {
          // When
          $frequencies = Distribution::cumulativeRelativeFrequency($values);

          // Then
          $this->assertEqualsWithDelta($expected, $frequencies, 0.0001);
      }

      /**
       * @return array [values, frequencies]
       */
      public static function dataProviderForCumulativeRelativeFrequency(): array
      {
          return [
              [
                  ['A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'D', 'F'],
                  ['A' => 0.2, 'B' => 0.6, 'C' => 0.8, 'D' => 0.9, 'F' => 1],
              ],
              [
                  [1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
                  [1 => 0.16129032, 2 => 0.25806452, 3 => 0.5483871, 4 => 1],
              ],
              [
                  ['yes', 'yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'yes', 'no'],
                  ['yes' => 0.6, 'no' => 1],
              ],
              [
                  ['agree', 'disagree', 'agree', 'agree', 'no opinion', 'agree', 'disagree'],
                  ['agree' => 0.57142857, 'disagree' => 0.85714286, 'no opinion' => 1],
              ],
          ];
      }

      /**
       * @test         fractionalRanking
       * @dataProvider dataProviderForRankingWithoutTies
       * @dataProvider dataProviderForFractionalRank
       * @param        array $values   Sample to rank
       * @param        array $expected Expected rank of each element, in input order
       */
      public function testFractionalRanking(array $values, array $expected): void
      {
          // When
          $sampleRank = Distribution::fractionalRanking($values);

          // Then
          $this->assertEquals($expected, $sampleRank);
      }

      /**
       * The providers also supply the expected ranks as a second argument; this test
       * only declares $values, so that extra argument is simply ignored.
       *
       * @test         fractionalRanking: Sum of all assigned ranks is ½n(n + 1)
       * @dataProvider dataProviderForRankingWithoutTies
       * @dataProvider dataProviderForFractionalRank
       * @param        array $values Sample to rank
       */
      public function testFractionalRankingDistributionSumOfAllRanks(array $values): void
      {
          // Given
          $n                          = count($values);
          $expectedSumOfAssignedRanks = ($n * ($n + 1)) / 2;

          // When
          $sampleRank = Distribution::fractionalRanking($values);

          // Then
          $sumOfAssignedRanks = \array_sum($sampleRank);
          $this->assertEquals($expectedSumOfAssignedRanks, $sumOfAssignedRanks);
      }

      /**
       * Samples without repeated values. Shared by all four ranking tests in this class,
       * which therefore expect identical ranks from every ranking method for these inputs.
       *
       * Data generated with R: rank(c(1, 2, 3, 4, 5), ties.method='average')
       * @return array [values, ranks]
       */
      public static function dataProviderForRankingWithoutTies(): array
      {
          return [
              [
                  [0],
                  [1],
              ],
              [
                  [1],
                  [1],
              ],
              [
                  [-1],
                  [1],
              ],
              [
                  [5],
                  [1],
              ],
              [
                  [1, 5],
                  [1, 2],
              ],
              [
                  [2, 5],
                  [1, 2],
              ],
              [
                  [1, 2, 3, 4, 5],
                  [1, 2, 3, 4, 5],
              ],
              [
                  [5, 2],
                  [2, 1],
              ],
              [
                  [5, 4, 3, 2, 1],
                  [5, 4, 3, 2, 1],
              ],
              [
                  [5, 3, 1, 2, 4],
                  [5, 3, 1, 2, 4],
              ],
              [
                  [1, 3, 5, 7, 9],
                  [1, 2, 3, 4, 5],
              ],
              [
                  [9, 7, 5, 3, 1],
                  [5, 4, 3, 2, 1],
              ],
              [
                  [3, 1, 4, 15, 92],
                  [2, 1, 3, 4, 5],
              ],
              [
                  [8, 4, 10, 3, 5, 32, 1, 98, 43],
                  [5, 3, 6, 2, 4, 7, 1, 9, 8],
              ],
              [
                  [1, 2, 4, 5],
                  [1, 2, 3, 4],
              ],
              [
                  [-3, -2, -1, 0, 1, 2, 3],
                  [1, 2, 3, 4, 5, 6, 7],
              ],
          ];
      }

      /**
       * Samples with ties; tied values share the average of the ranks they span.
       *
       * Data generated with R: rank(c(1, 2, 3, 4, 5), ties.method='average')
       * @return array [values, ranks]
       */
      public static function dataProviderForFractionalRank(): array
      {
          return [
              [
                  [1, 2, 2, 3],
                  [1, 2.5, 2.5, 4],
              ],
              [
                  [3, 2, 2, 1],
                  [4, 2.5, 2.5, 1],
              ],
              [
                  [1, 2, 3, 3, 4, 5],
                  [1, 2, 3.5, 3.5, 5, 6],
              ],
              [
                  [1, 2, 3, 3, 3, 4, 5],
                  [1, 2, 4, 4, 4, 6, 7],
              ],
              [
                  [1, 1],
                  [1.5, 1.5],
              ],
              [
                  [0, 0],
                  [1.5, 1.5],
              ],
              [
                  [-1, -1],
                  [1.5, 1.5],
              ],
              [
                  [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5],
                  [4.5, 1.5, 6.0, 1.5, 8.0, 11.0, 3.0, 10.0, 8.0, 4.5, 8.0],
              ],
              [
                  [1.0, 1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 5.0, 5.0],
                  [1.5, 1.5, 3, 4.5, 4.5, 6, 8, 8, 8],
              ],
              [
                  [-3, -2, -2, -1, -1, 0, 1, 2, 3],
                  [1, 2.5, 2.5, 4.5, 4.5, 6, 7, 8, 9],
              ],
              [
                  [-1, 5, 7, -1],
                  [1.5, 3, 4, 1.5],
              ],
              [
                  [2.5, 2.5, 2.5, 3, 3, 2.5, 2.25, 2.75, 2, 2.75],
                  [4.5, 4.5, 4.5, 9.5, 9.5, 4.5, 2.0, 7.5, 1.0, 7.5],
              ],
              [
                  [2.25, 2.75, 2.75, 2.25, 2.25, 3.25, 2, 2, 2.75, 1.25],
                  [5.0, 8.0, 8.0, 5.0, 5.0, 10.0, 2.5, 2.5, 8.0, 1.0],
              ],
              [
                  [2.534, 2.512, 2.4634, 2.512, 2.543, 2.5, 2.51, 2.49, 2.49, 2.53, 2.5],
                  [10.0, 7.5, 1.0, 7.5, 11.0, 4.5, 6.0, 2.5, 2.5, 9.0, 4.5],
              ],
          ];
      }

      /**
       * @test         standardCompetitionRanking
       * @dataProvider dataProviderForRankingWithoutTies
       * @dataProvider dataProviderForStandardCompetitionRanking
       * @param        array $values   Sample to rank
       * @param        array $expected Expected rank of each element, in input order
       */
      public function testStandardCompetitionRanking(array $values, array $expected): void
      {
          // When
          $ranking = Distribution::standardCompetitionRanking($values);

          // Then
          $this->assertEquals($expected, $ranking);
      }

      /**
       * Samples with ties; tied values all receive the lowest rank they span.
       *
       * Data generated with R: rank(c(1, 2, 3, 4, 5), ties.method='min')
       * @return array [values, ranks]
       */
      public static function dataProviderForStandardCompetitionRanking(): array
      {
          return [
              [
                  [1, 2, 2, 3],
                  [1, 2, 2, 4],
              ],
              [
                  [3, 2, 2, 1],
                  [4, 2, 2, 1],
              ],
              [
                  [1, 2, 3, 3, 4, 5],
                  [1, 2, 3, 3, 5, 6],
              ],
              [
                  [1, 2, 3, 3, 3, 4, 5],
                  [1, 2, 3, 3, 3, 6, 7],
              ],
              [
                  [1, 1],
                  [1, 1],
              ],
              [
                  [0, 0],
                  [1, 1],
              ],
              [
                  [-1, -1],
                  [1, 1],
              ],
              [
                  [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5],
                  [4, 1, 6, 1, 7, 11, 3, 10, 7, 4, 7],
              ],
              [
                  [1.0, 1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 5.0, 5.0],
                  [1, 1, 3, 4, 4, 6, 7, 7, 7],
              ],
              [
                  [-3, -2, -2, -1, -1, 0, 1, 2, 3],
                  [1, 2, 2, 4, 4, 6, 7, 8, 9],
              ],
              [
                  [-1, 5, 7, -1],
                  [1, 3, 4, 1],
              ],
              [
                  [2.5, 2.5, 2.5, 3, 3, 2.5, 2.25, 2.75, 2, 2.75],
                  [3, 3, 3, 9, 9, 3, 2, 7, 1, 7],
              ],
              [
                  [2.25, 2.75, 2.75, 2.25, 2.25, 3.25, 2, 2, 2.75, 1.25],
                  [4, 7, 7, 4, 4, 10, 2, 2, 7, 1],
              ],
              [
                  [2.534, 2.512, 2.4634, 2.512, 2.543, 2.5, 2.51, 2.49, 2.49, 2.53, 2.5],
                  [10, 7, 1, 7, 11, 4, 6, 2, 2, 9, 4],
              ],
          ];
      }

      /**
       * @test         modifiedCompetitionRanking
       * @dataProvider dataProviderForRankingWithoutTies
       * @dataProvider dataProviderForModifiedCompetitionRanking
       * @param        array $values   Sample to rank
       * @param        array $expected Expected rank of each element, in input order
       */
      public function testModifiedCompetitionRanking(array $values, array $expected): void
      {
          // When
          $ranking = Distribution::modifiedCompetitionRanking($values);

          // Then
          $this->assertEquals($expected, $ranking);
      }

      /**
       * Samples with ties; tied values all receive the highest rank they span.
       *
       * Data generated with R: rank(c(1, 2, 3, 4, 5), ties.method='max')
       * @return array [values, ranks]
       */
      public static function dataProviderForModifiedCompetitionRanking(): array
      {
          return [
              [
                  [1, 2, 2, 3],
                  [1, 3, 3, 4],
              ],
              [
                  [3, 2, 2, 1],
                  [4, 3, 3, 1],
              ],
              [
                  [1, 2, 3, 3, 4, 5],
                  [1, 2, 4, 4, 5, 6],
              ],
              [
                  [1, 2, 3, 3, 3, 4, 5],
                  [1, 2, 5, 5, 5, 6, 7],
              ],
              [
                  [1, 1],
                  [2, 2],
              ],
              [
                  [0, 0],
                  [2, 2],
              ],
              [
                  [-1, -1],
                  [2, 2],
              ],
              [
                  [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5],
                  [5, 2, 6, 2, 9, 11, 3, 10, 9, 5, 9],
              ],
              [
                  [1.0, 1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 5.0, 5.0],
                  [2, 2, 3, 5, 5, 6, 9, 9, 9],
              ],
              [
                  [-3, -2, -2, -1, -1, 0, 1, 2, 3],
                  [1, 3, 3, 5, 5, 6, 7, 8, 9],
              ],
              [
                  [-1, 5, 7, -1],
                  [2, 3, 4, 2],
              ],
              [
                  [2.5, 2.5, 2.5, 3, 3, 2.5, 2.25, 2.75, 2, 2.75],
                  [6, 6, 6, 10, 10, 6, 2, 8, 1, 8],
              ],
              [
                  [2.25, 2.75, 2.75, 2.25, 2.25, 3.25, 2, 2, 2.75, 1.25],
                  [6, 9, 9, 6, 6, 10, 3, 3, 9, 1],
              ],
              [
                  [2.534, 2.512, 2.4634, 2.512, 2.543, 2.5, 2.51, 2.49, 2.49, 2.53, 2.5],
                  [10, 8, 1, 8, 11, 5, 6, 3, 3, 9, 5],
              ],
          ];
      }

      /**
       * @test         ordinalRanking
       * @dataProvider dataProviderForRankingWithoutTies
       * @dataProvider dataProviderForOrdinalRanking
       * @param        array $values   Sample to rank
       * @param        array $expected Expected rank of each element, in input order
       */
      public function testOrdinalRanking(array $values, array $expected): void
      {
          // When
          $ranking = Distribution::ordinalRanking($values);

          // Then
          $this->assertEquals($expected, $ranking);
      }

      /**
       * Samples with ties; tied values receive distinct consecutive ranks in order of appearance.
       *
       * Data generated with R: rank(c(1, 2, 3, 4, 5), ties.method='first')
       * @return array [values, ranks]
       */
      public static function dataProviderForOrdinalRanking(): array
      {
          return [
              [
                  [1, 2, 2, 3],
                  [1, 2, 3, 4],
              ],
              [
                  [3, 2, 2, 1],
                  [4, 2, 3, 1],
              ],
              [
                  [1, 2, 3, 3, 4, 5],
                  [1, 2, 3, 4, 5, 6],
              ],
              [
                  [1, 2, 3, 3, 3, 4, 5],
                  [1, 2, 3, 4, 5, 6, 7],
              ],
              [
                  [1, 1],
                  [1, 2],
              ],
              [
                  [0, 0],
                  [1, 2],
              ],
              [
                  [-1, -1],
                  [1, 2],
              ],
              [
                  [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5],
                  [4, 1, 6, 2, 7, 11, 3, 10, 8, 5, 9],
              ],
              [
                  [1.0, 1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 5.0, 5.0],
                  [1, 2, 3, 4, 5, 6, 7, 8, 9],
              ],
              [
                  [-3, -2, -2, -1, -1, 0, 1, 2, 3],
                  [1, 2, 3, 4, 5, 6, 7, 8, 9],
              ],
              [
                  [-1, 5, 7, -1],
                  [1, 3, 4, 2],
              ],
              [
                  [2.5, 2.5, 2.5, 3, 3, 2.5, 2.25, 2.75, 2, 2.75],
                  [3, 4, 5, 9, 10, 6, 2, 7, 1, 8],
              ],
              [
                  [2.25, 2.75, 2.75, 2.25, 2.25, 3.25, 2, 2, 2.75, 1.25],
                  [4, 7, 8, 5, 6, 10, 2, 3, 9, 1],
              ],
              [
                  [2.534, 2.512, 2.4634, 2.512, 2.543, 2.5, 2.51, 2.49, 2.49, 2.53, 2.5],
                  [10, 7, 1, 8, 11, 4, 6, 2, 3, 9, 5],
              ],
          ];
      }

      /**
       * @test         stemAndLeafPlot
       * @dataProvider dataProviderForStemAndLeafPlot
       * @param        array $values   Sample to plot
       * @param        array $expected Map of stem => list of leaves
       */
      public function testStemAndLeafPlot(array $values, array $expected): void
      {
          // When
          $plot = Distribution::stemAndLeafPlot($values);

          // Then
          $this->assertEquals($expected, $plot);
      }

      /**
       * The expected plot includes stems that have no leaves (5 and 9) as empty lists.
       *
       * @return array [values, plot]
       */
      public static function dataProviderForStemAndLeafPlot(): array
      {
          return [
              [
                  [44, 46, 47, 49, 63, 64, 66, 68, 68, 72, 72, 75, 76, 81, 84, 88, 106],
                  [
                      4  => [4, 6, 7, 9],
                      5  => [],
                      6  => [3, 4, 6, 8, 8],
                      7  => [2, 2, 5, 6],
                      8  => [1, 4, 8],
                      9  => [],
                      10 => [6],
                  ],
              ],
          ];
      }

      /**
       * @test stemAndLeafPlot printed to standard output
       */
      public function testStemAndLeafPlotPrint(): void
      {
          // Given
          $print = true;

          // Then (the output expectation must be registered before the output is produced)
          $this->expectOutputString('0 | 1 2 3' . \PHP_EOL);

          // When
          Distribution::stemAndLeafPlot([1, 2, 3], $print);
      }
  }