LinearTest.php 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566
  1. <?php
  2. namespace MathPHP\Tests\Statistics\Regression;
  3. use MathPHP\Statistics\Regression\Linear;
  4. class LinearTest extends \PHPUnit\Framework\TestCase
  5. {
  /**
   * @test A freshly constructed Linear is an instance of both Regression and Linear
   */
  public function testConstructor()
  {
      // Given
      $samplePoints = [[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]];

      // When
      $model = new Linear($samplePoints);

      // Then
      $this->assertInstanceOf(\MathPHP\Statistics\Regression\Regression::class, $model);
      $this->assertInstanceOf(Linear::class, $model);
  }
  /**
   * @test getPoints returns the same points that were passed to the constructor
   */
  public function testGetPoints()
  {
      // Given
      $expectedPoints = [[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]];
      $model          = new Linear($expectedPoints);

      // When
      $actualPoints = $model->getPoints();

      // Then
      $this->assertEquals($expectedPoints, $actualPoints);
  }
  /**
   * @test getXs returns the first coordinate of each input point
   */
  public function testGetXs()
  {
      // Given
      $model = new Linear([[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]]);

      // When
      $xs = $model->getXs();

      // Then
      $this->assertEquals([1, 2, 4, 5, 6], $xs);
  }
  /**
   * @test getYs returns the second coordinate of each input point
   */
  public function testGetYs()
  {
      // Given
      $model = new Linear([[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]]);

      // When
      $ys = $model->getYs();

      // Then
      $this->assertEquals([2, 3, 5, 7, 8], $ys);
  }
  /**
   * @test getEquation - Equation matches pattern y = mx + b
   *
   * Both coefficients must be rendered as decimals (digits, a dot, digits), each optionally negative.
   *
   * @dataProvider dataProviderForEquation
   * @param array $points points the regression is fitted to
   */
  public function testGetEquation(array $points)
  {
      // Given
      $regression = new Linear($points);
      $pattern    = '/^y = -?\d+[.]\d+x [+] -?\d+[.]\d+$/';

      // When
      $equation = $regression->getEquation();

      // Then
      // assertRegExp() is deprecated since PHPUnit 9.1 and removed in PHPUnit 10.
      // Use its replacement when the installed PHPUnit provides it, and fall back otherwise.
      if (\method_exists($this, 'assertMatchesRegularExpression')) {
          $this->assertMatchesRegularExpression($pattern, $equation);
      } else {
          $this->assertRegExp($pattern, $equation);
      }
  }
  /**
   * Point sets used to check the textual form of the fitted equation.
   *
   * @return array [points]
   */
  public function dataProviderForEquation(): array
  {
      $diagonal   = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]];
      $fivePoints = [[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]];
      $twentyPoints = [
          [4, 390], [9, 580], [10, 650], [14, 730], [4, 410],
          [7, 530], [12, 600], [22, 790], [1, 350], [3, 400],
          [8, 590], [11, 640], [5, 450], [6, 520], [10, 690],
          [11, 690], [16, 770], [13, 700], [13, 730], [10, 640],
      ];

      return [
          [$diagonal],
          [$fivePoints],
          [$twentyPoints],
      ];
  }
  /**
   * @test getParameters returns the expected 'm' and 'b' entries (within 0.0001)
   * @dataProvider dataProviderForParameters
   * @param array $points points the regression is fitted to
   * @param float $m      expected value of the 'm' parameter
   * @param float $b      expected value of the 'b' parameter
   */
  public function testGetParameters(array $points, float $m, float $b)
  {
      // Given
      $model     = new Linear($points);
      $tolerance = 0.0001;

      // When
      $fitted = $model->getParameters();

      // Then
      $this->assertEqualsWithDelta($m, $fitted['m'], $tolerance);
      $this->assertEqualsWithDelta($b, $fitted['b'], $tolerance);
  }
  /**
   * Point sets paired with the 'm' and 'b' values expected from getParameters.
   *
   * @return array [points, m, b]
   */
  public function dataProviderForParameters(): array
  {
      $cases = [];

      $cases[] = [
          [[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]],
          1.2209302325581,
          0.60465116279069,
      ];

      $cases[] = [
          [
              [4, 390], [9, 580], [10, 650], [14, 730], [4, 410],
              [7, 530], [12, 600], [22, 790], [1, 350], [3, 400],
              [8, 590], [11, 640], [5, 450], [6, 520], [10, 690],
              [11, 690], [16, 770], [13, 700], [13, 730], [10, 640],
          ],
          25.326467777896,
          353.16487949889,
      ];

      // Example data from http://reliawiki.org/index.php/Simple_Linear_Regression_Analysis
      $cases[] = [
          [
              [50, 122], [53, 118], [54, 128], [55, 121], [56, 125],
              [59, 136], [62, 144], [65, 142], [67, 149], [71, 161],
              [72, 167], [74, 168], [75, 162], [76, 171], [79, 175],
              [80, 182], [82, 180], [85, 183], [87, 188], [90, 200],
              [93, 194], [94, 206], [95, 207], [97, 210], [100, 219],
          ],
          1.9952,
          17.0016,
      ];

      // Example data from http://faculty.cas.usf.edu/mbrannick/regression/regbas.html,
      // http://www.alcula.com/calculators/statistics/linear-regression/
      $cases[] = [
          [
              [61, 105], [62, 120], [63, 120], [65, 160], [65, 120],
              [68, 145], [69, 175], [70, 160], [72, 185], [75, 210],
          ],
          6.968085106383,
          -316.86170212766,
      ];

      $cases[] = [
          [
              [6, 562], [3, 421], [6, 581], [9, 630], [3, 412], [9, 560],
              [6, 434], [3, 443], [9, 590], [6, 570], [3, 346], [9, 672],
          ],
          34.583333333333,
          310.91666666667,
      ];

      $cases[] = [
          [[95, 85], [85, 95], [80, 70], [70, 65], [60, 70]],
          0.64383562,
          26.780821917808,
      ];

      $cases[] = [
          [[1, 1], [2, 2], [3, 1.3], [4, 3.75], [5, 2.25]],
          0.425,
          0.785,
      ];

      return $cases;
  }
  /**
   * @test getSampleSize equals the expected number of points
   * @dataProvider dataProviderForSampleSize
   * @param array $points points the regression is fitted to
   * @param int   $n      expected sample size
   */
  public function testGetSampleSize(array $points, int $n)
  {
      // Given
      $model = new Linear($points);

      // When
      $sampleSize = $model->getSampleSize();

      // Then
      $this->assertEquals($n, $sampleSize);
  }
  /**
   * Point sets paired with the number of points they contain.
   *
   * @return array [points, n]
   */
  public function dataProviderForSampleSize(): array
  {
      $fivePoints = [[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]];
      $twentyPoints = [
          [4, 390], [9, 580], [10, 650], [14, 730], [4, 410],
          [7, 530], [12, 600], [22, 790], [1, 350], [3, 400],
          [8, 590], [11, 640], [5, 450], [6, 520], [10, 690],
          [11, 690], [16, 770], [13, 700], [13, 730], [10, 640],
      ];

      return [
          [$fivePoints, 5],
          [$twentyPoints, 20],
      ];
  }
  /**
   * @test evaluate returns the expected y for a given x (within 0.01)
   * @dataProvider dataProviderForEvaluate
   * @param array $points points the regression is fitted to
   * @param float $x      value passed to evaluate()
   * @param float $y      expected result of evaluate($x)
   */
  public function testEvaluate(array $points, float $x, float $y)
  {
      // Given
      $model = new Linear($points);

      // When
      $predicted = $model->evaluate($x);

      // Then
      $this->assertEqualsWithDelta($y, $predicted, 0.01);
  }
  /**
   * Point sets with an x to evaluate and the y expected at that x.
   *
   * @return array [points, x, y]
   */
  public function dataProviderForEvaluate(): array
  {
      // y = x + 0
      $identityLine = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]];

      // Example data from http://reliawiki.org/index.php/Simple_Linear_Regression_Analysis
      $reliaWiki = [
          [50, 122], [53, 118], [54, 128], [55, 121], [56, 125],
          [59, 136], [62, 144], [65, 142], [67, 149], [71, 161],
          [72, 167], [74, 168], [75, 162], [76, 171], [79, 175],
          [80, 182], [82, 180], [85, 183], [87, 188], [90, 200],
          [93, 194], [94, 206], [95, 207], [97, 210], [100, 219],
      ];

      $cases = [];

      $cases[] = [$identityLine, 5, 5];
      $cases[] = [$identityLine, 18, 18];

      // y = 2x + 0
      $cases[] = [[[0, 0], [1, 2], [2, 4], [3, 6]], 4, 8];

      // y = 2.5x + 1
      $cases[] = [[[0, 1], [1, 3.5], [2, 6]], 5, 13.5];

      // y = -x + 2
      $cases[] = [[[0, 2], [1, 1], [2, 0], [3, -1]], 4, -2];

      $cases[] = [$reliaWiki, 93, 202.5552];

      return $cases;
  }
  /**
   * @test ci returns the expected value for a given x and p (within 0.0000001)
   * @dataProvider dataProviderForCI
   * @param array $points points the regression is fitted to
   * @param float $x      first argument passed to ci()
   * @param float $p      second argument passed to ci()
   * @param float $ci     expected result of ci($x, $p)
   * @throws \Exception
   */
  public function testCI(array $points, float $x, float $p, float $ci)
  {
      // Given
      $model = new Linear($points);

      // When
      $interval = $model->ci($x, $p);

      // Then
      $this->assertEqualsWithDelta($ci, $interval, 0.0000001);
  }
  /**
   * One point set evaluated at several (x, p) pairs, with the expected ci() result for each.
   *
   * @return array [points, x, p, ci]
   */
  public function dataProviderForCI(): array
  {
      $points = [[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]];

      return [
          [$points, 2, 0.05, 0.651543596],
          [$points, 3, 0.05, 0.518513005],
          [$points, 3, 0.1, 0.383431307],
      ];
  }
  240. /**
  241. * @test Github issue 429 - ci division by zero
  242. */
  243. public function testBugIssue429CI()
  244. {
  245. // Given
  246. $points = [[5,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,0],[8,1],[8,0],[8,1],[8,1],[8,1],[8,1],[8,0],[8,1],[8,0],[8,1],[8,0],[8,1],[8,0],[8,1],[8,0],[8,1],[8,1],[8,1],[8,0],[8,1],[8,0],[8,1],[8,1],[8,1],[8,0],[8,1],[8,0],[8,1],[8,0],[8,1],[8,0],[8,1],[8,0],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8
,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1],[8,1]];
  247. $x = 5.0;
  248. // And
  249. $regression = new Linear($points);
  250. // When
  251. $ci = $regression->ci($x, 0.05);
  252. // Then
  253. $this->assertEqualsWithDelta(0.39030395, $ci, 0.000001);
  254. }
  /**
   * @test pi returns the expected value for a given x, p and q (within 0.0000001)
   * @dataProvider dataProviderForPI
   * @param array $points points the regression is fitted to
   * @param float $x      first argument passed to pi()
   * @param float $p      second argument passed to pi()
   * @param float $q      third argument passed to pi()
   * @param float $pi     expected result of pi($x, $p, $q)
   * @throws \Exception
   */
  public function testPI(array $points, float $x, float $p, float $q, float $pi)
  {
      // Given
      $model = new Linear($points);

      // When
      $interval = $model->pi($x, $p, $q);

      // Then
      $this->assertEqualsWithDelta($pi, $interval, 0.0000001);
  }
  /**
   * One point set evaluated at several (x, p, q) triples, with the expected pi() result for each.
   *
   * @return array [points, x, p, q, pi]
   */
  public function dataProviderForPI(): array
  {
      $points = [[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]];

      $cases   = [];
      $cases[] = [$points, 2, 0.05, 1, 1.281185007];
      $cases[] = [$points, 3, 0.05, 1, 1.218926455];

      // when q gets large, pi approaches ci.
      $cases[] = [$points, 3, 0.1, 10000000, 0.383431394];

      return $cases;
  }
  /**
   * @test fProbability returns the expected value (within 0.0000001)
   * @dataProvider dataProviderForFProbability
   * @param array $points      points the regression is fitted to
   * @param float $probability expected result of fProbability()
   */
  public function testFProbability(array $points, float $probability)
  {
      // Given
      $model = new Linear($points);

      // When
      $actualProbability = $model->fProbability();

      // Then
      $this->assertEqualsWithDelta($probability, $actualProbability, 0.0000001);
  }
  /**
   * Point set paired with the value expected from fProbability.
   *
   * @return array [points, probability]
   */
  public function dataProviderForFProbability(): array
  {
      $points              = [[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]];
      $expectedProbability = 0.999304272;

      return [
          [$points, $expectedProbability],
      ];
  }
  /**
   * @test tProbability returns the expected 'm' and 'b' entries (within 0.0000001)
   *
   * Note the mapping used by this test: $beta0 is checked against the 'm' entry
   * and $beta1 against the 'b' entry.
   *
   * @dataProvider dataProviderForTProbability
   * @param array $points points the regression is fitted to
   * @param float $beta0  expected value of tProbability()['m']
   * @param float $beta1  expected value of tProbability()['b']
   */
  public function testTProbability(array $points, float $beta0, float $beta1)
  {
      // Given
      $model     = new Linear($points);
      $tolerance = 0.0000001;

      // When
      $probabilities = $model->tProbability();

      // Then
      $this->assertEqualsWithDelta($beta0, $probabilities['m'], $tolerance);
      $this->assertEqualsWithDelta($beta1, $probabilities['b'], $tolerance);
  }
  /**
   * Point set paired with the two values expected from tProbability.
   *
   * @return array [points, beta0, beta1]
   */
  public function dataProviderForTProbability(): array
  {
      $points = [[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]];

      $cases   = [];
      $cases[] = [$points, 0.999652136, 0.913994632];

      return $cases;
  }
  /**
   * @test leverages returns the expected value at every expected key (within 0.0000001)
   * @dataProvider dataProviderForLeverages
   * @param array $points    points the regression is fitted to
   * @param array $leverages expected leverages, compared key by key
   */
  public function testLeverages(array $points, array $leverages)
  {
      // Given
      $model     = new Linear($points);
      $tolerance = 0.0000001;

      // When
      $actualLeverages = $model->leverages();

      // Then
      foreach (array_keys($leverages) as $index) {
          $this->assertEqualsWithDelta($leverages[$index], $actualLeverages[$index], $tolerance);
      }
  }
  /**
   * Point set paired with one expected leverage per point.
   *
   * @return array [points, leverages]
   */
  public function dataProviderForLeverages(): array
  {
      $points = [[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]];
      $expectedLeverages = [
          0.593023255813953,
          0.348837209302325,
          0.209302325581395,
          0.313953488372093,
          0.534883720930232,
      ];

      return [
          [$points, $expectedLeverages],
      ];
  }
  /**
   * @test degreesOfFreedom returns the expected value
   * @dataProvider dataProviderForDF
   * @param array $points points the regression is fitted to
   * @param int   $df     expected result of degreesOfFreedom()
   */
  public function testDF(array $points, int $df)
  {
      // Given
      $model = new Linear($points);

      // When
      $actualDf = $model->degreesOfFreedom();

      // Then
      $this->assertEqualsWithDelta($df, $actualDf, 0.0000001);
  }
  /**
   * Point set paired with the value expected from degreesOfFreedom.
   *
   * @return array [points, df]
   */
  public function dataProviderForDF(): array
  {
      $points     = [[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]];
      $expectedDf = 3;

      return [
          [$points, $expectedDf],
      ];
  }
  /**
   * @test getProjectionMatrix returns the expected entry at every expected position (within 0.0000001)
   * @dataProvider dataProviderForGetProjection
   * @param array $points points the regression is fitted to
   * @param array $P      expected matrix entries, as an array of rows
   */
  public function testGetProjection(array $points, array $P)
  {
      // Given
      $model     = new Linear($points);
      $tolerance = 0.0000001;

      // When
      $projection = $model->getProjectionMatrix();

      // Then
      foreach ($P as $i => $expectedRow) {
          foreach ($expectedRow as $j => $expectedEntry) {
              $this->assertEqualsWithDelta($expectedEntry, $projection[$i][$j], $tolerance);
          }
      }
  }
  /**
   * Point set paired with the 5x5 matrix expected from getProjectionMatrix.
   *
   * @return array [points, P]
   */
  public function dataProviderForGetProjection(): array
  {
      $points = [[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]];

      $expectedRows   = [];
      $expectedRows[] = [0.593023255813953, 0.441860465116279, 0.13953488372093, -0.0116279069767443, -0.162790697674419];
      $expectedRows[] = [0.441860465116279, 0.348837209302325, 0.162790697674418, 0.069767441860465, -0.0232558139534887];
      $expectedRows[] = [0.13953488372093, 0.162790697674418, 0.209302325581395, 0.232558139534884, 0.255813953488372];
      $expectedRows[] = [-0.0116279069767442, 0.069767441860465, 0.232558139534884, 0.313953488372093, 0.395348837209302];
      $expectedRows[] = [-0.162790697674419, -0.0232558139534885, 0.255813953488372, 0.395348837209302, 0.534883720930232];

      return [
          [$points, $expectedRows],
      ];
  }
  /**
   * @test mean squares and error standard deviation match the expected values (within 0.0000001)
   * @dataProvider dataProviderForMeanSquares
   * @param array $points points the regression is fitted to
   * @param array $sums   expected values keyed by 'mse', 'msr', 'mst' and 'sd'
   */
  public function testMeanSquares(array $points, array $sums)
  {
      // Given
      $model     = new Linear($points);
      $tolerance = 0.0000001;

      // Then - each statistic is computed and checked in turn
      $residual = $model->meanSquareResidual();
      $this->assertEqualsWithDelta($sums['mse'], $residual, $tolerance);

      $regressionMeanSquare = $model->meanSquareRegression();
      $this->assertEqualsWithDelta($sums['msr'], $regressionMeanSquare, $tolerance);

      $total = $model->meanSquareTotal();
      $this->assertEqualsWithDelta($sums['mst'], $total, $tolerance);

      $standardDeviation = $model->errorSd();
      $this->assertEqualsWithDelta($sums['sd'], $standardDeviation, $tolerance);
  }
  /**
   * Point set paired with the expected 'mse', 'msr', 'mst' and 'sd' values.
   *
   * @return array [points, sums]
   */
  public function dataProviderForMeanSquares(): array
  {
      $points = [[1, 2], [2, 3], [4, 5], [5, 7], [6, 8]];

      $expected        = [];
      $expected['mse'] = 0.1201550388;
      $expected['msr'] = 25.6395348837;
      $expected['mst'] = 6.5;
      $expected['sd']  = 0.3466338685;

      return [
          [$points, $expected],
      ];
  }
  /**
   * @test outliers - cooksD and dffits return the expected value for every point (within 0.0000001)
   *
   * The loops run over the expected values rather than the computed ones, and the result sizes
   * are checked first, so an empty or truncated result fails instead of passing with no assertions.
   *
   * @dataProvider dataProviderForOutliers
   * @param array $points points the regression is fitted to
   * @param array $cook   expected cooksD() values, compared key by key
   * @param array $DFFITS expected dffits() values, compared key by key
   */
  public function testOutliers(array $points, array $cook, array $DFFITS)
  {
      // Given
      $regression = new Linear($points);

      // When
      $test_cook   = $regression->cooksD();
      $test_dffits = $regression->dffits();

      // Then
      $this->assertCount(count($cook), $test_cook);
      foreach ($cook as $key => $expected) {
          // Expected value goes first so failure messages read the right way round
          $this->assertEqualsWithDelta($expected, $test_cook[$key], .0000001);
      }

      $this->assertCount(count($DFFITS), $test_dffits);
      foreach ($DFFITS as $key => $expected) {
          $this->assertEqualsWithDelta($expected, $test_dffits[$key], .0000001);
      }
  }
  /**
   * Point set paired with the values expected from cooksD and dffits, one per point.
   *
   * @return array [points, cook, DFFITS]
   */
  public function dataProviderForOutliers(): array
  {
      // Example data from http://www.real-statistics.com/multiple-regression/outliers-and-influencers/
      $points = [
          [5, 80], [23, 78], [25, 60], [48, 53], [17, 85],
          [8, 84], [4, 73], [26, 79], [11, 81], [19, 75],
          [14, 68], [35, 72], [29, 58], [4, 92], [23, 65],
      ];

      $expectedCook = [
          0.012083306344603,
          0.0300594698005975,
          0.0757553251307135,
          0.0741065959898502,
          0.0624057528075083,
          0.0142413619931789,
          0.212136415565691,
          0.0755417128075708,
          0.00460659919090967,
          0.00088992920763197,
          0.0592838137660013,
          0.142372813997539,
          0.0975938916424623,
          0.157390753959856,
          0.0261198759356697,
      ];

      $expectedDffits = [
          -0.150079950062248,
          0.24285101704604,
          -0.401412101080541,
          -0.372557646651725,
          0.363674389274495,
          0.163387818699222,
          -0.679956836684882,
          0.398634868702933,
          0.0925181155407344,
          0.0405721294627194,
          -0.349647454278992,
          0.540607683240147,
          -0.45315456934644,
          0.572499188557405,
          -0.225453165214519,
      ];

      return [
          [$points, $expectedCook, $expectedDffits],
      ];
  }
  506. }