DistanceTest.php 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349
  1. <?php
  2. namespace MathPHP\Tests\Statistics;
  3. use MathPHP\LinearAlgebra\NumericMatrix;
  4. use MathPHP\Statistics\Distance;
  5. use MathPHP\Exception;
  6. class DistanceTest extends \PHPUnit\Framework\TestCase
  7. {
  8. /**
  9. * @test bhattacharyya
  10. * @dataProvider dataProviderForBhattacharyyaDistance
  11. * @param array $p
  12. * @param array $q
  13. * @param float $expected
  14. */
  15. public function testBhattacharyyaDistance(array $p, array $q, float $expected)
  16. {
  17. // When
  18. $BD = Distance::bhattacharyya($p, $q);
  19. // Then
  20. $this->assertEqualsWithDelta($expected, $BD, 0.0001);
  21. }
  22. /**
  23. * @return array [p, q, distance]
  24. */
  25. public function dataProviderForBhattacharyyaDistance(): array
  26. {
  27. return [
  28. [
  29. [0.2, 0.5, 0.3],
  30. [0.1, 0.4, 0.5],
  31. 0.024361049046679,
  32. ],
  33. [
  34. [0.4, 0.6],
  35. [0.3, 0.7],
  36. 0.005531036666445
  37. ],
  38. [
  39. [0.9, 0.1],
  40. [0.1, 0.9],
  41. 0.510825623765991
  42. ],
  43. ];
  44. }
  45. /**
  46. * @test bhattacharyya when arrays are different lengths
  47. */
  48. public function testBhattacharyyaDistanceExceptionArraysDifferentLength()
  49. {
  50. // Given
  51. $p = [0.4, 0.5, 0.1];
  52. $q = [0.2, 0.8];
  53. // Then
  54. $this->expectException(Exception\BadDataException::class);
  55. // When
  56. Distance::bhattacharyya($p, $q);
  57. }
  58. /**
  59. * @test bhattacharyya when probabilities do not add up to one
  60. */
  61. public function testBhattacharyyaDistanceExceptionNotProbabilityDistributionThatAddsUpToOne()
  62. {
  63. // Given
  64. $p = [0.2, 0.2, 0.1];
  65. $q = [0.2, 0.4, 0.6];
  66. // Then
  67. $this->expectException(Exception\BadDataException::class);
  68. // When
  69. Distance::bhattacharyya($p, $q);
  70. }
  71. /**
  72. * @test hellinger
  73. * @dataProvider dataProviderForHellingerDistance
  74. * @param array $p
  75. * @param array $q
  76. * @param float $expected
  77. */
  78. public function testHellingerDistance(array $p, array $q, float $expected)
  79. {
  80. // When
  81. $BD = Distance::hellinger($p, $q);
  82. // Then
  83. $this->assertEqualsWithDelta($expected, $BD, 0.0001);
  84. }
  85. /**
  86. * Test data created with Python's numpy/scipy: norm(np.sqrt(p) - np.sqrt(q)) / np.sqrt(2)
  87. * @return array [p, q, distance]
  88. */
  89. public function dataProviderForHellingerDistance(): array
  90. {
  91. return [
  92. [
  93. [0.2905, 0.4861, 0.2234],
  94. [0.2704, 0.5259, 0.2137],
  95. 0.025008343695279284,
  96. ],
  97. [
  98. [0.5, 0.5],
  99. [0.75, 0.25],
  100. 0.18459191128251448,
  101. ],
  102. [
  103. [0.2, 0.5, 0.3],
  104. [0.1, 0.4, 0.5],
  105. 0.15513450177826621,
  106. ],
  107. [
  108. [0.4, 0.6],
  109. [0.3, 0.7],
  110. 0.074268220965891737
  111. ],
  112. [
  113. [0.9, 0.1],
  114. [0.1, 0.9],
  115. 0.63245553203367577
  116. ],
  117. ];
  118. }
  119. /**
  120. * @test hellinger when the arrays are different lengths
  121. */
  122. public function testHellingerDistanceExceptionArraysDifferentLength()
  123. {
  124. // Given
  125. $p = [0.4, 0.5, 0.1];
  126. $q = [0.2, 0.8];
  127. // Then
  128. $this->expectException(Exception\BadDataException::class);
  129. // When
  130. Distance::hellinger($p, $q);
  131. }
  132. /**
  133. * @test hellinger when the probabilities do not add up to one
  134. */
  135. public function testHellingerDistanceExceptionNotProbabilityDistributionThatAddsUpToOne()
  136. {
  137. // Given
  138. $p = [0.2, 0.2, 0.1];
  139. $q = [0.2, 0.4, 0.6];
  140. // Then
  141. $this->expectException(Exception\BadDataException::class);
  142. // When
  143. Distance::hellinger($p, $q);
  144. }
  145. /**
  146. * @test jensenShannon
  147. * @dataProvider dataProviderForJensenShannon
  148. * @param array $p
  149. * @param array $q
  150. * @param float $expected
  151. */
  152. public function testJensenShannon(array $p, array $q, float $expected)
  153. {
  154. // When
  155. $BD = Distance::jensenShannon($p, $q);
  156. // Then
  157. $this->assertEqualsWithDelta($expected, $BD, 0.0001);
  158. }
  159. /**
  160. * Test data created with Python scipy.spatial.distance.jensenshannon
  161. * distance.jensenshannon(p, q)
  162. * @return array [p, q, distance]
  163. */
  164. public function dataProviderForJensenShannon(): array
  165. {
  166. return [
  167. [
  168. [0.4, 0.6],
  169. [0.5, 0.5],
  170. 0.07112938864483229,
  171. ],
  172. [
  173. [0.1, 0.2, 0.2, 0.2, 0.2, 0.1],
  174. [0.0, 0.1, 0.4, 0.4, 0.1, 0.0],
  175. 0.346820456568833
  176. ],
  177. [
  178. [0.25, 0.5, 0.25],
  179. [0.5, 0.3, 0.2],
  180. 0.18778369857844396,
  181. ],
  182. [
  183. [0.5, 0.3, 0.2],
  184. [0.25, 0.5, 0.25],
  185. 0.18778369857844396,
  186. ],
  187. ];
  188. }
  189. /**
  190. * @test jensenShannon when the arrays are different lengths
  191. */
  192. public function testJensenShannonExceptionArraysDifferentLength()
  193. {
  194. // Given
  195. $p = [0.4, 0.5, 0.1];
  196. $q = [0.2, 0.8];
  197. // Then
  198. $this->expectException(Exception\BadDataException::class);
  199. // When
  200. Distance::jensenShannon($p, $q);
  201. }
  202. /**
  203. * @test jensenShannon when the probabilities do not add up to one
  204. */
  205. public function testJensenShannonExceptionNotProbabilityDistributionThatAddsUpToOne()
  206. {
  207. // Given
  208. $p = [0.2, 0.2, 0.1];
  209. $q = [0.2, 0.4, 0.6];
  210. // Then
  211. $this->expectException(Exception\BadDataException::class);
  212. // When
  213. Distance::jensenShannon($p, $q);
  214. }
  215. /**
  216. * @test Mahalanobis from a point to the center of the data
  217. * @dataProvider dataProviderForMahalanobisCenter
  218. * @param array $x
  219. * @param NumericMatrix $data
  220. * @param float $expectedDistance
  221. * @throws \Exception
  222. */
  223. public function testMahalanobisCenter(array $x, NumericMatrix $data, float $expectedDistance)
  224. {
  225. // Given
  226. $x_m = new NumericMatrix($x);
  227. // When
  228. $distance = Distance::mahalanobis($x_m, $data);
  229. // Then
  230. $this->assertEqualsWithDelta($expectedDistance, $distance, 0.0001);
  231. }
  232. /**
  233. * @return array [x, data, distance]
  234. * @throws \Exception
  235. */
  236. public function dataProviderForMahalanobisCenter(): array
  237. {
  238. $data = [
  239. [4, 4, 5, 2, 3, 6, 9, 7, 4, 5],
  240. [3, 7, 5, 7, 9, 5, 6, 2, 2, 7],
  241. ];
  242. $data_matrix = new NumericMatrix($data);
  243. return [
  244. [
  245. [[4], [3]],
  246. $data_matrix,
  247. 1.24017
  248. ],
  249. [
  250. [[4], [7]],
  251. $data_matrix,
  252. 0.76023
  253. ],
  254. [
  255. [[5], [5]],
  256. $data_matrix,
  257. 0.12775
  258. ],
  259. [
  260. [[2], [7]],
  261. $data_matrix,
  262. 1.46567
  263. ],
  264. [
  265. [[3], [9]],
  266. $data_matrix,
  267. 1.64518
  268. ],
  269. ];
  270. }
  271. /**
  272. * @test Mahalanobis between two points
  273. * @dataProvider dataProviderForMahalanobisPoint
  274. * @param array $x
  275. * @param array $y
  276. * @param NumericMatrix $data
  277. * @param float $expectedDistance
  278. * @throws \Exception
  279. */
  280. public function testMahalanobisPoint(array $x, array $y, NumericMatrix $data, float $expectedDistance)
  281. {
  282. // Given
  283. $x_m = new NumericMatrix($x);
  284. $y_m = new NumericMatrix($y);
  285. // when
  286. $distance = Distance::mahalanobis($x_m, $data, $y_m);
  287. // Then
  288. $this->assertEqualsWithDelta($expectedDistance, $distance, 0.0001);
  289. }
  290. /**
  291. * @return array [x, y, data, distance]
  292. * @throws \Exception
  293. */
  294. public function dataProviderForMahalanobisPoint(): array
  295. {
  296. $data = [
  297. [4, 4, 5, 2, 3, 6, 9, 7, 4, 5],
  298. [3, 7, 5, 7, 9, 5, 6, 2, 2, 7],
  299. ];
  300. $data_matrix = new NumericMatrix($data);
  301. return [
  302. [
  303. [[6], [5]],
  304. [[2], [2]],
  305. $data_matrix,
  306. 2.76992
  307. ],
  308. [
  309. [[9], [6]],
  310. [[2], [2]],
  311. $data_matrix,
  312. 4.47614
  313. ],
  314. [
  315. [[7], [2]],
  316. [[2], [2]],
  317. $data_matrix,
  318. 2.58465
  319. ],
  320. [
  321. [[4], [2]],
  322. [[2], [2]],
  323. $data_matrix,
  324. 1.03386
  325. ],
  326. [
  327. [[5], [7]],
  328. [[2], [-2]],
  329. $data_matrix,
  330. 4.6909
  331. ],
  332. ];
  333. }
  334. /**
  335. * @test Mahalanobis between two datasets
  336. * https://rdrr.io/rforge/GenAlgo/man/maha.html
  337. * @throws \Exception
  338. */
  339. public function testMahalanobisTwoData()
  340. {
  341. // Given
  342. $data1 = new NumericMatrix([
  343. [4, 4, 5, 2, 3, 6, 9, 7, 4, 5],
  344. [3, 7, 5, 7, 9, 5, 6, 2, 2, 7],
  345. ]);
  346. $data2 = new NumericMatrix([
  347. [5, 3, 6, 3, 9],
  348. [7, 6, 1, 2, 9],
  349. ]);
  350. // When
  351. $distance = Distance::mahalanobis($data2, $data1);
  352. // Then
  353. $this->assertEqualsWithDelta(0.1863069, $distance, 0.0001);
  354. }
  355. /**
  356. * @test Minkowski distance
  357. * @dataProvider dataProviderForMinkowskiDistance
  358. * @param float[] $x
  359. * @param float[] $y
  360. * @param int $p
  361. * @param float $expected
  362. */
  363. public function testMinkowski(array $x, array $y, int $p, float $expected)
  364. {
  365. // When
  366. $distanceXy = Distance::minkowski($x, $y, $p);
  367. $distanceYx = Distance::minkowski($y, $x, $p);
  368. // Then
  369. $this->assertEqualsWithDelta($expected, $distanceXy, 0.0000000001);
  370. $this->assertEqualsWithDelta($expected, $distanceYx, 0.0000000001);
  371. }
  372. /**
  373. * Test data created using Python: from scipy.spatial import distance
  374. * distance.minkowski(x, y, p)
  375. * @return array
  376. */
  377. public function dataProviderForMinkowskiDistance(): array
  378. {
  379. return [
  380. [
  381. [1, 0, 0],
  382. [0, 1, 0],
  383. 1,
  384. 2,
  385. ],
  386. [
  387. [1, 0, 0],
  388. [0, 1, 0],
  389. 2,
  390. 1.4142135623730951,
  391. ],
  392. [
  393. [1, 0, 0],
  394. [0, 1, 0],
  395. 3,
  396. 1.2599210498948732,
  397. ],
  398. [
  399. [1, 1, 0],
  400. [0, 1, 0],
  401. 1,
  402. 1,
  403. ],
  404. [
  405. [1, 1, 0],
  406. [0, 1, 0],
  407. 2,
  408. 1,
  409. ],
  410. [
  411. [1, 1, 0],
  412. [0, 1, 0],
  413. 3,
  414. 1,
  415. ],
  416. [
  417. [1, 2, 3],
  418. [0, 0, 0],
  419. 1,
  420. 6,
  421. ],
  422. [
  423. [1, 2, 3],
  424. [0, 0, 0],
  425. 2,
  426. 3.7416573867739413,
  427. ],
  428. [
  429. [1, 2, 3],
  430. [0, 0, 0],
  431. 3,
  432. 3.3019272488946263,
  433. ],
  434. [
  435. [0, 0, 0],
  436. [0, 0, 0],
  437. 1,
  438. 0,
  439. ],
  440. [
  441. [0, 0, 0],
  442. [0, 0, 0],
  443. 2,
  444. 0,
  445. ],
  446. [
  447. [0, 0, 0],
  448. [0, 0, 0],
  449. 3,
  450. 0,
  451. ],
  452. [
  453. [1, 1, 1],
  454. [1, 1, 1],
  455. 1,
  456. 0,
  457. ],
  458. [
  459. [1, 1, 1],
  460. [1, 1, 1],
  461. 2,
  462. 0,
  463. ],
  464. [
  465. [1, 1, 1],
  466. [1, 1, 1],
  467. 3,
  468. 0,
  469. ],
  470. [
  471. [56, 26, 83],
  472. [11, 82, 95],
  473. 1,
  474. 113,
  475. ],
  476. [
  477. [56, 26, 83],
  478. [11, 82, 95],
  479. 2,
  480. 72.83543093852057,
  481. ],
  482. [
  483. [56, 26, 83],
  484. [11, 82, 95],
  485. 3,
  486. 64.51064463863402,
  487. ],
  488. ];
  489. }
  490. /**
  491. * @test minkowski error when vectors are of different sizes
  492. */
  493. public function testMinkowskiErrorDifferentSizedVectors()
  494. {
  495. // Given
  496. $x = [1, 2, 3];
  497. $y = [1, 2];
  498. $irrelevantValueForP = 1;
  499. // Then
  500. $this->expectException(Exception\BadDataException::class);
  501. // When
  502. $distance = Distance::minkowski($x, $y, $irrelevantValueForP);
  503. }
  504. /**
  505. * @test minkowski error p value is < 1
  506. */
  507. public function testMinkowskiErrorPLessThanOne()
  508. {
  509. // Given
  510. $x = [1, 2, 3];
  511. $y = [1, 2, 3];
  512. $p = 0;
  513. // Then
  514. $this->expectException(Exception\BadDataException::class);
  515. // When
  516. $distance = Distance::minkowski($x, $y, $p);
  517. }
  518. /**
  519. * @test Euclidean distance
  520. * @dataProvider dataProviderForEuclideanDistance
  521. * @param float[] $x
  522. * @param float[] $y
  523. * @param float $expected
  524. */
  525. public function testEuclidean(array $x, array $y, float $expected)
  526. {
  527. // When
  528. $distance = Distance::euclidean($x, $y);
  529. // Then
  530. $this->assertEqualsWithDelta($expected, $distance, 0.0000000001);
  531. }
  532. /**
  533. * Test data created using Python: from scipy.spatial import distance
  534. * distance.euclidean(x, y)
  535. * @return array
  536. */
  537. public function dataProviderForEuclideanDistance(): array
  538. {
  539. return [
  540. [
  541. [1, 0, 0],
  542. [0, 1, 0],
  543. 1.4142135623730951,
  544. ],
  545. [
  546. [1, 1, 0],
  547. [0, 1, 0],
  548. 1,
  549. ],
  550. [
  551. [1, 2, 3],
  552. [0, 0, 0],
  553. 3.7416573867739413,
  554. ],
  555. [
  556. [0, 0, 0],
  557. [0, 0, 0],
  558. 0,
  559. ],
  560. [
  561. [1, 1, 1],
  562. [1, 1, 1],
  563. 0,
  564. ],
  565. [
  566. [56, 26, 83],
  567. [11, 82, 95],
  568. 72.83543093852057,
  569. ],
  570. ];
  571. }
  572. /**
  573. * @test euclidean error when vectors are of different sizes
  574. */
  575. public function testEuclideanErrorDifferentSizedVectors()
  576. {
  577. // Given
  578. $x = [1, 2, 3];
  579. $y = [1, 2];
  580. // Then
  581. $this->expectException(Exception\BadDataException::class);
  582. // When
  583. $distance = Distance::euclidean($x, $y);
  584. }
  585. /**
  586. * @test Manhattan distance
  587. * @dataProvider dataProviderForManhattanDistance
  588. * @param float[] $x
  589. * @param float[] $y
  590. * @param float $expected
  591. */
  592. public function testManhattan(array $x, array $y, float $expected)
  593. {
  594. // When
  595. $distance = Distance::manhattan($x, $y);
  596. // Then
  597. $this->assertEqualsWithDelta($expected, $distance, 0.0000000001);
  598. }
  599. /**
  600. * Test data created using Python: from scipy.spatial import distance
  601. * distance.minkowski(x, y, 1)
  602. * @return array
  603. */
  604. public function dataProviderForManhattanDistance(): array
  605. {
  606. return [
  607. [
  608. [1, 0, 0],
  609. [0, 1, 0],
  610. 2,
  611. ],
  612. [
  613. [1, 1, 0],
  614. [0, 1, 0],
  615. 1,
  616. ],
  617. [
  618. [1, 2, 3],
  619. [0, 0, 0],
  620. 6,
  621. ],
  622. [
  623. [0, 0, 0],
  624. [0, 0, 0],
  625. 0,
  626. ],
  627. [
  628. [1, 1, 1],
  629. [1, 1, 1],
  630. 0,
  631. ],
  632. [
  633. [56, 26, 83],
  634. [11, 82, 95],
  635. 113,
  636. ],
  637. ];
  638. }
  639. /**
  640. * @test manhattan error when vectors are of different sizes
  641. */
  642. public function testManhattanErrorDifferentSizedVectors()
  643. {
  644. // Given
  645. $x = [1, 2, 3];
  646. $y = [1, 2];
  647. // Then
  648. $this->expectException(Exception\BadDataException::class);
  649. // When
  650. $distance = Distance::manhattan($x, $y);
  651. }
  652. /**
  653. * @test cosine distance
  654. * @dataProvider dataProviderForCosineDistance
  655. * @param array $A
  656. * @param array $B
  657. * @param float $expected
  658. */
  659. public function testCosineDistance(array $A, array $B, float $expected)
  660. {
  661. // When
  662. $distance = Distance::cosine($A, $B);
  663. // Then
  664. $this->assertEqualsWithDelta($expected, $distance, 0.0000000001);
  665. }
  666. /**
  667. * Test data created using Python: from scipy.spatial import distance
  668. * distance.cosine(x, y)
  669. * @return array
  670. */
  671. public function dataProviderForCosineDistance(): array
  672. {
  673. return [
  674. [
  675. [1, 0, 0],
  676. [0, 1, 0],
  677. 1,
  678. ],
  679. [
  680. [100, 0, 0],
  681. [0, 1, 0],
  682. 1,
  683. ],
  684. [
  685. [1, 1, 0],
  686. [0, 1, 0],
  687. 0.29289321881345254,
  688. ],
  689. [
  690. [1, 1, 1],
  691. [1, 1, 1],
  692. 0,
  693. ],
  694. [
  695. [2, 2, 2],
  696. [2, 2, 2],
  697. 0,
  698. ],
  699. [
  700. [1, 1, 1],
  701. [2, 2, 2],
  702. 0,
  703. ],
  704. [
  705. [56, 26, 83],
  706. [11, 82, 95],
  707. 0.1840657409250167,
  708. ],
  709. ];
  710. }
  711. /**
  712. * @test cosine distance exception for null vector
  713. * @dataProvider dataProviderForCosineDistanceException
  714. * @param array $A
  715. * @param array $B
  716. */
  717. public function testCosineDistanceException(array $A, array $B)
  718. {
  719. // Then
  720. $this->expectException(Exception\BadDataException::class);
  721. // When
  722. $distance = Distance::cosine($A, $B);
  723. }
  724. /**
  725. * @return array
  726. */
  727. public function dataProviderForCosineDistanceException(): array
  728. {
  729. return [
  730. [
  731. [1, 2, 3],
  732. [0, 0, 0],
  733. ],
  734. [
  735. [0, 0, 0],
  736. [1, 2, 3],
  737. ],
  738. [
  739. [0, 0, 0],
  740. [0, 0, 0],
  741. ],
  742. ];
  743. }
  744. /**
  745. * @test cosineSimilarity
  746. * @dataProvider dataProviderForCosineSimilarity
  747. * @param array $A
  748. * @param array $B
  749. * @param float $expected
  750. */
  751. public function testCosineSimilarity(array $A, array $B, float $expected)
  752. {
  753. // When
  754. $distance = Distance::cosineSimilarity($A, $B);
  755. // Then
  756. $this->assertEqualsWithDelta($expected, $distance, 0.0000000001);
  757. }
  758. /**
  759. * Test data created using Python: from scipy.spatial import distance
  760. * 1 - distance.cosine(x, y)
  761. * Cross referenced with online calculator: https://www.emathhelp.net/calculators/linear-algebra/angle-between-two-vectors-calculator
  762. * @return array
  763. */
  764. public function dataProviderForCosineSimilarity(): array
  765. {
  766. return [
  767. [
  768. [1, 2, 3],
  769. [3, 2, 1],
  770. 0.7142857142857143,
  771. ],
  772. [
  773. [1, 0, 0],
  774. [0, 1, 0],
  775. 0,
  776. ],
  777. [
  778. [1, 0, 0],
  779. [0, 0, 1],
  780. 0,
  781. ],
  782. [
  783. [1, 0, 0],
  784. [1, 0, 0],
  785. 1,
  786. ],
  787. [
  788. [100, 0, 0],
  789. [0, 1, 0],
  790. 0,
  791. ],
  792. [
  793. [1, 1, 0],
  794. [0, 1, 0],
  795. 0.7071067811865475,
  796. ],
  797. [
  798. [1, 1, 1],
  799. [1, 1, 1],
  800. 1,
  801. ],
  802. [
  803. [2, 2, 2],
  804. [2, 2, 2],
  805. 1,
  806. ],
  807. [
  808. [1, 1, 1],
  809. [2, 2, 2],
  810. 1,
  811. ],
  812. [
  813. [56, 26, 83],
  814. [11, 82, 95],
  815. 0.8159342590749833,
  816. ],
  817. [
  818. [-1, 1, 0],
  819. [0, 1, -1],
  820. 0.5,
  821. ],
  822. [
  823. [23, 41, 33],
  824. [31, 56, 21],
  825. 0.9567820320723087,
  826. ],
  827. ];
  828. }
  829. /**
  830. * @test cosineSimilarity exception for null vector
  831. * @dataProvider dataProviderForCosineSimilarityException
  832. * @param array $A
  833. * @param array $B
  834. */
  835. public function testCosineSimilarityException(array $A, array $B)
  836. {
  837. // Then
  838. $this->expectException(Exception\BadDataException::class);
  839. // When
  840. $distance = Distance::cosineSimilarity($A, $B);
  841. }
  842. /**
  843. * @return array
  844. */
  845. public function dataProviderForCosineSimilarityException(): array
  846. {
  847. return [
  848. [
  849. [1, 2, 3],
  850. [0, 0, 0],
  851. ],
  852. [
  853. [0, 0, 0],
  854. [1, 2, 3],
  855. ],
  856. [
  857. [0, 0, 0],
  858. [0, 0, 0],
  859. ],
  860. ];
  861. }
  862. /**
  863. * @test brayCurtis
  864. * @dataProvider dataProviderForBrayCurtis
  865. * @param array $u
  866. * @param array $v
  867. * @param float $expected
  868. */
  869. public function testBrayCurtis(array $u, array $v, float $expected)
  870. {
  871. // When
  872. $distance = Distance::brayCurtis($u, $v);
  873. // Then
  874. $this->assertEqualsWithDelta($expected, $distance, 0.0001);
  875. }
  876. /**
  877. * Test data created with Python scipy.spatial.distance.braycurtis
  878. * distance.braycurtis(u, v)
  879. * @return array [u, v, distance]
  880. */
  881. public function dataProviderForBrayCurtis(): array
  882. {
  883. return [
  884. [
  885. [1, 0, 0],
  886. [0, 1, 0],
  887. 1,
  888. ],
  889. [
  890. [1, 1, 0],
  891. [0, 1, 0],
  892. 0.33333333333333331,
  893. ],
  894. [
  895. [1, 2, 3],
  896. [1, 2, 3],
  897. 0,
  898. ],
  899. [
  900. [1, 2, 3],
  901. [3, 2, 1],
  902. 0.3333333333333333,
  903. ],
  904. [
  905. [0.4, 0.6],
  906. [0.5, 0.5],
  907. 0.09999999999999998,
  908. ],
  909. [
  910. [0.1, 0.2, 0.2, 0.2, 0.2, 0.1],
  911. [0.0, 0.1, 0.4, 0.4, 0.1, 0.0],
  912. 0.4
  913. ],
  914. [
  915. [0.25, 0.5, 0.25],
  916. [0.5, 0.3, 0.2],
  917. 0.25,
  918. ],
  919. [
  920. [0.5, 0.3, 0.2],
  921. [0.25, 0.5, 0.25],
  922. 0.25,
  923. ],
  924. [
  925. [1],
  926. [0],
  927. 1,
  928. ],
  929. [
  930. [0],
  931. [1],
  932. 1,
  933. ],
  934. [
  935. [1],
  936. [1],
  937. 0,
  938. ],
  939. [
  940. [-1],
  941. [-1],
  942. 0,
  943. ],
  944. [
  945. [-2],
  946. [-3],
  947. 0.2,
  948. ],
  949. ];
  950. }
  951. /**
  952. * @test brayCurtis NAN
  953. * @dataProvider dataProviderForBrayCurtisNan
  954. * @param array $u
  955. * @param array $v
  956. */
  957. public function testBrayCurtisNan(array $u, array $v)
  958. {
  959. // When
  960. $distance = Distance::brayCurtis($u, $v);
  961. // Then
  962. $this->assertNan($distance);
  963. }
  964. /**
  965. * @return array
  966. */
  967. public function dataProviderForBrayCurtisNan(): array
  968. {
  969. return [
  970. 'both zero ' => [
  971. [0],
  972. [0],
  973. ],
  974. '∑|uᵢ + vᵢ| denominator is zero (1)' => [
  975. [1],
  976. [-1],
  977. ],
  978. '∑|uᵢ + vᵢ| demoninator is zero (2)' => [
  979. [1, 2],
  980. [-1, -2],
  981. ],
  982. ];
  983. }
  984. /**
  985. * @test brayCurtis exception when inputs are different lengths
  986. * @throws Exception\BadDataException
  987. */
  988. public function testBrayCurtisExceptionDifferentNumberElements()
  989. {
  990. // Given
  991. $u = [1, 2, 3];
  992. $v = [2, 3];
  993. // Then
  994. $this->expectException(Exception\BadDataException::class);
  995. // When
  996. $distance = Distance::brayCurtis($u, $v);
  997. }
  998. /**
  999. * @test canberra
  1000. * @dataProvider dataProviderForCanberra
  1001. * @param array $p
  1002. * @param array $q
  1003. * @param float $expected
  1004. */
  1005. public function testCanberra(array $p, array $q, float $expected)
  1006. {
  1007. // When
  1008. $distance = Distance::canberra($p, $q);
  1009. // Then
  1010. $this->assertEqualsWithDelta($expected, $distance, 0.0001);
  1011. }
  1012. /**
  1013. * Test data created with Python scipy.spatial.distance.canberra
  1014. * distance.canberra(p, q)
  1015. * @return array [p, q, distance]
  1016. */
  1017. public function dataProviderForCanberra(): array
  1018. {
  1019. return [
  1020. [
  1021. [1, 0, 0],
  1022. [0, 1, 0],
  1023. 2,
  1024. ],
  1025. [
  1026. [1, 1, 0],
  1027. [0, 1, 0],
  1028. 1,
  1029. ],
  1030. [
  1031. [1, 2, 3],
  1032. [1, 2, 3],
  1033. 0,
  1034. ],
  1035. [
  1036. [1, 2, 3],
  1037. [3, 2, 1],
  1038. 1,
  1039. ],
  1040. [
  1041. [0.4, 0.6],
  1042. [0.5, 0.5],
  1043. 0.20202020202020196,
  1044. ],
  1045. [
  1046. [0.1, 0.2, 0.2, 0.2, 0.2, 0.1],
  1047. [0.0, 0.1, 0.4, 0.4, 0.1, 0.0],
  1048. 3.333333333333333
  1049. ],
  1050. [
  1051. [0.25, 0.5, 0.25],
  1052. [0.5, 0.3, 0.2],
  1053. 0.6944444444444443,
  1054. ],
  1055. [
  1056. [0.5, 0.3, 0.2],
  1057. [0.25, 0.5, 0.25],
  1058. 0.6944444444444443,
  1059. ],
  1060. [
  1061. [1],
  1062. [0],
  1063. 1,
  1064. ],
  1065. [
  1066. [0],
  1067. [1],
  1068. 1,
  1069. ],
  1070. [
  1071. [1],
  1072. [1],
  1073. 0,
  1074. ],
  1075. [
  1076. [-1],
  1077. [-1],
  1078. 0,
  1079. ],
  1080. [
  1081. [-2],
  1082. [-3],
  1083. 0.2,
  1084. ],
  1085. [
  1086. [1, 1, 1],
  1087. [1, 1, 0],
  1088. 1,
  1089. ],
  1090. [
  1091. [1, 1, 0],
  1092. [1, 1, 1],
  1093. 1,
  1094. ],
  1095. [
  1096. [1, 1, 1],
  1097. [10, 5, 0],
  1098. 2.484848484848485,
  1099. ],
  1100. [
  1101. [10, 5, 0],
  1102. [1, 1, 1],
  1103. 2.484848484848485,
  1104. ],
  1105. [
  1106. [10, 10, 10],
  1107. [11, 11, 11],
  1108. 0.14285714285714285,
  1109. ],
  1110. [
  1111. [11, 11, 11],
  1112. [10, 10, 10],
  1113. 0.14285714285714285,
  1114. ],
  1115. ];
  1116. }
  1117. /**
  1118. * @test canberra NAN
  1119. * @dataProvider dataProviderForCanberraNan
  1120. * @param array $u
  1121. * @param array $v
  1122. */
  1123. public function testCanberraNan(array $u, array $v)
  1124. {
  1125. // When
  1126. $distance = Distance::canberra($u, $v);
  1127. // Then
  1128. $this->assertNan($distance);
  1129. }
  1130. /**
  1131. * @return array
  1132. */
  1133. public function dataProviderForCanberraNan(): array
  1134. {
  1135. return [
  1136. 'both zero ' => [
  1137. [0],
  1138. [0],
  1139. ],
  1140. 'all zeros' => [
  1141. [0, 0, 0],
  1142. [0, 0, 0],
  1143. ],
  1144. ];
  1145. }
  1146. /**
  1147. * @test canberra exception when inputs are different lengths
  1148. * @throws Exception\BadDataException
  1149. */
  1150. public function testCanberraExceptionDifferentNumberElements()
  1151. {
  1152. // Given
  1153. $p = [1, 2, 3];
  1154. $q = [2, 3];
  1155. // Then
  1156. $this->expectException(Exception\BadDataException::class);
  1157. // When
  1158. $distance = Distance::canberra($p, $q);
  1159. }
  1160. /**
  1161. * @test chebyshev
  1162. * @dataProvider dataProviderForChebyshev
  1163. * @param array $x
  1164. * @param array $y
  1165. * @param float $expected
  1166. */
  1167. public function testChebyshev(array $x, array $y, float $expected): void
  1168. {
  1169. // When
  1170. $distance = Distance::chebyshev($x, $y);
  1171. // Then
  1172. $this->assertEqualsWithDelta($expected, $distance, 0.0001);
  1173. }
  1174. public function dataProviderForChebyshev(): array
  1175. {
  1176. return [
  1177. [
  1178. [0],
  1179. [0],
  1180. 0
  1181. ],
  1182. [
  1183. [1],
  1184. [1],
  1185. 0
  1186. ],
  1187. [
  1188. [1],
  1189. [0],
  1190. 1
  1191. ],
  1192. [
  1193. [0],
  1194. [1],
  1195. 1
  1196. ],
  1197. [
  1198. [1, 2],
  1199. [2, 4],
  1200. 2
  1201. ],
  1202. [
  1203. [1, 2, 3],
  1204. [2, 4, 6],
  1205. 3
  1206. ],
  1207. [
  1208. [0, 3, 4, 5],
  1209. [7, 6, 3, -1],
  1210. 7
  1211. ],
  1212. [
  1213. [1, 2, 3, 4],
  1214. [-5, -6, 7, 8],
  1215. 8
  1216. ],
  1217. [
  1218. [1, 5, 2, 3, 10],
  1219. [4, 15, 20, 5, 5],
  1220. 18
  1221. ],
  1222. [
  1223. [1, 5, 2, 3, 10],
  1224. [1, 5, 2, 3, 10],
  1225. 0
  1226. ],
  1227. [
  1228. [4, 15, 20, 5, 5],
  1229. [4, 15, 20, 5, 5],
  1230. 0
  1231. ],
  1232. ];
  1233. }
  1234. /**
  1235. * @test chebyshev exception when inputs are different lengths
  1236. * @throws Exception\BadDataException
  1237. */
  1238. public function testChebyshevExceptionDifferentNumberElements()
  1239. {
  1240. // Given
  1241. $xs = [1, 2, 3];
  1242. $ys = [2, 3];
  1243. // Then
  1244. $this->expectException(Exception\BadDataException::class);
  1245. // When
  1246. Distance::chebyshev($xs, $ys);
  1247. }
  1248. }