Average.php 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955
  1. <?php
  2. namespace MathPHP\Statistics;
  3. use MathPHP\Functions\Map;
  4. use MathPHP\Exception;
  5. /**
  6. * Statistical averages
  7. */
  8. class Average
  9. {
  10. /**************************************************************************
  11. * Averages of a list of numbers
  12. **************************************************************************/
  13. /**
  14. * Calculate the mean average of a list of numbers
  15. *
  16. * ∑⟮xᵢ⟯
  17. * x̄ = -----
  18. * n
  19. *
  20. * @param float[] $numbers
  21. *
  22. * @return float
  23. *
  24. * @throws Exception\BadDataException if the input array of numbers is empty
  25. */
  26. public static function mean(array $numbers): float
  27. {
  28. if (empty($numbers)) {
  29. throw new Exception\BadDataException('Cannot find the average of an empty list of numbers');
  30. }
  31. return \array_sum($numbers) / \count($numbers);
  32. }
  33. /**
  34. * Calculate the weighted mean average of a list of numbers
  35. * https://en.wikipedia.org/wiki/Weighted_arithmetic_mean
  36. *
  37. * ∑⟮xᵢwᵢ⟯
  38. * x̄ = -----
  39. * ∑⟮wᵢ⟯
  40. *
  41. * @param float[] $numbers
  42. * @param float[] $weights
  43. *
  44. * @return float
  45. *
  46. * @throws Exception\BadDataException if the input array of numbers is empty
  47. * @throws Exception\BadDataException if the number of numbers and weights are not equal
  48. */
  49. public static function weightedMean(array $numbers, array $weights): float
  50. {
  51. if (empty($numbers)) {
  52. throw new Exception\BadDataException('Cannot find the weightedMean of an empty list of numbers');
  53. }
  54. if (empty($weights)) {
  55. return Average::mean($numbers);
  56. }
  57. if (\count($numbers) !== \count($weights)) {
  58. throw new Exception\BadDataException('Numbers and weights must have the same number of elements.');
  59. }
  60. $∑⟮xᵢwᵢ⟯ = \array_sum(\array_map(
  61. function ($xᵢ, $wᵢ) {
  62. return $xᵢ * $wᵢ;
  63. },
  64. $numbers,
  65. $weights
  66. ));
  67. $∑⟮wᵢ⟯ = \array_sum($weights);
  68. return $∑⟮xᵢwᵢ⟯ / $∑⟮wᵢ⟯;
  69. }
  70. /**
  71. * Calculate the median average of a list of numbers
  72. *
  73. * @param float[] $numbers
  74. *
  75. * @return float
  76. *
  77. * @throws Exception\BadDataException if the input array of numbers is empty
  78. * @throws Exception\OutOfBoundsException if kth-smallest k is out of bounds
  79. */
  80. public static function median(array $numbers): float
  81. {
  82. if (empty($numbers)) {
  83. throw new Exception\BadDataException('Cannot find the median of an empty list of numbers');
  84. }
  85. if (\count($numbers) === 1) {
  86. return \array_pop($numbers);
  87. }
  88. // Reset the array key indexes because we don't know what might be passed in
  89. $numbers = \array_values($numbers);
  90. // For odd number of numbers, take the middle indexed number
  91. if (\count($numbers) % 2 == 1) {
  92. $middle_index = \intdiv(\count($numbers), 2);
  93. return self::kthSmallest($numbers, $middle_index);
  94. }
  95. // For even number of items, take the mean of the middle two indexed numbers
  96. $left_middle_index = \intdiv(\count($numbers), 2) - 1;
  97. $left_median = self::kthSmallest($numbers, $left_middle_index);
  98. $right_middle_index = $left_middle_index + 1;
  99. $right_median = self::kthSmallest($numbers, $right_middle_index);
  100. return self::mean([ $left_median, $right_median ]);
  101. }
  102. /**
  103. * Return the kth smallest value in an array
  104. * Uses a linear-time algorithm: O(n) time in worst case.
  105. *
  106. * if $a = [1,2,3,4,6,7]
  107. *
  108. * kthSmallest($a, 4) = 6
  109. *
  110. * Algorithm:
  111. * 1) If n is small, just sort and return
  112. * 2) Otherwise, group into 5-element subsets and mind the median
  113. * 3) Find the median of the medians
  114. * 4) Find L and U sets
  115. * - L is numbers lower than the median of medians
  116. * - U is numbers higher than the median of medians
  117. * 5) Recursive step
  118. * - if k is the median of medians, return that
  119. * - Otherwise, recursively search in smaller group.
  120. *
  121. * @param float[] $numbers
  122. * @param int $k zero indexed - must be less than n (count of $numbers)
  123. *
  124. * @return float
  125. *
  126. * @throws Exception\BadDataException if the input array of numbers is empty
  127. * @throws Exception\OutOfBoundsException if k ≥ n
  128. */
  129. public static function kthSmallest(array $numbers, int $k): float
  130. {
  131. $n = \count($numbers);
  132. if ($n === 0) {
  133. throw new Exception\BadDataException('Cannot find the k-th smallest of an empty list of numbers');
  134. }
  135. if ($k >= $n) {
  136. throw new Exception\OutOfBoundsException('k cannot be greater than or equal to the count of numbers');
  137. }
  138. // Reset the array key indexes because we don't know what might be passed in
  139. $numbers = \array_values($numbers);
  140. // If the array is 5 elements or smaller, use quicksort and return the element of interest.
  141. if ($n <= 5) {
  142. \sort($numbers);
  143. return $numbers[$k];
  144. }
  145. // Otherwise, we are going to slice $numbers into 5-element slices and find the median of each.
  146. $num_slices = \ceil($n / 5);
  147. $median_array = [];
  148. for ($i = 0; $i < $num_slices; $i++) {
  149. $median_array[] = self::median(\array_slice($numbers, 5 * $i, 5));
  150. }
  151. // Then we find the median of the medians.
  152. $median_of_medians = self::median($median_array);
  153. // Next we walk the array and separate it into values that are greater than or less than this "median of medians".
  154. $lower_upper = self::splitAtValue($numbers, $median_of_medians);
  155. $lower_number = \count($lower_upper['lower']);
  156. $equal_number = $lower_upper['equal'];
  157. // Lastly, we find which group of values our value of interest is in, and find it in the smaller array.
  158. if ($k < $lower_number) {
  159. return self::kthSmallest($lower_upper['lower'], $k);
  160. } elseif ($k < ($lower_number + $equal_number)) {
  161. return $median_of_medians;
  162. } else {
  163. return self::kthSmallest($lower_upper['upper'], $k - $lower_number - $equal_number);
  164. }
  165. }
  166. /**
  167. * Given an array and a value, separate the array into two groups,
  168. * those values which are greater than the value, and those that are less
  169. * than the value. Also, tell how many times the value appears in the array.
  170. *
  171. * @param float[] $numbers
  172. * @param float $value
  173. *
  174. * @return array{
  175. * lower: array<float>,
  176. * upper: array<float>,
  177. * equal: int,
  178. * }
  179. */
  180. private static function splitAtValue(array $numbers, float $value): array
  181. {
  182. $lower = [];
  183. $upper = [];
  184. $number_equal = 0;
  185. foreach ($numbers as $number) {
  186. if ($number < $value) {
  187. $lower[] = $number;
  188. } elseif ($number > $value) {
  189. $upper[] = $number;
  190. } else {
  191. $number_equal++;
  192. }
  193. }
  194. return [
  195. 'lower' => $lower,
  196. 'upper' => $upper,
  197. 'equal' => $number_equal,
  198. ];
  199. }
  200. /**
  201. * Calculate the mode average of a list of numbers
  202. * If multiple modes (bimodal, trimodal, etc.), all modes will be returned.
  203. * Always returns an array, even if only one mode.
  204. *
  205. * @param float[] $numbers
  206. *
  207. * @return float[] of mode(s)
  208. *
  209. * @throws Exception\BadDataException if the input array of numbers is empty
  210. */
  211. public static function mode(array $numbers): array
  212. {
  213. if (empty($numbers)) {
  214. throw new Exception\BadDataException('Cannot find the mode of an empty list of numbers');
  215. }
  216. // Count how many times each number occurs.
  217. // Determine the max any number occurs.
  218. // Find all numbers that occur max times.
  219. $number_strings = \array_map('\strval', $numbers);
  220. $number_counts = \array_count_values($number_strings);
  221. $max = \max($number_counts);
  222. $modes = array();
  223. foreach ($number_counts as $number => $count) {
  224. if ($count === $max) {
  225. $modes[] = $number;
  226. }
  227. }
  228. // Cast back to numbers
  229. return \array_map('\floatval', $modes);
  230. }
  231. /**
  232. * Geometric mean
  233. * A type of mean which indicates the central tendency or typical value of a set of numbers
  234. * by using the product of their values (as opposed to the arithmetic mean which uses their sum).
  235. * https://en.wikipedia.org/wiki/Geometric_mean
  236. * __________
  237. * Geometric mean = ⁿ√a₀a₁a₂ ⋯
  238. *
  239. * @param float[] $numbers
  240. *
  241. * @return float
  242. *
  243. * @throws Exception\BadDataException if the input array of numbers is empty
  244. */
  245. public static function geometricMean(array $numbers): float
  246. {
  247. if (empty($numbers)) {
  248. throw new Exception\BadDataException('Cannot find the geometric mean of an empty list of numbers');
  249. }
  250. $n = \count($numbers);
  251. $a₀a₁a₂⋯ = \array_reduce(
  252. $numbers,
  253. function ($carry, $a) {
  254. return $carry * $a;
  255. },
  256. 1
  257. );
  258. $ⁿ√a₀a₁a₂⋯ = \pow($a₀a₁a₂⋯, 1 / $n);
  259. return $ⁿ√a₀a₁a₂⋯;
  260. }
  261. /**
  262. * Harmonic mean (subcontrary mean)
  263. * The harmonic mean can be expressed as the reciprocal of the arithmetic mean of the reciprocals.
  264. * Appropriate for situations when the average of rates is desired.
  265. * https://en.wikipedia.org/wiki/Harmonic_mean
  266. *
  267. *
  268. * n
  269. * H = ------
  270. * n 1
  271. * ∑ -
  272. * ⁱ⁼¹ xᵢ
  273. *
  274. * @param float[] $numbers
  275. *
  276. * @return float
  277. *
  278. * @throws Exception\BadDataException if the input array of numbers is empty
  279. * @throws Exception\BadDataException if there are negative numbers
  280. */
  281. public static function harmonicMean(array $numbers): float
  282. {
  283. if (empty($numbers)) {
  284. throw new Exception\BadDataException('Cannot find the harmonic mean of an empty list of numbers');
  285. }
  286. $negativeValues = \array_filter(
  287. $numbers,
  288. function ($x) {
  289. return $x < 0;
  290. }
  291. );
  292. if (!empty($negativeValues)) {
  293. throw new Exception\BadDataException('Harmonic mean cannot be computed for negative values.');
  294. }
  295. $n = \count($numbers);
  296. $∑1/xᵢ = \array_sum(Map\Single::reciprocal($numbers));
  297. return $n / $∑1/xᵢ;
  298. }
  299. /**
  300. * Contraharmonic mean
  301. * A function complementary to the harmonic mean.
  302. * A special case of the Lehmer mean, L₂(x), where p = 2.
  303. * https://en.wikipedia.org/wiki/Contraharmonic_mean
  304. *
  305. * @param float[] $numbers
  306. *
  307. * @return float
  308. */
  309. public static function contraharmonicMean(array $numbers): float
  310. {
  311. $p = 2;
  312. return self::lehmerMean($numbers, $p);
  313. }
  314. /**
  315. * Root mean square (quadratic mean)
  316. * The square root of the arithmetic mean of the squares of a set of numbers.
  317. * https://en.wikipedia.org/wiki/Root_mean_square
  318. * ___________
  319. * /x₁+²x₂²+ ⋯
  320. * x rms = / -----------
  321. * √ n
  322. *
  323. * @param float[] $numbers
  324. *
  325. * @return float
  326. *
  327. * @throws Exception\BadDataException if the input array of numbers is empty
  328. */
  329. public static function rootMeanSquare(array $numbers): float
  330. {
  331. if (empty($numbers)) {
  332. throw new Exception\BadDataException('Cannot find the root mean square of an empty list of numbers');
  333. }
  334. $n = \count($numbers);
  335. $x₁²+x₂²+⋯ = \array_sum(\array_map(
  336. function ($x) {
  337. return $x ** 2;
  338. },
  339. $numbers
  340. ));
  341. return \sqrt($x₁²+x₂²+⋯ / $n);
  342. }
  343. /**
  344. * Quadradic mean (root mean square)
  345. * Convenience function for rootMeanSquare
  346. *
  347. * @param float[] $numbers
  348. *
  349. * @return float
  350. *
  351. * @throws Exception\BadDataException if the input array of numbers is empty
  352. */
  353. public static function quadraticMean(array $numbers): float
  354. {
  355. return self::rootMeanSquare($numbers);
  356. }
  357. /**
  358. * Trimean (TM, or Tukey's trimean)
  359. * A measure of a probability distribution's location defined as
  360. * a weighted average of the distribution's median and its two quartiles.
  361. * https://en.wikipedia.org/wiki/Trimean
  362. *
  363. * Q₁ + 2Q₂ + Q₃
  364. * TM = -------------
  365. * 4
  366. *
  367. * @param float[] $numbers
  368. *
  369. * @return float
  370. *
  371. * @throws Exception\BadDataException if the input array of numbers is empty
  372. */
  373. public static function trimean(array $numbers): float
  374. {
  375. $quartiles = Descriptive::quartiles($numbers);
  376. $Q₁ = $quartiles['Q1'];
  377. $Q₂ = $quartiles['Q2'];
  378. $Q₃ = $quartiles['Q3'];
  379. return ($Q₁ + 2 * $Q₂ + $Q₃) / 4;
  380. }
  381. /**
  382. * Interquartile mean (IQM)
  383. * A measure of central tendency based on the truncated mean of the interquartile range.
  384. * Only the data in the second and third quartiles is used (as in the interquartile range),
  385. * and the lowest 25% and the highest 25% of the scores are discarded.
  386. * https://en.wikipedia.org/wiki/Interquartile_mean
  387. *
  388. * @param float[] $numbers
  389. *
  390. * @return float
  391. *
  392. * @throws Exception\OutOfBoundsException
  393. */
  394. public static function interquartileMean(array $numbers): float
  395. {
  396. return self::truncatedMean($numbers, 25);
  397. }
  398. /**
  399. * IQM (Interquartile mean)
  400. * Convenience function for interquartileMean
  401. *
  402. * @param float[] $numbers
  403. *
  404. * @return float
  405. *
  406. * @throws Exception\OutOfBoundsException
  407. */
  408. public static function iqm(array $numbers): float
  409. {
  410. return self::truncatedMean($numbers, 25);
  411. }
  412. /**
  413. * Cubic mean
  414. * https://en.wikipedia.org/wiki/Cubic_mean
  415. * _________
  416. * / 1 n
  417. * x cubic = ³/ - ∑ xᵢ³
  418. * √ n ⁱ⁼¹
  419. *
  420. * @param array<float> $numbers
  421. *
  422. * @return float
  423. *
  424. * @throws Exception\BadDataException if the input array of numbers is empty
  425. */
  426. public static function cubicMean(array $numbers): float
  427. {
  428. if (empty($numbers)) {
  429. throw new Exception\BadDataException('Cannot find the cubic mean of an empty list of numbers');
  430. }
  431. $n = \count($numbers);
  432. $∑xᵢ³ = \array_sum(Map\Single::cube($numbers));
  433. return \pow($∑xᵢ³ / $n, 1 / 3);
  434. }
  435. /**
  436. * Truncated mean (trimmed mean)
  437. * The mean after discarding given parts of a probability distribution or sample
  438. * at the high and low end, and typically discarding an equal amount of both.
  439. * This number of points to be discarded is given as a percentage of the total number of points.
  440. * https://en.wikipedia.org/wiki/Truncated_mean
  441. *
  442. * Trim count = floor((trim percent / 100) * sample size)
  443. *
  444. * For example: [8, 3, 7, 1, 3, 9] with a trim of 20%
  445. * First sort the list: [1, 3, 3, 7, 8, 9]
  446. * Sample size = 6
  447. * Then determine trim count: floor(20/100 * 6) = 1
  448. * Trim the list by removing 1 from each end: [3, 3, 7, 8]
  449. * Finally, find the mean: 5.2
  450. *
  451. * @param float[] $numbers
  452. * @param int $trim_percent Percent between 0-50 indicating percent of observations trimmed from each end of distribution
  453. *
  454. * @return float
  455. *
  456. * @throws Exception\OutOfBoundsException if trim percent is not between 0 and 50
  457. * @throws Exception\BadDataException if the input array of numbers is empty
  458. */
  459. public static function truncatedMean(array $numbers, int $trim_percent): float
  460. {
  461. if (empty($numbers)) {
  462. throw new Exception\BadDataException('Cannot find the truncated mean of an empty list of numbers');
  463. }
  464. if ($trim_percent < 0 || $trim_percent > 50) {
  465. throw new Exception\OutOfBoundsException('Trim percent must be between 0 and 50.');
  466. }
  467. $n = \count($numbers);
  468. $trim_count = \floor($n * ($trim_percent / 100));
  469. \sort($numbers);
  470. if ($trim_percent == 50) {
  471. return self::median($numbers);
  472. }
  473. for ($i = 1; $i <= $trim_count; $i++) {
  474. \array_shift($numbers);
  475. \array_pop($numbers);
  476. }
  477. return self::mean($numbers);
  478. }
  479. /**
  480. * Lehmer mean
  481. * https://en.wikipedia.org/wiki/Lehmer_mean
  482. *
  483. * ∑xᵢᵖ
  484. * Lp(x) = ------
  485. * ∑xᵢᵖ⁻¹
  486. *
  487. * Special cases:
  488. * L-∞(x) is the min(x)
  489. * L₀(x) is the harmonic mean
  490. * L½(x₀, x₁) is the geometric mean if computed against two numbers
  491. * L₁(x) is the arithmetic mean
  492. * L₂(x) is the contraharmonic mean
  493. * L∞(x) is the max(x)
  494. *
  495. * @param float[] $numbers
  496. * @param float $p
  497. *
  498. * @return float
  499. *
  500. * @throws Exception\BadDataException if the input array of numbers is empty
  501. */
  502. public static function lehmerMean(array $numbers, $p): float
  503. {
  504. if (empty($numbers)) {
  505. throw new Exception\BadDataException('Cannot find the lehmer mean of an empty list of numbers');
  506. }
  507. // Special cases for infinite p
  508. if ($p == -\INF) {
  509. return \min($numbers);
  510. }
  511. if ($p == \INF) {
  512. return \max($numbers);
  513. }
  514. // Standard case for non-infinite p
  515. $∑xᵢᵖ = \array_sum(Map\Single::pow($numbers, $p));
  516. $∑xᵢᵖ⁻¹ = \array_sum(Map\Single::pow($numbers, $p - 1));
  517. return $∑xᵢᵖ / $∑xᵢᵖ⁻¹;
  518. }
  519. /**
  520. * Generalized mean (power mean, Hölder mean)
  521. * https://en.wikipedia.org/wiki/Generalized_mean
  522. *
  523. * / 1 n \ 1/p
  524. * Mp(x) = | - ∑ xᵢᵖ|
  525. * \ n ⁱ⁼¹ /
  526. *
  527. * Special cases:
  528. * M-∞(x) is \min(x)
  529. * M₋₁(x) is the harmonic mean
  530. * M₀(x) is the geometric mean
  531. * M₁(x) is the arithmetic mean
  532. * M₂(x) is the quadratic mean
  533. * M₃(x) is the cubic mean
  534. * M∞(x) is max(X)
  535. *
  536. * @param float[] $numbers
  537. * @param float $p
  538. *
  539. * @return float
  540. *
  541. * @throws Exception\BadDataException if the input array of numbers is empty
  542. */
  543. public static function generalizedMean(array $numbers, float $p): float
  544. {
  545. if (empty($numbers)) {
  546. throw new Exception\BadDataException('Cannot find the generalized mean of an empty list of numbers');
  547. }
  548. // Special cases for infinite p
  549. if ($p == -\INF) {
  550. return \min($numbers);
  551. }
  552. if ($p == \INF) {
  553. return \max($numbers);
  554. }
  555. // Special case for p = 0 (geometric mean)
  556. if ($p == 0) {
  557. return self::geometricMean($numbers);
  558. }
  559. // Standard case for non-infinite p
  560. $n = \count($numbers);
  561. $∑xᵢᵖ = \array_sum(Map\Single::pow($numbers, $p));
  562. return \pow($∑xᵢᵖ / $n, 1 / $p);
  563. }
  564. /**
  565. * Power mean (generalized mean)
  566. * Convenience method for generalizedMean
  567. *
  568. * @param float[] $numbers
  569. * @param float $p
  570. *
  571. * @return float
  572. *
  573. * @throws Exception\BadDataException if the input array of numbers is empty
  574. */
  575. public static function powerMean(array $numbers, float $p): float
  576. {
  577. return self::generalizedMean($numbers, $p);
  578. }
  579. /**************************************************************************
  580. * Moving averages (list of numbers)
  581. **************************************************************************/
  582. /**
  583. * Simple n-point moving average SMA
  584. * The unweighted mean of the previous n data.
  585. *
  586. * First calculate initial average:
  587. * ⁿ⁻¹
  588. * ∑ xᵢ
  589. * ᵢ₌₀
  590. *
  591. * To calculating successive values, a new value comes into the sum and an old value drops out:
  592. * SMAtoday = SMAyesterday + NewNumber/N - DropNumber/N
  593. *
  594. * @param float[] $numbers
  595. * @param int $n n-point moving average
  596. *
  597. * @return float[] of averages for each n-point time period
  598. */
  599. public static function simpleMovingAverage(array $numbers, int $n): array
  600. {
  601. $m = \count($numbers);
  602. $SMA = [];
  603. // Counters
  604. $new = $n; // New value comes into the sum
  605. $drop = 0; // Old value drops out
  606. $yesterday = 0; // Yesterday's SMA
  607. // Base case: initial average
  608. $SMA[] = \array_sum(\array_slice($numbers, 0, $n)) / $n;
  609. // Calculating successive values: New value comes in; old value drops out
  610. while ($new < $m) {
  611. $SMA[] = $SMA[$yesterday] + ($numbers[$new] / $n) - ($numbers[$drop] / $n);
  612. $drop++;
  613. $yesterday++;
  614. $new++;
  615. }
  616. return $SMA;
  617. }
  618. /**
  619. * Cumulative moving average (CMA)
  620. *
  621. * Base case for initial average:
  622. * x₀
  623. * CMA₀ = --
  624. * 1
  625. *
  626. * Standard case:
  627. * xᵢ + (i * CMAᵢ₋₁)
  628. * CMAᵢ = -----------------
  629. * i + 1
  630. *
  631. * @param float[] $numbers
  632. *
  633. * @return float[] of cumulative averages
  634. */
  635. public static function cumulativeMovingAverage(array $numbers): array
  636. {
  637. $m = \count($numbers);
  638. $CMA = [];
  639. // Base case: first average is just itself
  640. $CMA[] = $numbers[0];
  641. for ($i = 1; $i < $m; $i++) {
  642. $CMA[] = (($numbers[$i]) + ($CMA[$i - 1] * $i)) / ($i + 1);
  643. }
  644. return $CMA;
  645. }
  646. /**
  647. * Weighted n-point moving average (WMA)
  648. *
  649. * Similar to simple n-point moving average,
  650. * however, each n-point has a weight associated with it,
  651. * and instead of dividing by n, we divide by the sum of the weights.
  652. *
  653. * Each weighted average = ∑(weighted values) / ∑(weights)
  654. *
  655. * @param array<int|float> $numbers
  656. * @param int $n n-point moving average
  657. * @param array<int|float> $weights Weights for each n points
  658. *
  659. * @return array<float> of averages
  660. *
  661. * @throws Exception\BadDataException if number of weights is not equal to number of n-points
  662. */
  663. public static function weightedMovingAverage(array $numbers, int $n, array $weights): array
  664. {
  665. if (\count($weights) !== $n) {
  666. throw new Exception\BadDataException('Number of weights must equal number of n-points');
  667. }
  668. $m = \count($numbers);
  669. $∑w = \array_sum($weights);
  670. $WMA = [];
  671. for ($i = 0; $i <= $m - $n; $i++) {
  672. $∑wp = \array_sum(Map\Multi::multiply(\array_slice($numbers, $i, $n), $weights));
  673. $WMA[] = $∑wp / $∑w;
  674. }
  675. return $WMA;
  676. }
  677. /**
  678. * Exponential moving average (EMA)
  679. *
  680. * The start of the EPA is seeded with the first data point.
  681. * Then each day after that:
  682. * EMAtoday = α⋅xtoday + (1-α)EMAyesterday
  683. *
  684. * where
  685. * α: coefficient that represents the degree of weighting decrease, a constant smoothing factor between 0 and 1.
  686. *
  687. * @param array<int|float> $numbers
  688. * @param int $n Length of the EPA
  689. *
  690. * @return array<float> of exponential moving averages
  691. */
  692. public static function exponentialMovingAverage(array $numbers, int $n): array
  693. {
  694. $m = \count($numbers);
  695. $α = 2 / ($n + 1);
  696. $EMA = [];
  697. // Start off by seeding with the first data point
  698. $EMA[] = $numbers[0];
  699. // Each day after: EMAtoday = α⋅xtoday + (1-α)EMAyesterday
  700. for ($i = 1; $i < $m; $i++) {
  701. $EMA[] = ($α * $numbers[$i]) + ((1 - $α) * $EMA[$i - 1]);
  702. }
  703. return $EMA;
  704. }
  705. /**************************************************************************
  706. * Averages of two numbers
  707. **************************************************************************/
  708. /**
  709. * Arithmetic-Geometric mean
  710. *
  711. * First, compute the arithmetic and geometric means of x and y, calling them a₁ and g₁ respectively.
  712. * Then, use iteration, with a₁ taking the place of x and g₁ taking the place of y.
  713. * Both a and g will converge to the same mean.
  714. * https://en.wikipedia.org/wiki/Arithmetic%E2%80%93geometric_mean
  715. *
  716. * x and y ≥ 0
  717. * If x or y = 0, then agm = 0
  718. * If x or y < 0, then NaN
  719. *
  720. * @param float $x
  721. * @param float $y
  722. *
  723. * @return float
  724. */
  725. public static function arithmeticGeometricMean(float $x, float $y): float
  726. {
  727. // x or y < 0 = NaN
  728. if ($x < 0 || $y < 0) {
  729. return \NAN;
  730. }
  731. // x or y zero = 0
  732. if ($x == 0 || $y == 0) {
  733. return 0;
  734. }
  735. // Standard case x and y > 0
  736. [$a, $g] = [$x, $y];
  737. for ($i = 0; $i <= 10; $i++) {
  738. [$a, $g] = [self::mean([$a, $g]), self::geometricMean([$a, $g])];
  739. }
  740. return $a;
  741. }
  742. /**
  743. * Convenience method for arithmeticGeometricMean
  744. *
  745. * @param float $x
  746. * @param float $y
  747. *
  748. * @return float
  749. */
  750. public static function agm(float $x, float $y): float
  751. {
  752. return self::arithmeticGeometricMean($x, $y);
  753. }
  754. /**
  755. * Logarithmic mean
  756. * A function of two non-negative numbers which is equal to their
  757. * difference divided by the logarithm of their quotient.
  758. *
  759. * https://en.wikipedia.org/wiki/Logarithmic_mean
  760. *
  761. * Mlm(x, y) = 0 if x = 0 or y = 0
  762. * x if x = y
  763. * otherwise:
  764. * y - x
  765. * -----------
  766. * ln y - ln x
  767. *
  768. * @param float $x
  769. * @param float $y
  770. *
  771. * @return float
  772. */
  773. public static function logarithmicMean(float $x, float $y): float
  774. {
  775. if ($x == 0 || $y == 0) {
  776. return 0;
  777. }
  778. if ($x == $y) {
  779. return $x;
  780. }
  781. return ($y - $x) / (\log($y) - \log($x));
  782. }
  783. /**
  784. * Heronian mean
  785. * https://en.wikipedia.org/wiki/Heronian_mean
  786. * __
  787. * H = ⅓(A + √AB + B)
  788. *
  789. * @param float $A
  790. * @param float $B
  791. *
  792. * @return float
  793. */
  794. public static function heronianMean(float $A, float $B): float
  795. {
  796. return 1 / 3 * ($A + \sqrt($A * $B) + $B);
  797. }
  798. /**
  799. * Identric mean
  800. * https://en.wikipedia.org/wiki/Identric_mean
  801. * ____
  802. * 1 / xˣ
  803. * I(x,y) = - ˣ⁻ʸ/ --
  804. * ℯ √ yʸ
  805. *
  806. * @param float $x
  807. * @param float $y
  808. *
  809. * @return float
  810. *
  811. * @throws Exception\OutOfBoundsException if x or y is ≤ 0
  812. */
  813. public static function identricMean(float $x, float $y): float
  814. {
  815. // x and y must be positive
  816. if ($x <= 0 || $y <= 0) {
  817. throw new Exception\OutOfBoundsException('x and y must be positive real numbers.');
  818. }
  819. // Special case: x if x = y
  820. if ($x == $y) {
  821. return $x;
  822. }
  823. // Standard case
  824. $ℯ = \M_E;
  825. $xˣ = $x ** $x;
  826. $yʸ = $y ** $y;
  827. return 1 / $ℯ * \pow($xˣ / $yʸ, 1 / ($x - $y));
  828. }
  829. /**
  830. * Get a report of all the averages over a list of numbers
  831. * Includes mean, median mode, geometric mean, harmonic mean, quardratic mean
  832. *
  833. * @param array<float> $numbers
  834. *
  835. * @return array{
  836. * mean: float,
  837. * median: float,
  838. * mode: float[],
  839. * geometric_mean: float,
  840. * harmonic_mean: float,
  841. * contraharmonic_mean: float,
  842. * quadratic_mean: float,
  843. * trimean: float,
  844. * iqm: float,
  845. * cubic_mean: float,
  846. * }
  847. *
  848. * @throws Exception\BadDataException
  849. * @throws Exception\OutOfBoundsException
  850. */
  851. public static function describe(array $numbers): array
  852. {
  853. return [
  854. 'mean' => self::mean($numbers),
  855. 'median' => self::median($numbers),
  856. 'mode' => self::mode($numbers),
  857. 'geometric_mean' => self::geometricMean($numbers),
  858. 'harmonic_mean' => self::harmonicMean($numbers),
  859. 'contraharmonic_mean' => self::contraharmonicMean($numbers),
  860. 'quadratic_mean' => self::quadraticMean($numbers),
  861. 'trimean' => self::trimean($numbers),
  862. 'iqm' => self::iqm($numbers),
  863. 'cubic_mean' => self::cubicMean($numbers),
  864. ];
  865. }
  866. }