Outlier.php 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. <?php
  2. namespace MathPHP\Statistics;
  3. use MathPHP\Exception;
  4. use MathPHP\Functions\Map\Single;
  5. use MathPHP\Probability\Distribution\Continuous\StudentT;
  6. /**
  7. * Tests for outliers in data
  8. * - Grubbs' Test
  9. */
  10. class Outlier
  11. {
  12. public const ONE_SIDED = 'one';
  13. public const TWO_SIDED = 'two';
  14. public const ONE_SIDED_LOWER = 'lower';
  15. public const ONE_SIDED_UPPER = 'upper';
  16. /**
  17. * The Grubbs' Statistic (G) of a series of data
  18. *
  19. * G is the largest z-score for a set of data
  20. * The statistic can be calculated, looking at only the maximum value ("upper")
  21. * the minimum value ("lower"), or the data point with the largest residual ("two")
  22. *
  23. * https://en.wikipedia.org/wiki/Grubbs%27_test_for_outliers
  24. * https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h1.htm
  25. *
  26. * Two-sided Grubbs' test statistic - largest difference from the mean is an outlier
  27. *
  28. * max❘Yᵢ − μ❘
  29. * G = ----------
  30. * σ
  31. *
  32. * One-sided Grubbs' test statistic - minimum value is an outlier
  33. *
  34. * μ - Ymin
  35. * G = --------
  36. * σ
  37. *
  38. * One-sided Grubbs' test statistic - maximum value is an outlier
  39. *
  40. * Ymax - μ
  41. * G = --------
  42. * σ
  43. *
  44. * @param float[] $data
  45. * @param string $typeOfTest ("upper" "lower", or "two")
  46. *
  47. * @return float G (Grubb's test statistic)
  48. *
  49. * @throws Exception\BadDataException
  50. * @throws Exception\OutOfBoundsException
  51. * @throws Exception\BadParameterException if the type of test is not valid
  52. */
  53. public static function grubbsStatistic(array $data, string $typeOfTest = self::TWO_SIDED): float
  54. {
  55. $μ = Average::mean($data);
  56. $σ = Descriptive::standardDeviation($data);
  57. if ($typeOfTest === self::TWO_SIDED) {
  58. $max❘Yᵢ − μ❘ = \max(Single::abs(Single::subtract($data, $μ)));
  59. return $max❘Yᵢ − μ❘ / $σ;
  60. }
  61. if ($typeOfTest === self::ONE_SIDED_LOWER) {
  62. $yMin = \min($data);
  63. return ($μ - $yMin) / $σ;
  64. }
  65. if ($typeOfTest === self::ONE_SIDED_UPPER) {
  66. $yMax = \max($data);
  67. return ($yMax - $μ) / $σ;
  68. }
  69. throw new Exception\BadParameterException("{$typeOfTest} is not a valid Grubbs; test");
  70. }
  71. /**
  72. * The critical Grubbs Value
  73. *
  74. * The critical Grubbs' value is used to determine if a value in a set of data is likely to be an outlier.
  75. *
  76. * https://en.wikipedia.org/wiki/Grubbs%27_test_for_outliers
  77. * https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h1.htm
  78. *
  79. * ___________
  80. * (n - 1) / T²
  81. * Critical value = ------- \ / ----------
  82. * √n \/ n - 2 + T²
  83. *
  84. * T = Critical value of the t distribution with (N-2) degrees of freedom and a significance level of α/(2N)
  85. * For the one-sided tests, replace α/(2N) with α/N.
  86. *
  87. * @param float $𝛼 Significance level
  88. * @param int $n Size of the data set
  89. * @param string $typeOfTest ('one' or 'two') one or two-tailed test
  90. *
  91. * @return float
  92. *
  93. * @throws Exception\BadParameterException
  94. */
  95. public static function grubbsCriticalValue(float $𝛼, int $n, string $typeOfTest): float
  96. {
  97. self::validateGrubbsCriticalValueTestType($typeOfTest);
  98. $studentT = new StudentT($n - 2);
  99. $T = $typeOfTest === self::ONE_SIDED
  100. ? $studentT->inverse($𝛼 / $n)
  101. : $studentT->inverse($𝛼 / (2 * $n));
  102. return (($n - 1) / \sqrt($n)) * \sqrt($T ** 2 / ($n - 2 + $T ** 2));
  103. }
  104. /* ********************** *
  105. * PRIVATE HELPER METHODS
  106. * ********************** */
  107. /**
  108. * Validate the type of test is two sided, or one sided lower or upper
  109. *
  110. * @param string $typeOfTest
  111. *
  112. * @throws Exception\BadParameterException
  113. */
  114. private static function validateGrubbsCriticalValueTestType(string $typeOfTest): void
  115. {
  116. if (!\in_array($typeOfTest, [self::ONE_SIDED, self::TWO_SIDED])) {
  117. throw new Exception\BadParameterException("{$typeOfTest} is not a valid Grubbs' test");
  118. }
  119. }
  120. }