12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766 |
- <?php
- namespace MathPHP\Statistics;
- use MathPHP\Exception;
/**
 * Descriptive statistics
 * Summary statistics that quantitatively describe or summarize features of a collection of information.
 * https://en.wikipedia.org/wiki/Descriptive_statistics
 *
 * All methods are static and operate on a plain array of numbers.
 */
class Descriptive
{
    /** Boolean switch value: treat the data as the whole population (see standardDeviation(), sd(), describe()). */
    public const POPULATION = true;

    /** Boolean switch value: treat the data as a sample (see standardDeviation(), sd(), describe()). */
    public const SAMPLE = false;

    /**
     * Range - the difference between the largest and smallest values
     * It is the size of the smallest interval which contains all the data.
     * It provides an indication of statistical dispersion.
     * (https://en.wikipedia.org/wiki/Range_(statistics))
     *
     * R = max x - min x
     *
     * @param float[] $numbers
     *
     * @return float
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     */
    public static function range(array $numbers): float
    {
        if (empty($numbers)) {
            throw new Exception\BadDataException('Cannot find the range of an empty list of numbers');
        }

        return \max($numbers) - \min($numbers);
    }

    /**
     * Midrange - the mean of the largest and smallest values
     * It is the midpoint of the range; as such, it is a measure of central tendency.
     * (https://en.wikipedia.org/wiki/Mid-range)
     *
     *     max x + min x
     * M = -------------
     *           2
     *
     * @param float[] $numbers
     *
     * @return float
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     */
    public static function midrange(array $numbers): float
    {
        if (empty($numbers)) {
            throw new Exception\BadDataException('Cannot find the midrange of an empty list of numbers');
        }

        return Average::mean([\min($numbers), \max($numbers)]);
    }

    /**
     * Variance
     *
     * Variance measures how far a set of numbers are spread out.
     * A variance of zero indicates that all the values are identical.
     * Variance is always non-negative: a small variance indicates that the data points
     * tend to be very close to the mean (expected value) and hence to each other.
     * A high variance indicates that the data points are very spread out around the mean
     * and from each other.
     * (https://en.wikipedia.org/wiki/Variance)
     *
     *      ∑⟮xᵢ - μ⟯²
     * σ² = ----------
     *          ν
     *
     * Generalized method that allows setting the degrees of freedom.
     *  For population variance, set d.f. (ν) to n
     *  For sample variance, set d.f. (ν) to n - 1
     *  Or use populationVariance or sampleVariance convenience methods.
     *
     * μ is the population mean
     * ν is the degrees of freedom, which usually is
     *   the number of numbers in the population set or n - 1 for sample set.
     *
     * @param float[] $numbers
     * @param int     $ν degrees of freedom
     *
     * @return float
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     * @throws Exception\OutOfBoundsException if degrees of freedom is ≤ 0
     */
    public static function variance(array $numbers, int $ν): float
    {
        if (empty($numbers)) {
            throw new Exception\BadDataException('Cannot find the variance of an empty list of numbers');
        }
        if ($ν <= 0) {
            throw new Exception\OutOfBoundsException('Degrees of freedom must be > 0');
        }

        // Numerator of the formula; the name is kept free of whitespace so it is a legal identifier.
        $∑⟮xᵢ−μ⟯² = RandomVariable::sumOfSquaresDeviations($numbers);

        return $∑⟮xᵢ−μ⟯² / $ν;
    }

    /**
     * Population variance - Use when all possible observations of the system are present.
     * If used with a subset of data (sample variance), it will be a biased variance.
     *
     *      ∑⟮xᵢ - μ⟯²
     * σ² = ----------
     *          N
     *
     * μ is the population mean
     * N is the number of numbers in the population set
     *
     * @param float[] $numbers
     *
     * @return float
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     * @throws Exception\OutOfBoundsException if degrees of freedom is ≤ 0
     */
    public static function populationVariance(array $numbers): float
    {
        return self::variance($numbers, \count($numbers));
    }

    /**
     * Unbiased sample variance
     * Use when only a subset of all possible observations of the system are present.
     *
     *      ∑⟮xᵢ - x̄⟯²
     * S² = ----------
     *        n - 1
     *
     * x̄ is the sample mean
     * n is the number of numbers in the sample set
     *
     * A single observation yields 0 instead of being rejected for having zero degrees of freedom.
     *
     * @param float[] $numbers
     *
     * @return float
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     * @throws Exception\OutOfBoundsException if degrees of freedom is ≤ 0
     */
    public static function sampleVariance(array $numbers): float
    {
        $n = \count($numbers);
        if ($n === 1) {
            return 0.0;
        }

        return self::variance($numbers, $n - 1);
    }

    /**
     * Weighted sample variance
     *
     * Biased case
     *
     *       ∑wᵢ⟮xᵢ - μw⟯²
     * σ²w = -------------
     *            ∑wᵢ
     *
     * Unbiased estimator for frequency weights
     *
     *       ∑wᵢ⟮xᵢ - μw⟯²
     * σ²w = -------------
     *          ∑wᵢ - 1
     *
     * μw is the weighted mean
     *
     * https://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Weighted_sample_variance
     *
     * The length check runs before the single-observation shortcut, so a lone number paired
     * with a mismatched weight list is rejected rather than silently reported as 0.
     * The divisor itself is not guarded: it is zero when the weights sum to 0 (biased)
     * or to 1 (unbiased).
     *
     * @param float[] $numbers
     * @param float[] $weights
     * @param bool    $biased true divides by ∑wᵢ; false (default) divides by ∑wᵢ - 1
     *
     * @return float
     *
     * @throws Exception\BadDataException if the number of numbers and weights are not equal
     */
    public static function weightedSampleVariance(array $numbers, array $weights, bool $biased = false): float
    {
        $n = \count($numbers);
        if ($n !== \count($weights)) {
            throw new Exception\BadDataException('Numbers and weights must have the same number of elements.');
        }
        if ($n === 1) {
            return 0.0;
        }

        $μw = Average::weightedMean($numbers, $weights);

        $∑wᵢ⟮xᵢ−μw⟯² = \array_sum(\array_map(
            static function ($xᵢ, $wᵢ) use ($μw) {
                return $wᵢ * ($xᵢ - $μw) ** 2;
            },
            $numbers,
            $weights
        ));

        $∑wᵢ     = \array_sum($weights);
        $divisor = $biased ? $∑wᵢ : $∑wᵢ - 1;

        return $∑wᵢ⟮xᵢ−μw⟯² / $divisor;
    }

    /**
     * Standard deviation
     * A measure that is used to quantify the amount of variation or dispersion of a set of data values.
     * A low standard deviation indicates that the data points tend to be close to the mean
     * (also called the expected value) of the set.
     * A high standard deviation indicates that the data points are spread out over a wider range of values.
     * (https://en.wikipedia.org/wiki/Standard_deviation)
     *
     * SD+ = √⟮σ²⟯ = √⟮population variance⟯
     * SD  = √⟮S²⟯ = √⟮sample variance⟯
     *
     * The switch parameter is spelled with U+FE62 (SMALL PLUS SIGN) because an ASCII "+"
     * cannot be part of a PHP variable name.
     *
     * @param float[] $numbers
     * @param bool    $SD﹢ : true returns SD+ (uses population variance);
     *                       false returns SD (uses sample variance);
     *                       Default is false (SD (sample variance))
     *
     * @return float
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     * @throws Exception\OutOfBoundsException if degrees of freedom is ≤ 0
     */
    public static function standardDeviation(array $numbers, bool $SD﹢ = false): float
    {
        if (empty($numbers)) {
            throw new Exception\BadDataException('Cannot find the standard deviation of an empty list of numbers');
        }

        $variance = $SD﹢
            ? self::populationVariance($numbers)
            : self::sampleVariance($numbers);

        return \sqrt($variance);
    }

    /**
     * sd - Standard deviation - convenience method
     *
     * @param float[] $numbers
     * @param bool    $SD﹢ : true returns SD+ (uses population variance);
     *                       false returns SD (uses sample variance);
     *                       Default is false (SD (sample variance))
     *
     * @return float
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     * @throws Exception\OutOfBoundsException if degrees of freedom is ≤ 0
     */
    public static function sd(array $numbers, bool $SD﹢ = false): float
    {
        return self::standardDeviation($numbers, $SD﹢);
    }

    /**
     * MAD - mean absolute deviation
     *
     * The average of the absolute deviations from a central point.
     * It is a summary statistic of statistical dispersion or variability.
     * (https://en.wikipedia.org/wiki/Average_absolute_deviation)
     *
     *       ∑|xᵢ - x̄|
     * MAD = ---------
     *           N
     *
     * x̄ is the mean
     * N is the number of numbers in the population set
     *
     * @param float[] $numbers
     *
     * @return float
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     */
    public static function meanAbsoluteDeviation(array $numbers): float
    {
        if (empty($numbers)) {
            throw new Exception\BadDataException('Cannot find the mean absolute deviation of an empty list of numbers');
        }

        $x = Average::mean($numbers);

        $∑│xᵢ−x│ = \array_sum(\array_map(
            static function ($xᵢ) use ($x) {
                return \abs($xᵢ - $x);
            },
            $numbers
        ));

        return $∑│xᵢ−x│ / \count($numbers);
    }

    /**
     * MAD - median absolute deviation
     *
     * The median of the absolute deviations from the data's median.
     * It is a summary statistic of statistical dispersion or variability.
     * It is a robust measure of the variability of a univariate sample of quantitative data.
     * (https://en.wikipedia.org/wiki/Median_absolute_deviation)
     *
     * MAD = median(|xᵢ - x̄|)
     *
     * x̄ is the median
     *
     * @param float[] $numbers
     *
     * @return float
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     */
    public static function medianAbsoluteDeviation(array $numbers): float
    {
        if (empty($numbers)) {
            throw new Exception\BadDataException('Cannot find the median absolute deviation of an empty list of numbers');
        }

        $x = Average::median($numbers);

        $absoluteDeviations = \array_map(
            static function ($xᵢ) use ($x) {
                return \abs($xᵢ - $x);
            },
            $numbers
        );

        return Average::median($absoluteDeviations);
    }

    /**
     * Quartiles
     * Three points that divide the data set into four equal groups, each group comprising a quarter of the data.
     * https://en.wikipedia.org/wiki/Quartile
     *
     * There are multiple methods for computing quartiles:
     *  - Inclusive
     *  - Exclusive
     *
     * The method name is matched case-insensitively; any name other than "inclusive"
     * selects the exclusive method.
     *
     * @param float[] $numbers
     * @param string  $method What quartile method to use (optional - default: exclusive)
     *
     * @return float[] (0%, Q1, Q2, Q3, 100%, IQR)
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     */
    public static function quartiles(array $numbers, string $method = 'exclusive'): array
    {
        if (\strtolower($method) === 'inclusive') {
            return self::quartilesInclusive($numbers);
        }

        return self::quartilesExclusive($numbers);
    }

    /**
     * Quartiles - Exclusive method
     * Three points that divide the data set into four equal groups, each group comprising a quarter of the data.
     * https://en.wikipedia.org/wiki/Quartile
     *
     * 0% is smallest number
     * Q1 (25%) is first quartile (lower quartile, 25th percentile)
     * Q2 (50%) is second quartile (median, 50th percentile)
     * Q3 (75%) is third quartile (upper quartile, 75th percentile)
     * 100% is largest number
     * interquartile_range is the difference between the upper and lower quartiles. (IQR = Q₃ - Q₁)
     *
     * Method used
     *  - Use the median to divide the ordered data set into two halves.
     *   - If there are an odd number of data points in the original ordered data set, do not include the median
     *     (the central value in the ordered list) in either half.
     *   - If there are an even number of data points in the original ordered data set,
     *     split this data set exactly in half.
     *  - The lower quartile value is the median of the lower half of the data.
     *    The upper quartile value is the median of the upper half of the data.
     *
     * This rule is employed by the TI-83 calculator boxplot and "1-Var Stats" functions.
     * This is the most basic method that is commonly taught in math textbooks.
     * It is "method 1" from Wikipedia.
     *
     * A single-element list is special-cased (both halves would be empty): every
     * entry is that element and IQR is 0.
     *
     * @param float[] $numbers
     *
     * @return array{
     *     "0%":   float,
     *     "Q1":   float,
     *     "Q2":   float,
     *     "Q3":   float,
     *     "100%": float,
     *     "IQR":  float,
     * }
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     */
    public static function quartilesExclusive(array $numbers): array
    {
        if (empty($numbers)) {
            throw new Exception\BadDataException('Cannot find the quartiles of an empty list of numbers');
        }

        $length = \count($numbers);
        if ($length === 1) {
            $number = \array_pop($numbers);
            return [
                '0%'   => $number,
                'Q1'   => $number,
                'Q2'   => $number,
                'Q3'   => $number,
                '100%' => $number,
                'IQR'  => 0,
            ];
        }

        \sort($numbers);

        // Taking ⌊n/2⌋ elements from each end splits an even list exactly in half
        // and leaves the central element of an odd list out of both halves.
        $half       = \intdiv($length, 2);
        $lower_half = \array_slice($numbers, 0, $half);
        $upper_half = \array_slice($numbers, $length - $half);

        return self::buildQuartileSummary($numbers, $lower_half, $upper_half);
    }

    /**
     * Quartiles - Inclusive method (R method)
     * Three points that divide the data set into four equal groups, each group comprising a quarter of the data.
     * https://en.wikipedia.org/wiki/Quartile
     *
     * 0% is smallest number
     * Q1 (25%) is first quartile (lower quartile, 25th percentile)
     * Q2 (50%) is second quartile (median, 50th percentile)
     * Q3 (75%) is third quartile (upper quartile, 75th percentile)
     * 100% is largest number
     * interquartile_range is the difference between the upper and lower quartiles. (IQR = Q₃ - Q₁)
     *
     * Method used
     *  - Use the median to divide the ordered data set into two halves.
     *   - If there are an odd number of data points in the original ordered data set,
     *     include the median (the central value in the ordered list) in both halves.
     *   - If there are an even number of data points in the original ordered data set,
     *     split this data set exactly in half.
     *  - The lower quartile value is the median of the lower half of the data.
     *    The upper quartile value is the median of the upper half of the data.
     *
     * The values found by this method are also known as "Tukey's hinges".
     * This is the "method 2" from Wikipedia.
     *
     * @param float[] $numbers
     *
     * @return array{
     *     "0%":   float,
     *     "Q1":   float,
     *     "Q2":   float,
     *     "Q3":   float,
     *     "100%": float,
     *     "IQR":  float,
     * }
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     */
    public static function quartilesInclusive(array $numbers): array
    {
        if (empty($numbers)) {
            throw new Exception\BadDataException('Cannot find the quartiles of an empty list of numbers');
        }

        \sort($numbers);

        $length     = \count($numbers);
        $half       = \intdiv($length, 2);
        $lower_half = \array_slice($numbers, 0, $half);
        $upper_half = \array_slice($numbers, $length - $half);

        if ($length % 2 !== 0) {
            // Odd count: the median joins the end of the lower half and the start of the upper half.
            $median       = Average::median($numbers);
            $lower_half[] = $median;
            \array_unshift($upper_half, $median);
        }

        return self::buildQuartileSummary($numbers, $lower_half, $upper_half);
    }

    /**
     * Assemble the quartile report shared by the exclusive and inclusive methods.
     *
     * Q1 and Q3 are the medians of the supplied halves; 0%, Q2 and 100% come from the full list.
     *
     * @param float[] $numbers    the full data set
     * @param float[] $lower_half values whose median is Q1
     * @param float[] $upper_half values whose median is Q3
     *
     * @return array{
     *     "0%":   float,
     *     "Q1":   float,
     *     "Q2":   float,
     *     "Q3":   float,
     *     "100%": float,
     *     "IQR":  float,
     * }
     */
    private static function buildQuartileSummary(array $numbers, array $lower_half, array $upper_half): array
    {
        $lower_quartile = Average::median($lower_half);
        $upper_quartile = Average::median($upper_half);

        return [
            '0%'   => \min($numbers),
            'Q1'   => $lower_quartile,
            'Q2'   => Average::median($numbers),
            'Q3'   => $upper_quartile,
            '100%' => \max($numbers),
            'IQR'  => $upper_quartile - $lower_quartile,
        ];
    }

    /**
     * IQR - Interquartile range (midspread, middle fifty)
     * A measure of statistical dispersion.
     * Difference between the upper and lower quartiles.
     * https://en.wikipedia.org/wiki/Interquartile_range
     *
     * IQR = Q₃ - Q₁
     *
     * @param float[] $numbers
     * @param string  $method What quartile method to use (optional - default: exclusive)
     *
     * @return float
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     */
    public static function interquartileRange(array $numbers, string $method = 'exclusive'): float
    {
        return self::quartiles($numbers, $method)['IQR'];
    }

    /**
     * IQR - Interquartile range (midspread, middle fifty)
     * Convenience wrapper function for interquartileRange.
     *
     * @param float[] $numbers
     * @param string  $method What quartile method to use (optional - default: exclusive)
     *
     * @return float
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     */
    public static function iqr(array $numbers, string $method = 'exclusive'): float
    {
        return self::interquartileRange($numbers, $method);
    }

    /**
     * Compute the P-th percentile of a list of numbers
     *
     * Linear interpolation between closest ranks method - Second variant, C = 1
     * P-th percentile (0 <= P <= 100) of a list of N ordered values (sorted from least to greatest)
     * Similar method used in NumPy and Excel
     * https://en.wikipedia.org/wiki/Percentile#Second_variant.2C_.7F.27.22.60UNIQ--postMath-00000043-QINU.60.22.27.7F
     *
     *      P
     * x = --- (N - 1) + 1
     *     100
     *
     * P = percentile
     * N = number of elements in list
     *
     * ν(x) = νₓ + x%1(νₓ₊₁ - νₓ)
     *
     * ⌊x⌋  = integer part of x
     * x%1  = fraction part of x
     * νₓ   = number in position x in sorted list of numbers
     * νₓ₊₁ = number in position x + 1 in sorted list of number
     *
     * @param float[] $numbers
     * @param float   $P percentile to calculate
     *
     * @return float in list corresponding to P percentile
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     * @throws Exception\OutOfBoundsException if $P percentile is not between 0 and 100
     */
    public static function percentile(array $numbers, float $P): float
    {
        if (empty($numbers)) {
            throw new Exception\BadDataException('Cannot find the P-th percentile of an empty list of numbers');
        }
        // Written as a negated conjunction so that NAN, which fails every comparison, is rejected too.
        if (!($P >= 0 && $P <= 100)) {
            throw new Exception\OutOfBoundsException('Percentile P must be between 0 and 100.');
        }

        $N = \count($numbers);
        if ($N === 1) {
            return \array_shift($numbers);
        }

        \sort($numbers);

        $x   = ($P / 100) * ($N - 1) + 1;
        $⌊x⌋ = (int) $x;

        // P = 100 gives x = N exactly, and floating-point rounding can do the same for P just
        // below 100; either way there is no element at position x + 1, so the answer is the maximum.
        if ($⌊x⌋ >= $N) {
            return $numbers[$N - 1];
        }

        $xFrac = $x - $⌊x⌋;          // x%1 in the formula above
        $νₓ    = $numbers[$⌊x⌋ - 1]; // positions are 1-based, array offsets 0-based
        $νₓ₊₁  = $numbers[$⌊x⌋];

        return $νₓ + $xFrac * ($νₓ₊₁ - $νₓ);
    }

    /**
     * Midhinge
     * The average of the first and third quartiles and is thus a measure of location.
     * Equivalently, it is the 25% trimmed mid-range or 25% midsummary; it is an L-estimator.
     * https://en.wikipedia.org/wiki/Midhinge
     *
     * Midhinge = (first quartile + third quartile) / 2
     *
     * Quartiles are taken from quartiles() with its default (exclusive) method.
     *
     * @param float[] $numbers
     *
     * @return float
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     */
    public static function midhinge(array $numbers): float
    {
        $quartiles = self::quartiles($numbers);
        $Q1        = $quartiles['Q1'];
        $Q3        = $quartiles['Q3'];

        return Average::mean([$Q1, $Q3]);
    }

    /**
     * Coefficient of variation (cᵥ)
     * Also known as relative standard deviation (RSD)
     *
     * A standardized measure of dispersion of a probability distribution or
     * frequency distribution. It is often expressed as a percentage.
     * The ratio of the standard deviation to the mean.
     * https://en.wikipedia.org/wiki/Coefficient_of_variation
     *
     *      σ
     * cᵥ = -
     *      μ
     *
     * σ is the sample standard deviation (standardDeviation() with its default switch).
     * A mean of zero is not special-cased here; the division is performed as-is
     * (describe() reports NAN for its "cv" entry in that situation).
     *
     * @param float[] $numbers
     *
     * @return float
     *
     * @throws Exception\BadDataException if the input array of numbers is empty
     * @throws Exception\OutOfBoundsException if degrees of freedom is ≤ 0
     */
    public static function coefficientOfVariation(array $numbers): float
    {
        $σ = self::standardDeviation($numbers);
        $μ = Average::mean($numbers);

        return $σ / $μ;
    }

    /**
     * Get a report of all the descriptive statistics over a list of numbers
     * Includes mean, median, mode, range, midrange, variance, standard deviation, quartiles, etc.
     *
     * Skewness, kurtosis and their standard errors are reported as null when the list is
     * shorter than the per-entry minimum size checked below.
     *
     * @param float[] $numbers
     * @param bool    $population : true means all possible observations of the system are present;
     *                              false means a sample is used.
     *
     * @return array{
     *     n: int<0, max>,
     *     min: float|false,
     *     max: float|false,
     *     mean: float,
     *     median: float,
     *     mode: float[],
     *     range: float,
     *     midrange: float,
     *     variance: float,
     *     sd: float,
     *     cv: float,
     *     mean_mad: float,
     *     median_mad: float,
     *     quartiles: float[],
     *     midhinge: float,
     *     skewness: float|null,
     *     ses: float|null,
     *     kurtosis: float|null,
     *     sek: float|null,
     *     sem: float,
     *     ci_95: array{ci: float|null, lower_bound: float|null, upper_bound: float|null},
     *     ci_99: array{ci: float|null, lower_bound: float|null, upper_bound: float|null},
     * }
     *
     * @throws Exception\OutOfBoundsException
     * @throws Exception\BadDataException
     */
    public static function describe(array $numbers, bool $population = false): array
    {
        $n = \count($numbers);
        $μ = Average::mean($numbers);
        $σ = self::standardDeviation($numbers, $population);

        return [
            'n'          => $n,
            'min'        => \min($numbers),
            'max'        => \max($numbers),
            'mean'       => $μ,
            'median'     => Average::median($numbers),
            'mode'       => Average::mode($numbers),
            'range'      => self::range($numbers),
            'midrange'   => self::midrange($numbers),
            'variance'   => $population ? self::populationVariance($numbers) : self::sampleVariance($numbers),
            'sd'         => $σ,
            // A zero mean would make the ratio undefined, so it is reported as NAN.
            'cv'         => $μ ? $σ / $μ : \NAN,
            'mean_mad'   => self::meanAbsoluteDeviation($numbers),
            'median_mad' => self::medianAbsoluteDeviation($numbers),
            'quartiles'  => self::quartiles($numbers),
            'midhinge'   => self::midhinge($numbers),
            'skewness'   => $population
                ? ($n > 0 ? RandomVariable::populationSkewness($numbers) : null)
                : ($n >= 3 ? RandomVariable::skewness($numbers) : null),
            'ses'        => $n > 2 ? RandomVariable::ses($n) : null,
            'kurtosis'   => $population
                ? ($n > 3 ? RandomVariable::populationKurtosis($numbers) : null)
                : ($n > 0 ? RandomVariable::sampleKurtosis($numbers) : null),
            'sek'        => $n > 3 ? RandomVariable::sek($n) : null,
            'sem'        => RandomVariable::standardErrorOfTheMean($numbers),
            'ci_95'      => RandomVariable::confidenceInterval($μ, $n, $σ, '95'),
            'ci_99'      => RandomVariable::confidenceInterval($μ, $n, $σ, '99'),
        ];
    }

    /**
     * Five number summary
     * A descriptive statistic that provides information about a set of observations.
     * It consists of the five most important sample percentiles:
     *  1) the sample minimum (smallest observation)
     *  2) the lower quartile or first quartile
     *  3) the median (middle value)
     *  4) the upper quartile or third quartile
     *  5) the sample maximum (largest observation)
     *
     * https://en.wikipedia.org/wiki/Five-number_summary
     *
     * Quartiles are taken from quartiles() with its default (exclusive) method.
     *
     * @param array<int|float> $numbers
     *
     * @return array{
     *     min: float|int|false,
     *     Q1: float,
     *     median: float,
     *     Q3: float,
     *     max: float|int|false,
     * }
     *
     * @throws Exception\BadDataException
     */
    public static function fiveNumberSummary(array $numbers): array
    {
        $quartiles = self::quartiles($numbers);

        return [
            'min'    => \min($numbers),
            'Q1'     => $quartiles['Q1'],
            'median' => Average::median($numbers),
            'Q3'     => $quartiles['Q3'],
            'max'    => \max($numbers),
        ];
    }
}
|