KernelDensityEstimation.php 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. <?php
  2. namespace MathPHP\Statistics;
  3. use MathPHP\Exception;
  4. use MathPHP\Probability\Distribution\Continuous;
  5. /**
  6. * Kernel Density Estimate
  7. * https://en.wikipedia.org/wiki/Kernel_density_estimation
  8. *
  9. * ____
  10. * 1 \ / (x - xᵢ) \
  11. * KDE(x) = ----- * > K | ------- |
  12. * n * h / \ h /
  13. * ‾‾‾‾
  14. * The kernel function K, must be a non-negative function with a mean of 0 and integrates to 1
  15. */
  16. class KernelDensityEstimation
  17. {
  18. /** @var array<float> Data used for the esimtation */
  19. protected $data;
  20. /** @var int number of data points */
  21. protected $n;
  22. /** @var float bandwidth */
  23. protected $h;
  24. /** @var callable kernel function */
  25. protected $kernel;
  26. // Available built-in kernel functions
  27. public const STANDARD_NORMAL = 'StandardNormal';
  28. public const NORMAL = 'Normal';
  29. public const UNIFORM = 'Uniform';
  30. public const TRIANGULAR = 'Triangular';
  31. public const EPANECHNIKOV = 'Epanechnikov';
  32. public const TRICUBE = 'Tricube';
  33. /**
  34. * Constructor
  35. *
  36. * @param array<float> $data data used for the estimation
  37. * @param float|null $h the bandwidth
  38. * @param callable|string|null $kernel a function used to generate the KDE
  39. *
  40. * @throws Exception\BadDataException if data set is empty
  41. * @throws Exception\OutOfBoundsException h ≤ 0
  42. * @throws Exception\BadParameterException
  43. */
  44. public function __construct(array $data, float $h = null, $kernel = null)
  45. {
  46. $this->n = \count($data);
  47. if ($this->n === 0) {
  48. throw new Exception\BadDataException('Dataset cannot be empty.');
  49. }
  50. $this->data = $data;
  51. $this->setBandwidth($h);
  52. $this->setKernelFunction($kernel);
  53. }
  54. /**
  55. * Set Bandwidth
  56. *
  57. * @param float|null $h the bandwidth
  58. *
  59. * @throws Exception\OutOfBoundsException if h ≤ 0
  60. */
  61. public function setBandwidth(float $h = null): void
  62. {
  63. if ($h === null) {
  64. $this->h = $this->getDefaultBandwidth();
  65. return;
  66. }
  67. if ($h <= 0) {
  68. throw new Exception\OutOfBoundsException("Bandwidth must be > 0. h = $h");
  69. }
  70. $this->h = $h;
  71. }
  72. /**
  73. * Default bandwidth for when one is not provided.
  74. * Uses the normal distribution approximation bandwidth estimator.
  75. * https://en.wikipedia.org/wiki/Kernel_density_estimation#A_rule-of-thumb_bandwidth_estimator
  76. *
  77. * ⅕
  78. * / 4σ⁵ \
  79. * h = | --- |
  80. * \ 3n /
  81. *
  82. *
  83. * @return float
  84. *
  85. * @throws Exception\OutOfBoundsException
  86. */
  87. private function getDefaultBandwidth(): float
  88. {
  89. $4σ⁵ = 4 * Descriptive::standardDeviation($this->data) ** 5;
  90. $3n = 3 * $this->n;
  91. $⅕ = 0.2;
  92. return ($4σ⁵ / $3n) ** $⅕;
  93. }
  94. /**
  95. * Set The Kernel Function
  96. *
  97. * If the parameter is a string, check that there is a function with that name
  98. * in the "library". If it's a callable, use that function.
  99. *
  100. * @param callable|string|null $kernel
  101. *
  102. * @throws Exception\BadParameterException if $kernel is not a string or callable
  103. * @throws Exception\BadDataException
  104. * @throws Exception\OutOfBoundsException
  105. */
  106. public function setKernelFunction($kernel = null): void
  107. {
  108. if ($kernel === null) {
  109. $this->kernel = $this->getKernelFunctionFromLibrary(self::STANDARD_NORMAL);
  110. } elseif (\is_string($kernel)) {
  111. $this->kernel = $this->getKernelFunctionFromLibrary($kernel);
  112. } elseif (\is_callable($kernel)) {
  113. $this->kernel = $kernel;
  114. } else {
  115. throw new Exception\BadParameterException('Kernel must be a callable or a string. Type is: ' . \gettype($kernel));
  116. }
  117. }
  118. /**
  119. * Select the kernel function from one of the built-in provided functions.
  120. *
  121. * @param string $kernel Name of built-in kernel function
  122. *
  123. * @return callable kernel function
  124. *
  125. * @throws Exception\BadDataException if the name of the kernel function is not one of the built-in functions
  126. * @throws Exception\OutOfBoundsException
  127. */
  128. private function getKernelFunctionFromLibrary(string $kernel): callable
  129. {
  130. switch ($kernel) {
  131. case self::STANDARD_NORMAL:
  132. return function ($x) {
  133. $standardNormal = new Continuous\StandardNormal();
  134. return $standardNormal->pdf($x);
  135. };
  136. case self::NORMAL:
  137. $μ = 0;
  138. $σ = Descriptive::standardDeviation($this->data);
  139. return function ($x) use ($μ, $σ) {
  140. $normal = new Continuous\Normal($μ, $σ);
  141. return $normal->pdf($x);
  142. };
  143. case self::UNIFORM:
  144. return function ($x) {
  145. if (\abs($x) > 1) {
  146. return 0;
  147. } else {
  148. return .5;
  149. }
  150. };
  151. case self::TRIANGULAR:
  152. return function ($x) {
  153. if (\abs($x) > 1) {
  154. return 0;
  155. } else {
  156. return 1 - \abs($x);
  157. }
  158. };
  159. case self::EPANECHNIKOV:
  160. return function ($x) {
  161. if (\abs($x) > 1) {
  162. return 0;
  163. } else {
  164. return .75 * (1 - $x ** 2);
  165. }
  166. };
  167. case self::TRICUBE:
  168. return function ($x) {
  169. if (\abs($x) > 1) {
  170. return 0;
  171. } else {
  172. return 70 / 81 * ((1 - \abs($x) ** 3) ** 3);
  173. }
  174. };
  175. default:
  176. throw new Exception\BadDataException("Unknown kernel function: $kernel");
  177. }
  178. }
  179. /**
  180. * Evaluate the kernel density estimation at $x
  181. *
  182. * ____
  183. * 1 \ / (x - xᵢ) \
  184. * KDE(x) = ----- * > K | ------- |
  185. * n * h / \ h /
  186. * ‾‾‾‾
  187. * @param float $x the value to evaluate
  188. *
  189. * @return float the kernel density estimate at $x
  190. */
  191. public function evaluate(float $x): float
  192. {
  193. $h = $this->h;
  194. $n = $this->n;
  195. $scale = \array_map(
  196. function ($xᵢ) use ($x, $h) {
  197. return ($x - $xᵢ) / $h;
  198. },
  199. $this->data
  200. );
  201. $K = \array_map($this->kernel, $scale);
  202. $density = \array_sum($K) / ($n * $h);
  203. return $density;
  204. }
  205. }