Preg.php 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. <?php
  2. declare(strict_types=1);
  3. /*
  4. * This file is part of PHP CS Fixer.
  5. *
  6. * (c) Fabien Potencier <fabien@symfony.com>
  7. * Dariusz Rumiński <dariusz.ruminski@gmail.com>
  8. *
  9. * This source file is subject to the MIT license that is bundled
  10. * with this source code in the file LICENSE.
  11. */
  12. namespace PhpCsFixer;
  13. /**
  14. * This class replaces preg_* functions to better handling UTF8 strings,
  15. * ensuring no matter "u" modifier is present or absent subject will be handled correctly.
  16. *
  17. * @author Kuba Werłos <werlos@gmail.com>
  18. *
  19. * @internal
  20. */
  21. final class Preg
  22. {
  23. /**
  24. * @param array<array-key, mixed> $matches
  25. * @param int-mask<PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL> $flags
  26. *
  27. * @param-out ($flags is PREG_OFFSET_CAPTURE
  28. * ? array<array-key, array{string, 0|positive-int}|array{'', -1}>
  29. * : ($flags is PREG_UNMATCHED_AS_NULL
  30. * ? array<array-key, string|null>
  31. * : ($flags is int-mask<PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL>&768
  32. * ? array<array-key, array{string, 0|positive-int}|array{null, -1}>
  33. * : array<array-key, string>
  34. * )
  35. * )
  36. * ) $matches
  37. *
  38. * @throws PregException
  39. */
  40. public static function match(string $pattern, string $subject, ?array &$matches = null, int $flags = 0, int $offset = 0): bool
  41. {
  42. $result = @preg_match(self::addUtf8Modifier($pattern), $subject, $matches, $flags, $offset);
  43. if (false !== $result && PREG_NO_ERROR === preg_last_error()) {
  44. return 1 === $result;
  45. }
  46. $result = @preg_match(self::removeUtf8Modifier($pattern), $subject, $matches, $flags, $offset);
  47. if (false !== $result && PREG_NO_ERROR === preg_last_error()) {
  48. return 1 === $result;
  49. }
  50. throw self::newPregException(preg_last_error(), preg_last_error_msg(), __METHOD__, $pattern);
  51. }
  52. /**
  53. * @param array<array-key, mixed> $matches
  54. * @param int-mask<PREG_PATTERN_ORDER, PREG_SET_ORDER, PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL> $flags
  55. *
  56. * @param-out ($flags is PREG_PATTERN_ORDER
  57. * ? array<list<string>>
  58. * : ($flags is PREG_SET_ORDER
  59. * ? list<array<string>>
  60. * : ($flags is int-mask<PREG_PATTERN_ORDER, PREG_OFFSET_CAPTURE>&(256|257)
  61. * ? array<list<array{string, int}>>
  62. * : ($flags is int-mask<PREG_SET_ORDER, PREG_OFFSET_CAPTURE>&258
  63. * ? list<array<array{string, int}>>
  64. * : ($flags is int-mask<PREG_PATTERN_ORDER, PREG_UNMATCHED_AS_NULL>&(512|513)
  65. * ? array<list<?string>>
  66. * : ($flags is int-mask<PREG_SET_ORDER, PREG_UNMATCHED_AS_NULL>&514
  67. * ? list<array<?string>>
  68. * : ($flags is int-mask<PREG_SET_ORDER, PREG_OFFSET_CAPTURE, PREG_UNMATCHED_AS_NULL>&770
  69. * ? list<array<array{?string, int}>>
  70. * : ($flags is 0 ? array<list<string>> : array<mixed>)
  71. * )
  72. * )
  73. * )
  74. * )
  75. * )
  76. * )
  77. * ) $matches
  78. *
  79. * @throws PregException
  80. */
  81. public static function matchAll(string $pattern, string $subject, ?array &$matches = null, int $flags = PREG_PATTERN_ORDER, int $offset = 0): int
  82. {
  83. $result = @preg_match_all(self::addUtf8Modifier($pattern), $subject, $matches, $flags, $offset);
  84. if (false !== $result && PREG_NO_ERROR === preg_last_error()) {
  85. return $result;
  86. }
  87. $result = @preg_match_all(self::removeUtf8Modifier($pattern), $subject, $matches, $flags, $offset);
  88. if (false !== $result && PREG_NO_ERROR === preg_last_error()) {
  89. return $result;
  90. }
  91. throw self::newPregException(preg_last_error(), preg_last_error_msg(), __METHOD__, $pattern);
  92. }
  93. /**
  94. * @param array<array-key, string>|string $subject
  95. *
  96. * @param-out int $count
  97. *
  98. * @throws PregException
  99. */
  100. public static function replace(string $pattern, string $replacement, $subject, int $limit = -1, ?int &$count = null): string
  101. {
  102. $result = @preg_replace(self::addUtf8Modifier($pattern), $replacement, $subject, $limit, $count);
  103. if (null !== $result && PREG_NO_ERROR === preg_last_error()) {
  104. return $result;
  105. }
  106. $result = @preg_replace(self::removeUtf8Modifier($pattern), $replacement, $subject, $limit, $count);
  107. if (null !== $result && PREG_NO_ERROR === preg_last_error()) {
  108. return $result;
  109. }
  110. throw self::newPregException(preg_last_error(), preg_last_error_msg(), __METHOD__, $pattern);
  111. }
  112. /**
  113. * @param-out int $count
  114. *
  115. * @throws PregException
  116. */
  117. public static function replaceCallback(string $pattern, callable $callback, string $subject, int $limit = -1, ?int &$count = null): string
  118. {
  119. $result = @preg_replace_callback(self::addUtf8Modifier($pattern), $callback, $subject, $limit, $count);
  120. if (null !== $result && PREG_NO_ERROR === preg_last_error()) {
  121. return $result;
  122. }
  123. $result = @preg_replace_callback(self::removeUtf8Modifier($pattern), $callback, $subject, $limit, $count);
  124. if (null !== $result && PREG_NO_ERROR === preg_last_error()) {
  125. return $result;
  126. }
  127. throw self::newPregException(preg_last_error(), preg_last_error_msg(), __METHOD__, $pattern);
  128. }
  129. /**
  130. * @return list<string>
  131. *
  132. * @throws PregException
  133. */
  134. public static function split(string $pattern, string $subject, int $limit = -1, int $flags = 0): array
  135. {
  136. $result = @preg_split(self::addUtf8Modifier($pattern), $subject, $limit, $flags);
  137. if (false !== $result && PREG_NO_ERROR === preg_last_error()) {
  138. return $result;
  139. }
  140. $result = @preg_split(self::removeUtf8Modifier($pattern), $subject, $limit, $flags);
  141. if (false !== $result && PREG_NO_ERROR === preg_last_error()) {
  142. return $result;
  143. }
  144. throw self::newPregException(preg_last_error(), preg_last_error_msg(), __METHOD__, $pattern);
  145. }
  146. private static function addUtf8Modifier(string $pattern): string
  147. {
  148. return $pattern.'u';
  149. }
  150. private static function removeUtf8Modifier(string $pattern): string
  151. {
  152. if ('' === $pattern) {
  153. return '';
  154. }
  155. $delimiter = $pattern[0];
  156. $endDelimiterPosition = strrpos($pattern, $delimiter);
  157. return substr($pattern, 0, $endDelimiterPosition).str_replace('u', '', substr($pattern, $endDelimiterPosition));
  158. }
  159. /**
  160. * Create the generic PregException message and tell more about such kind of error in the message.
  161. */
  162. private static function newPregException(int $error, string $errorMsg, string $method, string $pattern): PregException
  163. {
  164. $result = null;
  165. $errorMessage = null;
  166. try {
  167. $result = ExecutorWithoutErrorHandler::execute(static fn () => preg_match($pattern, ''));
  168. } catch (ExecutorWithoutErrorHandlerException $e) {
  169. $result = false;
  170. $errorMessage = $e->getMessage();
  171. }
  172. if (false !== $result) {
  173. return new PregException(sprintf('Unknown error occurred when calling %s: %s.', $method, $errorMsg), $error);
  174. }
  175. $code = preg_last_error();
  176. $message = sprintf(
  177. '(code: %d) %s',
  178. $code,
  179. preg_replace('~preg_[a-z_]+[()]{2}: ~', '', $errorMessage)
  180. );
  181. return new PregException(
  182. sprintf('%s(): Invalid PCRE pattern "%s": %s (version: %s)', $method, $pattern, $message, PCRE_VERSION),
  183. $code
  184. );
  185. }
  186. }