CodePointString.php 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\String;
  11. use Symfony\Component\String\Exception\ExceptionInterface;
  12. use Symfony\Component\String\Exception\InvalidArgumentException;
  13. /**
  14. * Represents a string of Unicode code points encoded as UTF-8.
  15. *
  16. * @author Nicolas Grekas <p@tchwork.com>
  17. * @author Hugo Hamon <hugohamon@neuf.fr>
  18. *
  19. * @throws ExceptionInterface
  20. */
  21. class CodePointString extends AbstractUnicodeString
  22. {
  23. public function __construct(string $string = '')
  24. {
  25. if ('' !== $string && !preg_match('//u', $string)) {
  26. throw new InvalidArgumentException('Invalid UTF-8 string.');
  27. }
  28. $this->string = $string;
  29. }
  30. public function append(string ...$suffix): static
  31. {
  32. $str = clone $this;
  33. $str->string .= 1 >= \count($suffix) ? ($suffix[0] ?? '') : implode('', $suffix);
  34. if (!preg_match('//u', $str->string)) {
  35. throw new InvalidArgumentException('Invalid UTF-8 string.');
  36. }
  37. return $str;
  38. }
  39. public function chunk(int $length = 1): array
  40. {
  41. if (1 > $length) {
  42. throw new InvalidArgumentException('The chunk length must be greater than zero.');
  43. }
  44. if ('' === $this->string) {
  45. return [];
  46. }
  47. $rx = '/(';
  48. while (65535 < $length) {
  49. $rx .= '.{65535}';
  50. $length -= 65535;
  51. }
  52. $rx .= '.{'.$length.'})/us';
  53. $str = clone $this;
  54. $chunks = [];
  55. foreach (preg_split($rx, $this->string, -1, \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY) as $chunk) {
  56. $str->string = $chunk;
  57. $chunks[] = clone $str;
  58. }
  59. return $chunks;
  60. }
  61. public function codePointsAt(int $offset): array
  62. {
  63. $str = $offset ? $this->slice($offset, 1) : $this;
  64. return '' === $str->string ? [] : [mb_ord($str->string, 'UTF-8')];
  65. }
  66. public function endsWith(string|iterable|AbstractString $suffix): bool
  67. {
  68. if ($suffix instanceof AbstractString) {
  69. $suffix = $suffix->string;
  70. } elseif (!\is_string($suffix)) {
  71. return parent::endsWith($suffix);
  72. }
  73. if ('' === $suffix || !preg_match('//u', $suffix)) {
  74. return false;
  75. }
  76. if ($this->ignoreCase) {
  77. return preg_match('{'.preg_quote($suffix).'$}iuD', $this->string);
  78. }
  79. return \strlen($this->string) >= \strlen($suffix) && 0 === substr_compare($this->string, $suffix, -\strlen($suffix));
  80. }
  81. public function equalsTo(string|iterable|AbstractString $string): bool
  82. {
  83. if ($string instanceof AbstractString) {
  84. $string = $string->string;
  85. } elseif (!\is_string($string)) {
  86. return parent::equalsTo($string);
  87. }
  88. if ('' !== $string && $this->ignoreCase) {
  89. return \strlen($string) === \strlen($this->string) && 0 === mb_stripos($this->string, $string, 0, 'UTF-8');
  90. }
  91. return $string === $this->string;
  92. }
  93. public function indexOf(string|iterable|AbstractString $needle, int $offset = 0): ?int
  94. {
  95. if ($needle instanceof AbstractString) {
  96. $needle = $needle->string;
  97. } elseif (!\is_string($needle)) {
  98. return parent::indexOf($needle, $offset);
  99. }
  100. if ('' === $needle) {
  101. return null;
  102. }
  103. $i = $this->ignoreCase ? mb_stripos($this->string, $needle, $offset, 'UTF-8') : mb_strpos($this->string, $needle, $offset, 'UTF-8');
  104. return false === $i ? null : $i;
  105. }
  106. public function indexOfLast(string|iterable|AbstractString $needle, int $offset = 0): ?int
  107. {
  108. if ($needle instanceof AbstractString) {
  109. $needle = $needle->string;
  110. } elseif (!\is_string($needle)) {
  111. return parent::indexOfLast($needle, $offset);
  112. }
  113. if ('' === $needle) {
  114. return null;
  115. }
  116. $i = $this->ignoreCase ? mb_strripos($this->string, $needle, $offset, 'UTF-8') : mb_strrpos($this->string, $needle, $offset, 'UTF-8');
  117. return false === $i ? null : $i;
  118. }
  119. public function length(): int
  120. {
  121. return mb_strlen($this->string, 'UTF-8');
  122. }
  123. public function prepend(string ...$prefix): static
  124. {
  125. $str = clone $this;
  126. $str->string = (1 >= \count($prefix) ? ($prefix[0] ?? '') : implode('', $prefix)).$this->string;
  127. if (!preg_match('//u', $str->string)) {
  128. throw new InvalidArgumentException('Invalid UTF-8 string.');
  129. }
  130. return $str;
  131. }
  132. public function replace(string $from, string $to): static
  133. {
  134. $str = clone $this;
  135. if ('' === $from || !preg_match('//u', $from)) {
  136. return $str;
  137. }
  138. if ('' !== $to && !preg_match('//u', $to)) {
  139. throw new InvalidArgumentException('Invalid UTF-8 string.');
  140. }
  141. if ($this->ignoreCase) {
  142. $str->string = implode($to, preg_split('{'.preg_quote($from).'}iuD', $this->string));
  143. } else {
  144. $str->string = str_replace($from, $to, $this->string);
  145. }
  146. return $str;
  147. }
  148. public function slice(int $start = 0, ?int $length = null): static
  149. {
  150. $str = clone $this;
  151. $str->string = mb_substr($this->string, $start, $length, 'UTF-8');
  152. return $str;
  153. }
  154. public function splice(string $replacement, int $start = 0, ?int $length = null): static
  155. {
  156. if (!preg_match('//u', $replacement)) {
  157. throw new InvalidArgumentException('Invalid UTF-8 string.');
  158. }
  159. $str = clone $this;
  160. $start = $start ? \strlen(mb_substr($this->string, 0, $start, 'UTF-8')) : 0;
  161. $length = $length ? \strlen(mb_substr($this->string, $start, $length, 'UTF-8')) : $length;
  162. $str->string = substr_replace($this->string, $replacement, $start, $length ?? \PHP_INT_MAX);
  163. return $str;
  164. }
  165. public function split(string $delimiter, ?int $limit = null, ?int $flags = null): array
  166. {
  167. if (1 > $limit ??= \PHP_INT_MAX) {
  168. throw new InvalidArgumentException('Split limit must be a positive integer.');
  169. }
  170. if ('' === $delimiter) {
  171. throw new InvalidArgumentException('Split delimiter is empty.');
  172. }
  173. if (null !== $flags) {
  174. return parent::split($delimiter.'u', $limit, $flags);
  175. }
  176. if (!preg_match('//u', $delimiter)) {
  177. throw new InvalidArgumentException('Split delimiter is not a valid UTF-8 string.');
  178. }
  179. $str = clone $this;
  180. $chunks = $this->ignoreCase
  181. ? preg_split('{'.preg_quote($delimiter).'}iuD', $this->string, $limit)
  182. : explode($delimiter, $this->string, $limit);
  183. foreach ($chunks as &$chunk) {
  184. $str->string = $chunk;
  185. $chunk = clone $str;
  186. }
  187. return $chunks;
  188. }
  189. public function startsWith(string|iterable|AbstractString $prefix): bool
  190. {
  191. if ($prefix instanceof AbstractString) {
  192. $prefix = $prefix->string;
  193. } elseif (!\is_string($prefix)) {
  194. return parent::startsWith($prefix);
  195. }
  196. if ('' === $prefix || !preg_match('//u', $prefix)) {
  197. return false;
  198. }
  199. if ($this->ignoreCase) {
  200. return 0 === mb_stripos($this->string, $prefix, 0, 'UTF-8');
  201. }
  202. return 0 === strncmp($this->string, $prefix, \strlen($prefix));
  203. }
  204. }