ByteString.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\String;
  11. use Symfony\Component\String\Exception\ExceptionInterface;
  12. use Symfony\Component\String\Exception\InvalidArgumentException;
  13. use Symfony\Component\String\Exception\RuntimeException;
  14. /**
  15. * Represents a binary-safe string of bytes.
  16. *
  17. * @author Nicolas Grekas <p@tchwork.com>
  18. * @author Hugo Hamon <hugohamon@neuf.fr>
  19. *
  20. * @throws ExceptionInterface
  21. */
  22. class ByteString extends AbstractString
  23. {
  24. private const ALPHABET_ALPHANUMERIC = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz';
  25. public function __construct(string $string = '')
  26. {
  27. $this->string = $string;
  28. }
  29. /*
  30. * The following method was derived from code of the Hack Standard Library (v4.40 - 2020-05-03)
  31. *
  32. * https://github.com/hhvm/hsl/blob/80a42c02f036f72a42f0415e80d6b847f4bf62d5/src/random/private.php#L16
  33. *
  34. * Code subject to the MIT license (https://github.com/hhvm/hsl/blob/master/LICENSE).
  35. *
  36. * Copyright (c) 2004-2020, Facebook, Inc. (https://www.facebook.com/)
  37. */
  38. public static function fromRandom(int $length = 16, ?string $alphabet = null): self
  39. {
  40. if ($length <= 0) {
  41. throw new InvalidArgumentException(sprintf('A strictly positive length is expected, "%d" given.', $length));
  42. }
  43. $alphabet ??= self::ALPHABET_ALPHANUMERIC;
  44. $alphabetSize = \strlen($alphabet);
  45. $bits = (int) ceil(log($alphabetSize, 2.0));
  46. if ($bits <= 0 || $bits > 56) {
  47. throw new InvalidArgumentException('The length of the alphabet must in the [2^1, 2^56] range.');
  48. }
  49. $ret = '';
  50. while ($length > 0) {
  51. $urandomLength = (int) ceil(2 * $length * $bits / 8.0);
  52. $data = random_bytes($urandomLength);
  53. $unpackedData = 0;
  54. $unpackedBits = 0;
  55. for ($i = 0; $i < $urandomLength && $length > 0; ++$i) {
  56. // Unpack 8 bits
  57. $unpackedData = ($unpackedData << 8) | \ord($data[$i]);
  58. $unpackedBits += 8;
  59. // While we have enough bits to select a character from the alphabet, keep
  60. // consuming the random data
  61. for (; $unpackedBits >= $bits && $length > 0; $unpackedBits -= $bits) {
  62. $index = ($unpackedData & ((1 << $bits) - 1));
  63. $unpackedData >>= $bits;
  64. // Unfortunately, the alphabet size is not necessarily a power of two.
  65. // Worst case, it is 2^k + 1, which means we need (k+1) bits and we
  66. // have around a 50% chance of missing as k gets larger
  67. if ($index < $alphabetSize) {
  68. $ret .= $alphabet[$index];
  69. --$length;
  70. }
  71. }
  72. }
  73. }
  74. return new static($ret);
  75. }
  76. public function bytesAt(int $offset): array
  77. {
  78. $str = $this->string[$offset] ?? '';
  79. return '' === $str ? [] : [\ord($str)];
  80. }
  81. public function append(string ...$suffix): static
  82. {
  83. $str = clone $this;
  84. $str->string .= 1 >= \count($suffix) ? ($suffix[0] ?? '') : implode('', $suffix);
  85. return $str;
  86. }
  87. public function camel(): static
  88. {
  89. $str = clone $this;
  90. $parts = explode(' ', trim(ucwords(preg_replace('/[^a-zA-Z0-9\x7f-\xff]++/', ' ', $this->string))));
  91. $parts[0] = 1 !== \strlen($parts[0]) && ctype_upper($parts[0]) ? $parts[0] : lcfirst($parts[0]);
  92. $str->string = implode('', $parts);
  93. return $str;
  94. }
  95. public function chunk(int $length = 1): array
  96. {
  97. if (1 > $length) {
  98. throw new InvalidArgumentException('The chunk length must be greater than zero.');
  99. }
  100. if ('' === $this->string) {
  101. return [];
  102. }
  103. $str = clone $this;
  104. $chunks = [];
  105. foreach (str_split($this->string, $length) as $chunk) {
  106. $str->string = $chunk;
  107. $chunks[] = clone $str;
  108. }
  109. return $chunks;
  110. }
  111. public function endsWith(string|iterable|AbstractString $suffix): bool
  112. {
  113. if ($suffix instanceof AbstractString) {
  114. $suffix = $suffix->string;
  115. } elseif (!\is_string($suffix)) {
  116. return parent::endsWith($suffix);
  117. }
  118. return '' !== $suffix && \strlen($this->string) >= \strlen($suffix) && 0 === substr_compare($this->string, $suffix, -\strlen($suffix), null, $this->ignoreCase);
  119. }
  120. public function equalsTo(string|iterable|AbstractString $string): bool
  121. {
  122. if ($string instanceof AbstractString) {
  123. $string = $string->string;
  124. } elseif (!\is_string($string)) {
  125. return parent::equalsTo($string);
  126. }
  127. if ('' !== $string && $this->ignoreCase) {
  128. return 0 === strcasecmp($string, $this->string);
  129. }
  130. return $string === $this->string;
  131. }
  132. public function folded(): static
  133. {
  134. $str = clone $this;
  135. $str->string = strtolower($str->string);
  136. return $str;
  137. }
  138. public function indexOf(string|iterable|AbstractString $needle, int $offset = 0): ?int
  139. {
  140. if ($needle instanceof AbstractString) {
  141. $needle = $needle->string;
  142. } elseif (!\is_string($needle)) {
  143. return parent::indexOf($needle, $offset);
  144. }
  145. if ('' === $needle) {
  146. return null;
  147. }
  148. $i = $this->ignoreCase ? stripos($this->string, $needle, $offset) : strpos($this->string, $needle, $offset);
  149. return false === $i ? null : $i;
  150. }
  151. public function indexOfLast(string|iterable|AbstractString $needle, int $offset = 0): ?int
  152. {
  153. if ($needle instanceof AbstractString) {
  154. $needle = $needle->string;
  155. } elseif (!\is_string($needle)) {
  156. return parent::indexOfLast($needle, $offset);
  157. }
  158. if ('' === $needle) {
  159. return null;
  160. }
  161. $i = $this->ignoreCase ? strripos($this->string, $needle, $offset) : strrpos($this->string, $needle, $offset);
  162. return false === $i ? null : $i;
  163. }
  164. public function isUtf8(): bool
  165. {
  166. return '' === $this->string || preg_match('//u', $this->string);
  167. }
  168. public function join(array $strings, ?string $lastGlue = null): static
  169. {
  170. $str = clone $this;
  171. $tail = null !== $lastGlue && 1 < \count($strings) ? $lastGlue.array_pop($strings) : '';
  172. $str->string = implode($this->string, $strings).$tail;
  173. return $str;
  174. }
  175. public function length(): int
  176. {
  177. return \strlen($this->string);
  178. }
  179. public function lower(): static
  180. {
  181. $str = clone $this;
  182. $str->string = strtolower($str->string);
  183. return $str;
  184. }
  185. public function match(string $regexp, int $flags = 0, int $offset = 0): array
  186. {
  187. $match = ((\PREG_PATTERN_ORDER | \PREG_SET_ORDER) & $flags) ? 'preg_match_all' : 'preg_match';
  188. if ($this->ignoreCase) {
  189. $regexp .= 'i';
  190. }
  191. set_error_handler(static fn ($t, $m) => throw new InvalidArgumentException($m));
  192. try {
  193. if (false === $match($regexp, $this->string, $matches, $flags | \PREG_UNMATCHED_AS_NULL, $offset)) {
  194. throw new RuntimeException('Matching failed with error: '.preg_last_error_msg());
  195. }
  196. } finally {
  197. restore_error_handler();
  198. }
  199. return $matches;
  200. }
  201. public function padBoth(int $length, string $padStr = ' '): static
  202. {
  203. $str = clone $this;
  204. $str->string = str_pad($this->string, $length, $padStr, \STR_PAD_BOTH);
  205. return $str;
  206. }
  207. public function padEnd(int $length, string $padStr = ' '): static
  208. {
  209. $str = clone $this;
  210. $str->string = str_pad($this->string, $length, $padStr, \STR_PAD_RIGHT);
  211. return $str;
  212. }
  213. public function padStart(int $length, string $padStr = ' '): static
  214. {
  215. $str = clone $this;
  216. $str->string = str_pad($this->string, $length, $padStr, \STR_PAD_LEFT);
  217. return $str;
  218. }
  219. public function prepend(string ...$prefix): static
  220. {
  221. $str = clone $this;
  222. $str->string = (1 >= \count($prefix) ? ($prefix[0] ?? '') : implode('', $prefix)).$str->string;
  223. return $str;
  224. }
  225. public function replace(string $from, string $to): static
  226. {
  227. $str = clone $this;
  228. if ('' !== $from) {
  229. $str->string = $this->ignoreCase ? str_ireplace($from, $to, $this->string) : str_replace($from, $to, $this->string);
  230. }
  231. return $str;
  232. }
  233. public function replaceMatches(string $fromRegexp, string|callable $to): static
  234. {
  235. if ($this->ignoreCase) {
  236. $fromRegexp .= 'i';
  237. }
  238. $replace = \is_array($to) || $to instanceof \Closure ? 'preg_replace_callback' : 'preg_replace';
  239. set_error_handler(static fn ($t, $m) => throw new InvalidArgumentException($m));
  240. try {
  241. if (null === $string = $replace($fromRegexp, $to, $this->string)) {
  242. $lastError = preg_last_error();
  243. foreach (get_defined_constants(true)['pcre'] as $k => $v) {
  244. if ($lastError === $v && str_ends_with($k, '_ERROR')) {
  245. throw new RuntimeException('Matching failed with '.$k.'.');
  246. }
  247. }
  248. throw new RuntimeException('Matching failed with unknown error code.');
  249. }
  250. } finally {
  251. restore_error_handler();
  252. }
  253. $str = clone $this;
  254. $str->string = $string;
  255. return $str;
  256. }
  257. public function reverse(): static
  258. {
  259. $str = clone $this;
  260. $str->string = strrev($str->string);
  261. return $str;
  262. }
  263. public function slice(int $start = 0, ?int $length = null): static
  264. {
  265. $str = clone $this;
  266. $str->string = (string) substr($this->string, $start, $length ?? \PHP_INT_MAX);
  267. return $str;
  268. }
  269. public function snake(): static
  270. {
  271. $str = $this->camel();
  272. $str->string = strtolower(preg_replace(['/([A-Z]+)([A-Z][a-z])/', '/([a-z\d])([A-Z])/'], '\1_\2', $str->string));
  273. return $str;
  274. }
  275. public function splice(string $replacement, int $start = 0, ?int $length = null): static
  276. {
  277. $str = clone $this;
  278. $str->string = substr_replace($this->string, $replacement, $start, $length ?? \PHP_INT_MAX);
  279. return $str;
  280. }
  281. public function split(string $delimiter, ?int $limit = null, ?int $flags = null): array
  282. {
  283. if (1 > $limit ??= \PHP_INT_MAX) {
  284. throw new InvalidArgumentException('Split limit must be a positive integer.');
  285. }
  286. if ('' === $delimiter) {
  287. throw new InvalidArgumentException('Split delimiter is empty.');
  288. }
  289. if (null !== $flags) {
  290. return parent::split($delimiter, $limit, $flags);
  291. }
  292. $str = clone $this;
  293. $chunks = $this->ignoreCase
  294. ? preg_split('{'.preg_quote($delimiter).'}iD', $this->string, $limit)
  295. : explode($delimiter, $this->string, $limit);
  296. foreach ($chunks as &$chunk) {
  297. $str->string = $chunk;
  298. $chunk = clone $str;
  299. }
  300. return $chunks;
  301. }
  302. public function startsWith(string|iterable|AbstractString $prefix): bool
  303. {
  304. if ($prefix instanceof AbstractString) {
  305. $prefix = $prefix->string;
  306. } elseif (!\is_string($prefix)) {
  307. return parent::startsWith($prefix);
  308. }
  309. return '' !== $prefix && 0 === ($this->ignoreCase ? strncasecmp($this->string, $prefix, \strlen($prefix)) : strncmp($this->string, $prefix, \strlen($prefix)));
  310. }
  311. public function title(bool $allWords = false): static
  312. {
  313. $str = clone $this;
  314. $str->string = $allWords ? ucwords($str->string) : ucfirst($str->string);
  315. return $str;
  316. }
  317. public function toUnicodeString(?string $fromEncoding = null): UnicodeString
  318. {
  319. return new UnicodeString($this->toCodePointString($fromEncoding)->string);
  320. }
  321. public function toCodePointString(?string $fromEncoding = null): CodePointString
  322. {
  323. $u = new CodePointString();
  324. if (\in_array($fromEncoding, [null, 'utf8', 'utf-8', 'UTF8', 'UTF-8'], true) && preg_match('//u', $this->string)) {
  325. $u->string = $this->string;
  326. return $u;
  327. }
  328. set_error_handler(static fn ($t, $m) => throw new InvalidArgumentException($m));
  329. try {
  330. try {
  331. $validEncoding = false !== mb_detect_encoding($this->string, $fromEncoding ?? 'Windows-1252', true);
  332. } catch (InvalidArgumentException $e) {
  333. if (!\function_exists('iconv')) {
  334. throw $e;
  335. }
  336. $u->string = iconv($fromEncoding ?? 'Windows-1252', 'UTF-8', $this->string);
  337. return $u;
  338. }
  339. } finally {
  340. restore_error_handler();
  341. }
  342. if (!$validEncoding) {
  343. throw new InvalidArgumentException(sprintf('Invalid "%s" string.', $fromEncoding ?? 'Windows-1252'));
  344. }
  345. $u->string = mb_convert_encoding($this->string, 'UTF-8', $fromEncoding ?? 'Windows-1252');
  346. return $u;
  347. }
  348. public function trim(string $chars = " \t\n\r\0\x0B\x0C"): static
  349. {
  350. $str = clone $this;
  351. $str->string = trim($str->string, $chars);
  352. return $str;
  353. }
  354. public function trimEnd(string $chars = " \t\n\r\0\x0B\x0C"): static
  355. {
  356. $str = clone $this;
  357. $str->string = rtrim($str->string, $chars);
  358. return $str;
  359. }
  360. public function trimStart(string $chars = " \t\n\r\0\x0B\x0C"): static
  361. {
  362. $str = clone $this;
  363. $str->string = ltrim($str->string, $chars);
  364. return $str;
  365. }
  366. public function upper(): static
  367. {
  368. $str = clone $this;
  369. $str->string = strtoupper($str->string);
  370. return $str;
  371. }
  372. public function width(bool $ignoreAnsiDecoration = true): int
  373. {
  374. $string = preg_match('//u', $this->string) ? $this->string : preg_replace('/[\x80-\xFF]/', '?', $this->string);
  375. return (new CodePointString($string))->width($ignoreAnsiDecoration);
  376. }
  377. }