AsciiSlugger.php 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\String\Slugger;
  11. use Symfony\Component\Intl\Transliterator\EmojiTransliterator;
  12. use Symfony\Component\String\AbstractUnicodeString;
  13. use Symfony\Component\String\UnicodeString;
  14. use Symfony\Contracts\Translation\LocaleAwareInterface;
  // AsciiSlugger implements LocaleAwareInterface; when that interface cannot be
  // loaded, abort right away with a message naming the package to install
  // instead of letting the class declaration below fail.
  if (false === interface_exists(LocaleAwareInterface::class)) {
      throw new \LogicException('You cannot use the "Symfony\Component\String\Slugger\AsciiSlugger" as the "symfony/translation-contracts" package is not installed. Try running "composer require symfony/translation-contracts".');
  }
  /**
   * Turns arbitrary strings into ASCII-only slugs.
   *
   * A slug is produced by transliterating the input to ASCII (optionally with a
   * locale-specific transliterator and an emoji transliterator), substituting
   * configured symbols, and finally collapsing every run of characters outside
   * [A-Za-z0-9] into the separator.
   *
   * @author Titouan Galopin <galopintitouan@gmail.com>
   */
  class AsciiSlugger implements SluggerInterface, LocaleAwareInterface
  {
      /**
       * Locale code => transliterator ID passed to \Transliterator::create().
       *
       * The "de" entry is listed here, but slug() short-circuits German locales
       * before this table is consulted.
       */
      private const LOCALE_TO_TRANSLITERATOR_ID = [
          'am' => 'Amharic-Latin',
          'ar' => 'Arabic-Latin',
          'az' => 'Azerbaijani-Latin',
          'be' => 'Belarusian-Latin',
          'bg' => 'Bulgarian-Latin',
          'bn' => 'Bengali-Latin',
          'de' => 'de-ASCII',
          'el' => 'Greek-Latin',
          'fa' => 'Persian-Latin',
          'he' => 'Hebrew-Latin',
          'hy' => 'Armenian-Latin',
          'ka' => 'Georgian-Latin',
          'kk' => 'Kazakh-Latin',
          'ky' => 'Kirghiz-Latin',
          'ko' => 'Korean-Latin',
          'mk' => 'Macedonian-Latin',
          'mn' => 'Mongolian-Latin',
          'or' => 'Oriya-Latin',
          'ps' => 'Pashto-Latin',
          'ru' => 'Russian-Latin',
          'sr' => 'Serbian-Latin',
          'sr_Cyrl' => 'Serbian-Latin',
          'th' => 'Thai-Latin',
          'tk' => 'Turkmen-Latin',
          'uk' => 'Ukrainian-Latin',
          'uz' => 'Uzbek-Latin',
          'zh' => 'Han-Latin',
      ];

      /**
       * Locale used by slug() when the caller does not pass one; null means "none".
       */
      private ?string $defaultLocale;

      /**
       * Symbol substitutions: either an array shaped locale => [symbol => replacement],
       * or a closure invoked as $closure(string $string, ?string $locale).
       */
      private \Closure|array $symbolsMap = [
          'en' => ['@' => 'at', '&' => 'and'],
      ];

      /**
       * Emoji handling: false disables it, true uses the slug locale,
       * a string forces that specific emoji locale.
       */
      private bool|string $emoji = false;

      /**
       * Cache of transliterators per locale (null is cached for locales without one).
       *
       * @var array<string, \Transliterator|null>
       */
      private array $transliterators = [];

      /**
       * @param string|null         $defaultLocale Locale applied when slug() receives none
       * @param array|\Closure|null $symbolsMap    Replaces the built-in symbols map when not null
       */
      public function __construct(?string $defaultLocale = null, array|\Closure|null $symbolsMap = null)
      {
          $this->defaultLocale = $defaultLocale;
          $this->symbolsMap = $symbolsMap ?? $this->symbolsMap;
      }

      /**
       * Sets the locale used when slug() is called without an explicit one.
       *
       * @return void
       */
      public function setLocale(string $locale)
      {
          $this->defaultLocale = $locale;
      }

      /**
       * Returns the default locale, or an empty string when none has been configured.
       *
       * The property is nullable while the declared return type is not, so returning
       * it directly would raise a TypeError on an instance built without a locale.
       */
      public function getLocale(): string
      {
          return $this->defaultLocale ?? '';
      }

      /**
       * Returns a copy of this slugger with the given emoji handling.
       *
       * @param bool|string $emoji true will use the same locale,
       *                           false will disable emoji,
       *                           and a string to use a specific locale
       *
       * @throws \LogicException when emoji support is requested but EmojiTransliterator is not available
       */
      public function withEmoji(bool|string $emoji = true): static
      {
          if (false !== $emoji && !class_exists(EmojiTransliterator::class)) {
              throw new \LogicException(sprintf('You cannot use the "%s()" method as the "symfony/intl" package is not installed. Try running "composer require symfony/intl".', __METHOD__));
          }

          // Work on a clone so the current instance keeps its configuration.
          $new = clone $this;
          $new->emoji = $emoji;

          return $new;
      }

      /**
       * Builds the slug for $string.
       *
       * @param string      $string    Text to convert
       * @param string      $separator Replacement for every run of characters outside [A-Za-z0-9];
       *                               the result is also trimmed with it
       * @param string|null $locale    Locale driving transliteration and symbol substitution;
       *                               falls back to the default locale when null
       */
      public function slug(string $string, string $separator = '-', ?string $locale = null): AbstractUnicodeString
      {
          $locale ??= $this->defaultLocale;

          $transliterator = [];
          if ($locale && ('de' === $locale || str_starts_with($locale, 'de_'))) {
              // Use the shortcut for German in UnicodeString::ascii() if possible (faster and no requirement on intl)
              $transliterator = ['de-ASCII'];
          } elseif (\function_exists('transliterator_transliterate') && $locale) {
              // The cast turns a missing transliterator (null) into an empty rule list.
              $transliterator = (array) $this->createTransliterator($locale);
          }

          if ($emojiTransliterator = $this->createEmojiTransliterator($locale)) {
              $transliterator[] = $emojiTransliterator;
          }

          if ($this->symbolsMap instanceof \Closure) {
              // If the symbols map is passed as a closure, there is no need to fallback to the parent locale
              // as the closure can just provide substitutions for all locales of interest.
              $symbolsMap = $this->symbolsMap;
              array_unshift($transliterator, static fn ($s) => $symbolsMap($s, $locale));
          }

          $unicodeString = (new UnicodeString($string))->ascii($transliterator);

          if (\is_array($this->symbolsMap)) {
              // Look the locale up first, then climb the whole parent chain
              // ("en_Latn_US" -> "en_Latn" -> "en") until a map is found.
              // "?? ''" keeps the lookup a null locale always resolved to, without a null offset.
              $candidate = $locale;
              $map = $this->symbolsMap[$candidate ?? ''] ?? null;
              while (null === $map && ($candidate = self::getParentLocale($candidate))) {
                  $map = $this->symbolsMap[$candidate] ?? null;
              }

              if ($map) {
                  foreach ($map as $char => $replace) {
                      // Surrounding spaces keep the replacement separated from neighbouring words.
                      $unicodeString = $unicodeString->replace($char, ' '.$replace.' ');
                  }
              }
          }

          return $unicodeString
              ->replaceMatches('/[^A-Za-z0-9]++/', $separator)
              ->trim($separator)
          ;
      }

      /**
       * Returns the transliterator for $locale or its closest mapped ancestor, null when there is none.
       *
       * Results, including null, are cached per locale.
       */
      private function createTransliterator(string $locale): ?\Transliterator
      {
          if (\array_key_exists($locale, $this->transliterators)) {
              return $this->transliterators[$locale];
          }

          $transliterator = null;

          // Try the locale itself, then every ancestor ("zh_Hant_TW" -> "zh_Hant" -> "zh").
          for ($candidate = $locale; null !== $candidate; $candidate = self::getParentLocale($candidate)) {
              if (!$id = self::LOCALE_TO_TRANSLITERATOR_ID[$candidate] ?? null) {
                  continue;
              }

              // Prefer the "/BGN" variant and fall back to the plain ID when it cannot be created.
              $transliterator = \Transliterator::create($id.'/BGN') ?? \Transliterator::create($id);

              // Cache under the ancestor that matched as well, so it is resolved only once.
              $this->transliterators[$candidate] = $transliterator;

              break;
          }

          return $this->transliterators[$locale] = $transliterator;
      }

      /**
       * Returns the emoji transliterator to use for $locale, or null when emoji
       * handling is disabled or no transliterator exists for the locale chain.
       */
      private function createEmojiTransliterator(?string $locale): ?EmojiTransliterator
      {
          if (\is_string($this->emoji)) {
              // An explicit emoji locale overrides the slug locale.
              $locale = $this->emoji;
          } elseif (!$this->emoji) {
              return null;
          }

          // Climb the parent chain until a transliterator can be created.
          while (null !== $locale) {
              try {
                  return EmojiTransliterator::create("emoji-$locale");
              } catch (\IntlException) {
                  $locale = self::getParentLocale($locale);
              }
          }

          return null;
      }

      /**
       * Strips the last "_"-separated segment of $locale ("sr_Cyrl_RS" -> "sr_Cyrl").
       *
       * Returns null for an empty locale or one without any "_".
       */
      private static function getParentLocale(?string $locale): ?string
      {
          if (!$locale) {
              return null;
          }

          $suffix = strrchr($locale, '_');
          if (false === $suffix) {
              // no parent locale
              return null;
          }

          return substr($locale, 0, -\strlen($suffix));
      }
  }