StringUtils.php 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. <?php // phpcs:disable WebimpressCodingStandard.NamingConventions.AbstractClass.Prefix
  2. declare(strict_types=1);
  3. namespace Laminas\Stdlib;
  4. use Laminas\Stdlib\StringWrapper\Iconv;
  5. use Laminas\Stdlib\StringWrapper\Intl;
  6. use Laminas\Stdlib\StringWrapper\MbString;
  7. use Laminas\Stdlib\StringWrapper\Native;
  8. use Laminas\Stdlib\StringWrapper\StringWrapperInterface;
  9. use function array_search;
  10. use function defined;
  11. use function extension_loaded;
  12. use function in_array;
  13. use function is_string;
  14. use function preg_match;
  15. use function strtoupper;
  /**
   * Static helpers for working with strings in different character encodings,
   * delegating to whichever PHP string extensions are available.
   *
   * Declared abstract because it only exposes static members and never needs
   * to be instantiated.
   */
  abstract class StringUtils
  {
      /**
       * Ordered list of registered string wrapper class names.
       *
       * Stays null until the registry is first read, at which point it is
       * filled with the default wrappers (see getRegisteredWrappers()).
       *
       * @var list<class-string<StringWrapperInterface>>|null
       */
      protected static $wrapperRegistry;

      /**
       * A list of known single-byte character encodings (upper-case)
       *
       * @var string[]
       */
      protected static $singleByteEncodings = [
          'ASCII',
          '7BIT',
          '8BIT',
          'ISO-8859-1',
          'ISO-8859-2',
          'ISO-8859-3',
          'ISO-8859-4',
          'ISO-8859-5',
          'ISO-8859-6',
          'ISO-8859-7',
          'ISO-8859-8',
          'ISO-8859-9',
          'ISO-8859-10',
          'ISO-8859-11',
          'ISO-8859-13',
          'ISO-8859-14',
          'ISO-8859-15',
          'ISO-8859-16',
          'CP-1251',
          'CP-1252',
          // TODO
      ];

      /**
       * Is PCRE compiled with Unicode support?
       *
       * Null until hasPcreUnicodeSupport() has probed PCRE once.
       *
       * @var bool|null
       */
      protected static $hasPcreUnicodeSupport;

      /**
       * Get registered wrapper classes
       *
       * On first use the registry is seeded in priority order: Intl, MbString
       * and Iconv (each only when its extension is loaded), followed by the
       * Native wrapper, which is always added.
       *
       * @return string[]
       * @psalm-return list<class-string<StringWrapperInterface>>
       */
      public static function getRegisteredWrappers()
      {
          if (static::$wrapperRegistry === null) {
              static::$wrapperRegistry = [];

              if (extension_loaded('intl')) {
                  static::$wrapperRegistry[] = Intl::class;
              }

              if (extension_loaded('mbstring')) {
                  static::$wrapperRegistry[] = MbString::class;
              }

              if (extension_loaded('iconv')) {
                  static::$wrapperRegistry[] = Iconv::class;
              }

              static::$wrapperRegistry[] = Native::class;
          }

          return static::$wrapperRegistry;
      }

      /**
       * Register a string wrapper class
       *
       * The class is appended to the end of the registry; registering a class
       * that is already present is a no-op.
       *
       * @param class-string<StringWrapperInterface> $wrapper
       * @return void
       */
      public static function registerWrapper($wrapper)
      {
          $wrapper = (string) $wrapper;

          // Using getRegisteredWrappers() here to ensure that the list is initialized.
          if (! in_array($wrapper, static::getRegisteredWrappers(), true)) {
              static::$wrapperRegistry[] = $wrapper;
          }
      }

      /**
       * Unregister a string wrapper class
       *
       * Unregistering a class that is not registered is a no-op.
       *
       * @param class-string<StringWrapperInterface> $wrapper
       * @return void
       */
      public static function unregisterWrapper($wrapper)
      {
          $wrapper   = (string) $wrapper;
          $remaining = [];

          // Using getRegisteredWrappers() here to ensure that the list is initialized.
          // The registry is rebuilt instead of unset()-ing one key so that it keeps
          // contiguous 0..n-1 keys and remains a real list, as documented.
          foreach (static::getRegisteredWrappers() as $registered) {
              if ($registered !== $wrapper) {
                  $remaining[] = $registered;
              }
          }

          static::$wrapperRegistry = $remaining;
      }

      /**
       * Reset all registered wrappers so the default wrappers will be used
       *
       * @return void
       */
      public static function resetRegisteredWrappers()
      {
          static::$wrapperRegistry = null;
      }

      /**
       * Get the first registered string wrapper that supports the given
       * character encoding and, if given, conversion into the convert encoding.
       *
       * @param string      $encoding        Character encoding to support
       * @param string|null $convertEncoding OPTIONAL character encoding to convert to
       * @return StringWrapperInterface
       * @throws Exception\RuntimeException If no wrapper supports given character encodings.
       */
      public static function getWrapper($encoding = 'UTF-8', $convertEncoding = null)
      {
          // Wrappers are tried in registration order; the first match wins.
          foreach (static::getRegisteredWrappers() as $wrapperClass) {
              if ($wrapperClass::isSupported($encoding, $convertEncoding)) {
                  $wrapper = new $wrapperClass($encoding, $convertEncoding);
                  $wrapper->setEncoding($encoding, $convertEncoding);

                  return $wrapper;
              }
          }

          throw new Exception\RuntimeException(
              'No wrapper found supporting "' . $encoding . '"'
              . ($convertEncoding !== null ? ' and "' . $convertEncoding . '"' : '')
          );
      }

      /**
       * Get a list of all known single-byte character encodings
       *
       * @return string[]
       */
      public static function getSingleByteEncodings()
      {
          return static::$singleByteEncodings;
      }

      /**
       * Check if a given encoding is a known single-byte character encoding
       *
       * The lookup is case-insensitive: the name is upper-cased before it is
       * compared against the known list.
       *
       * @param string $encoding
       * @return bool
       */
      public static function isSingleByteEncoding($encoding)
      {
          // Strict comparison: both sides are strings, so no type juggling is wanted.
          return in_array(strtoupper($encoding), static::$singleByteEncodings, true);
      }

      /**
       * Check if a given string is valid UTF-8 encoded
       *
       * Non-string values are never valid. The empty string is valid.
       *
       * @param string $str
       * @return bool
       */
      public static function isValidUtf8($str)
      {
          if (! is_string($str)) {
              return false;
          }

          if ($str === '') {
              return true;
          }

          // With the "u" modifier preg_match() fails (returns false) on a subject
          // that is not valid UTF-8, so a result of 1 means the string is valid.
          return preg_match('/^./su', $str) === 1;
      }

      /**
       * Is PCRE compiled with Unicode support?
       *
       * The probe runs once; the result is cached for subsequent calls.
       *
       * @return bool
       */
      public static function hasPcreUnicodeSupport()
      {
          if (static::$hasPcreUnicodeSupport === null) {
              // The probe runs between ErrorHandler::start() and ::stop(),
              // presumably so that any PHP error it raises is captured.
              ErrorHandler::start();
              static::$hasPcreUnicodeSupport = defined('PREG_BAD_UTF8_OFFSET_ERROR')
                  && preg_match('/\pL/u', 'a') === 1;
              ErrorHandler::stop();
          }

          return static::$hasPcreUnicodeSupport;
      }
  }