DocLexer.php 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. <?php
  2. declare(strict_types=1);
  3. /*
  4. * This file is part of PHP CS Fixer.
  5. *
  6. * (c) Fabien Potencier <fabien@symfony.com>
  7. * Dariusz Rumiński <dariusz.ruminski@gmail.com>
  8. *
  9. * This source file is subject to the MIT license that is bundled
  10. * with this source code in the file LICENSE.
  11. */
  12. namespace PhpCsFixer\Doctrine\Annotation;
  13. use PhpCsFixer\Preg;
  /**
   * Regex-based lexer that splits an input string into typed Token objects
   * (identifiers, numbers, double-quoted strings and single punctuation
   * characters) and exposes them through a look-ahead cursor.
   *
   * Copyright (c) 2006-2013 Doctrine Project.
   *
   * Permission is hereby granted, free of charge, to any person obtaining a copy of
   * this software and associated documentation files (the "Software"), to deal in
   * the Software without restriction, including without limitation the rights to
   * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   * of the Software, and to permit persons to whom the Software is furnished to do
   * so, subject to the following conditions:
   *
   * The above copyright notice and this permission notice shall be included in all
   * copies or substantial portions of the Software.
   *
   * @internal
   */
  final class DocLexer
  {
      public const T_NONE = 1;
      public const T_INTEGER = 2;
      public const T_STRING = 3;
      public const T_FLOAT = 4;

      // All tokens that are also identifiers should be >= 100
      public const T_IDENTIFIER = 100;
      public const T_AT = 101;
      public const T_CLOSE_CURLY_BRACES = 102;
      public const T_CLOSE_PARENTHESIS = 103;
      public const T_COMMA = 104;
      public const T_EQUALS = 105;
      public const T_FALSE = 106;
      public const T_NAMESPACE_SEPARATOR = 107;
      public const T_OPEN_CURLY_BRACES = 108;
      public const T_OPEN_PARENTHESIS = 109;
      public const T_TRUE = 110;
      public const T_NULL = 111;
      public const T_COLON = 112;
      public const T_MINUS = 113;

      /**
       * Exact-match lookup from a punctuation character to its token type.
       *
       * @var array<string, self::T_*>
       */
      private array $noCase = [
          '@' => self::T_AT,
          ',' => self::T_COMMA,
          '(' => self::T_OPEN_PARENTHESIS,
          ')' => self::T_CLOSE_PARENTHESIS,
          '{' => self::T_OPEN_CURLY_BRACES,
          '}' => self::T_CLOSE_CURLY_BRACES,
          '=' => self::T_EQUALS,
          ':' => self::T_COLON,
          '-' => self::T_MINUS,
          '\\' => self::T_NAMESPACE_SEPARATOR,
      ];

      /** @var list<Token> */
      private array $tokens = [];

      /** Base index into $tokens; within this class it is only ever reset to 0. */
      private int $position = 0;

      /** Look-ahead offset added to $position by peek(). */
      private int $peek = 0;

      /** Combined scanning pattern, built on the first scan() call and reused afterwards. */
      private ?string $regex = null;

      /**
       * Drops any previously scanned tokens, rewinds both cursors and tokenizes $input.
       */
      public function setInput(string $input): void
      {
          $this->tokens = [];
          $this->reset();
          $this->scan($input);
      }

      /**
       * Rewinds the position and look-ahead cursors to the first token; the
       * scanned tokens themselves are kept.
       */
      public function reset(): void
      {
          $this->peek = 0;
          $this->position = 0;
      }

      /**
       * Returns the token under the look-ahead cursor and advances that cursor by one.
       *
       * @return null|Token null when the cursor is past the last token (the cursor is then left unchanged)
       */
      public function peek(): ?Token
      {
          $index = $this->position + $this->peek;

          if (!isset($this->tokens[$index])) {
              return null;
          }

          ++$this->peek;

          return $this->tokens[$index];
      }

      /**
       * Patterns whose matches become tokens: identifiers (optionally containing
       * backslashes and colons), numbers with optional fraction/exponent, and
       * double-quoted strings in which `""` stands for a literal quote.
       *
       * @return list<string>
       */
      private function getCatchablePatterns(): array
      {
          return [
              '[a-z_\\\][a-z0-9_\:\\\]*[a-z_][a-z0-9_]*',
              '(?:[+-]?[0-9]+(?:[\.][0-9]+)*)(?:[eE][+-]?[0-9]+)?',
              '"(?:""|[^"])*+"',
          ];
      }

      /**
       * Patterns appended after the catchable ones. Whitespace and asterisk runs
       * are not wrapped in a capture group, so they act purely as separators;
       * `(.)` captures any other single character as a token of its own.
       *
       * @return list<string>
       */
      private function getNonCatchablePatterns(): array
      {
          return ['\s+', '\*+', '(.)'];
      }

      /**
       * Classifies a raw lexeme.
       *
       * @param string $value the lexeme (never empty: scan() drops empty pieces); passed by
       *                      reference because a quoted string is rewritten to its unquoted
       *                      content with `""` collapsed to `"`
       *
       * @return self::T_*
       */
      private function getType(string &$value): int
      {
          if ('"' === $value[0]) {
              // Strip the surrounding quotes, then undo the doubled-quote escaping.
              $value = str_replace('""', '"', substr($value, 1, -1));

              return self::T_STRING;
          }

          if (isset($this->noCase[$value])) {
              return $this->noCase[$value];
          }

          // Identifiers start with an underscore, a backslash or an ASCII letter.
          if ('_' === $value[0] || '\\' === $value[0] || !Preg::match('/[^A-Za-z]/', $value[0])) {
              return self::T_IDENTIFIER;
          }

          if (is_numeric($value)) {
              // A decimal point or an exponent marker makes the literal a float.
              return false !== strpbrk($value, '.eE') ? self::T_FLOAT : self::T_INTEGER;
          }

          return self::T_NONE;
      }

      /**
       * Splits $input on the combined pattern and appends one Token per captured
       * piece, recording the offset reported by the split for each piece.
       */
      private function scan(string $input): void
      {
          $this->regex ??= \sprintf(
              '/(%s)|%s/%s',
              implode(')|(', $this->getCatchablePatterns()),
              implode('|', $this->getNonCatchablePatterns()),
              'iu'
          );

          $flags = PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE;

          foreach (Preg::split($this->regex, $input, -1, $flags) as [$content, $offset]) {
              // getType() may rewrite $content (unquoting strings), so it has to run
              // before the Token is constructed from it.
              $type = $this->getType($content);
              $this->tokens[] = new Token($type, $content, (int) $offset);
          }
      }
  }