TokenParser.php 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. <?php
  2. namespace Doctrine\Common\Annotations;
  3. use function array_merge;
  4. use function count;
  5. use function explode;
  6. use function strtolower;
  7. use function token_get_all;
  8. use const PHP_VERSION_ID;
  9. use const T_AS;
  10. use const T_COMMENT;
  11. use const T_DOC_COMMENT;
  12. use const T_NAME_FULLY_QUALIFIED;
  13. use const T_NAME_QUALIFIED;
  14. use const T_NAMESPACE;
  15. use const T_NS_SEPARATOR;
  16. use const T_STRING;
  17. use const T_USE;
  18. use const T_WHITESPACE;
  /**
   * Parses a file for namespaces/use/class declarations.
   *
   * The parser walks the token stream produced by token_get_all() with an
   * internal pointer that only moves forward; every parse*() method consumes
   * tokens starting at the current pointer position.
   */
  class TokenParser
  {
      /**
       * The token list as returned by token_get_all(): each entry is either a
       * token array or a single-character string.
       *
       * @phpstan-var list<mixed[]|string>
       */
      private $tokens;

      /**
       * The number of tokens.
       *
       * @var int
       */
      private $numTokens;

      /**
       * The current array pointer (index of the next token to be read).
       *
       * @var int
       */
      private $pointer = 0;

      /**
       * @param string $contents The PHP source code to tokenize.
       */
      public function __construct(string $contents)
      {
          $this->tokens = token_get_all($contents);

          // The PHP parser sets internal compiler globals for certain things. Annoyingly, the last docblock comment
          // it saw gets stored in doc_comment. When it comes to compile the next thing to be include()d this stored
          // doc_comment becomes owned by the first thing the compiler sees in the file that it considers might have
          // a docblock. If the first thing in the file is a class without a doc block this would cause calls to
          // getDocBlock() on said class to return our long lost doc_comment. Argh.
          // To work around this, make the parser parse an empty docblock. Sure getDocBlock() will return this, but
          // at least it's harmless to us.
          token_get_all("<?php\n/**\n *\n */");

          $this->numTokens = count($this->tokens);
      }

      /**
       * Gets the next non whitespace and non comment token and advances the pointer past it.
       *
       * @param bool $docCommentIsComment If TRUE then a doc comment is considered a comment and skipped.
       *                                  If FALSE then only whitespace and normal comments are skipped.
       *
       * @return mixed[]|string|null The token if exists, null otherwise.
       */
      public function next(bool $docCommentIsComment = true)
      {
          while ($this->pointer < $this->numTokens) {
              $token = $this->tokens[$this->pointer++];

              // For array tokens index 0 is the token id; for single-character string tokens it is the
              // character itself, which never strictly equals an integer token id.
              $type = $token[0];

              if (
                  $type === T_WHITESPACE ||
                  $type === T_COMMENT ||
                  ($docCommentIsComment && $type === T_DOC_COMMENT)
              ) {
                  continue;
              }

              return $token;
          }

          return null;
      }

      /**
       * Parses a single use statement.
       *
       * Must be called right after the T_USE token has been consumed. Parsing stops after the terminating
       * ";" or at the first token that cannot belong to a class import (that token is consumed as well).
       *
       * @return array<string, string> A list with all found class names for a use statement,
       *                               keyed by the lower-cased alias.
       */
      public function parseUseStatement()
      {
          $groupRoot     = '';
          $class         = '';
          $alias         = '';
          $statements    = [];
          $explicitAlias = false;

          while (($token = $this->next()) !== null) {
              if ($token[0] === T_STRING) {
                  // Before "as" the identifier is part of the class name and the implicit alias;
                  // after "as" it is the explicit alias only.
                  if (! $explicitAlias) {
                      $class .= $token[1];
                  }

                  $alias = $token[1];
              } elseif (
                  PHP_VERSION_ID >= 80000 &&
                  ($token[0] === T_NAME_QUALIFIED || $token[0] === T_NAME_FULLY_QUALIFIED)
              ) {
                  // PHP 8 emits a whole (fully) qualified name as one token; the implicit alias is its last segment.
                  $class .= $token[1];

                  $classSplit = explode('\\', $token[1]);
                  $alias      = $classSplit[count($classSplit) - 1];
              } elseif ($token[0] === T_NS_SEPARATOR) {
                  $class .= '\\';
                  $alias  = '';
              } elseif ($token[0] === T_AS) {
                  $explicitAlias = true;
                  $alias         = '';
              } elseif ($token === ',' || $token === ';') {
                  // Only record an import when a class name was actually collected. A trailing comma in a
                  // group use ("use Foo\{Bar, Baz,};") reaches this point with nothing pending and must not
                  // produce an entry with an empty alias.
                  if ($class !== '') {
                      $statements[strtolower($alias)] = $groupRoot . $class;
                  }

                  if ($token === ';') {
                      break;
                  }

                  $class         = '';
                  $alias         = '';
                  $explicitAlias = false;
              } elseif ($token === '{') {
                  // Start of a group use: everything collected so far is the common prefix.
                  $groupRoot = $class;
                  $class     = '';
              } elseif ($token === '}') {
                  continue;
              } else {
                  // Any other token means this is not a class import we can handle; stop here.
                  break;
              }
          }

          return $statements;
      }

      /**
       * Gets all use statements.
       *
       * Scans the remaining tokens. Statements collected so far are discarded whenever a namespace
       * declaration matching $namespaceName is encountered.
       *
       * @param string $namespaceName The namespace name of the reflected class.
       *
       * @return array<string, string> A list with all found use statements.
       */
      public function parseUseStatements(string $namespaceName)
      {
          $statements = [];

          while (($token = $this->next()) !== null) {
              if ($token[0] === T_USE) {
                  $statements = array_merge($statements, $this->parseUseStatement());
                  continue;
              }

              if ($token[0] !== T_NAMESPACE || $this->parseNamespace() !== $namespaceName) {
                  continue;
              }

              // Get fresh array for new namespace. This is to prevent the parser to collect the use statements
              // for a previous namespace with the same name. This is the case if a namespace is defined twice
              // or if a namespace with the same name is commented out.
              $statements = [];
          }

          return $statements;
      }

      /**
       * Gets the namespace.
       *
       * Concatenates consecutive name tokens starting at the current pointer. The first token that is
       * not part of the name ends the scan and is consumed too.
       *
       * @return string The found namespace.
       */
      public function parseNamespace()
      {
          $name = '';

          while (
              ($token = $this->next()) !== null && ($token[0] === T_STRING || $token[0] === T_NS_SEPARATOR || (
                  PHP_VERSION_ID >= 80000 &&
                  ($token[0] === T_NAME_QUALIFIED || $token[0] === T_NAME_FULLY_QUALIFIED)
              ))
          ) {
              $name .= $token[1];
          }

          return $name;
      }

      /**
       * Gets the class name.
       *
       * @return string The found class name.
       */
      public function parseClass()
      {
          // Namespaces and class names are tokenized the same: T_STRINGs
          // separated by T_NS_SEPARATOR so we can use one function to provide
          // both.
          return $this->parseNamespace();
      }
  }