Differ.php 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. <?php declare(strict_types=1);
  2. /*
  3. * This file is part of sebastian/diff.
  4. *
  5. * (c) Sebastian Bergmann <sebastian@phpunit.de>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace SebastianBergmann\Diff;
  11. use const PHP_INT_SIZE;
  12. use const PREG_SPLIT_DELIM_CAPTURE;
  13. use const PREG_SPLIT_NO_EMPTY;
  14. use function array_shift;
  15. use function array_unshift;
  16. use function array_values;
  17. use function count;
  18. use function current;
  19. use function end;
  20. use function is_string;
  21. use function key;
  22. use function min;
  23. use function preg_split;
  24. use function prev;
  25. use function reset;
  26. use function str_ends_with;
  27. use function substr;
  28. use SebastianBergmann\Diff\Output\DiffOutputBuilderInterface;
  29. final class Differ
  30. {
  31. public const OLD = 0;
  32. public const ADDED = 1;
  33. public const REMOVED = 2;
  34. public const DIFF_LINE_END_WARNING = 3;
  35. public const NO_LINE_END_EOF_WARNING = 4;
  36. private DiffOutputBuilderInterface $outputBuilder;
  37. public function __construct(DiffOutputBuilderInterface $outputBuilder)
  38. {
  39. $this->outputBuilder = $outputBuilder;
  40. }
  41. public function diff(array|string $from, array|string $to, ?LongestCommonSubsequenceCalculator $lcs = null): string
  42. {
  43. $diff = $this->diffToArray($from, $to, $lcs);
  44. return $this->outputBuilder->getDiff($diff);
  45. }
  46. public function diffToArray(array|string $from, array|string $to, ?LongestCommonSubsequenceCalculator $lcs = null): array
  47. {
  48. if (is_string($from)) {
  49. $from = $this->splitStringByLines($from);
  50. }
  51. if (is_string($to)) {
  52. $to = $this->splitStringByLines($to);
  53. }
  54. [$from, $to, $start, $end] = self::getArrayDiffParted($from, $to);
  55. if ($lcs === null) {
  56. $lcs = $this->selectLcsImplementation($from, $to);
  57. }
  58. $common = $lcs->calculate(array_values($from), array_values($to));
  59. $diff = [];
  60. foreach ($start as $token) {
  61. $diff[] = [$token, self::OLD];
  62. }
  63. reset($from);
  64. reset($to);
  65. foreach ($common as $token) {
  66. while (($fromToken = reset($from)) !== $token) {
  67. $diff[] = [array_shift($from), self::REMOVED];
  68. }
  69. while (($toToken = reset($to)) !== $token) {
  70. $diff[] = [array_shift($to), self::ADDED];
  71. }
  72. $diff[] = [$token, self::OLD];
  73. array_shift($from);
  74. array_shift($to);
  75. }
  76. while (($token = array_shift($from)) !== null) {
  77. $diff[] = [$token, self::REMOVED];
  78. }
  79. while (($token = array_shift($to)) !== null) {
  80. $diff[] = [$token, self::ADDED];
  81. }
  82. foreach ($end as $token) {
  83. $diff[] = [$token, self::OLD];
  84. }
  85. if ($this->detectUnmatchedLineEndings($diff)) {
  86. array_unshift($diff, ["#Warning: Strings contain different line endings!\n", self::DIFF_LINE_END_WARNING]);
  87. }
  88. return $diff;
  89. }
  90. private function splitStringByLines(string $input): array
  91. {
  92. return preg_split('/(.*\R)/', $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
  93. }
  94. private function selectLcsImplementation(array $from, array $to): LongestCommonSubsequenceCalculator
  95. {
  96. // We do not want to use the time-efficient implementation if its memory
  97. // footprint will probably exceed this value. Note that the footprint
  98. // calculation is only an estimation for the matrix and the LCS method
  99. // will typically allocate a bit more memory than this.
  100. $memoryLimit = 100 * 1024 * 1024;
  101. if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
  102. return new MemoryEfficientLongestCommonSubsequenceCalculator;
  103. }
  104. return new TimeEfficientLongestCommonSubsequenceCalculator;
  105. }
  106. private function calculateEstimatedFootprint(array $from, array $to): float|int
  107. {
  108. $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;
  109. return $itemSize * min(count($from), count($to)) ** 2;
  110. }
  111. private function detectUnmatchedLineEndings(array $diff): bool
  112. {
  113. $newLineBreaks = ['' => true];
  114. $oldLineBreaks = ['' => true];
  115. foreach ($diff as $entry) {
  116. if (self::OLD === $entry[1]) {
  117. $ln = $this->getLinebreak($entry[0]);
  118. $oldLineBreaks[$ln] = true;
  119. $newLineBreaks[$ln] = true;
  120. } elseif (self::ADDED === $entry[1]) {
  121. $newLineBreaks[$this->getLinebreak($entry[0])] = true;
  122. } elseif (self::REMOVED === $entry[1]) {
  123. $oldLineBreaks[$this->getLinebreak($entry[0])] = true;
  124. }
  125. }
  126. // if either input or output is a single line without breaks than no warning should be raised
  127. if (['' => true] === $newLineBreaks || ['' => true] === $oldLineBreaks) {
  128. return false;
  129. }
  130. // two-way compare
  131. foreach ($newLineBreaks as $break => $set) {
  132. if (!isset($oldLineBreaks[$break])) {
  133. return true;
  134. }
  135. }
  136. foreach ($oldLineBreaks as $break => $set) {
  137. if (!isset($newLineBreaks[$break])) {
  138. return true;
  139. }
  140. }
  141. return false;
  142. }
  143. private function getLinebreak($line): string
  144. {
  145. if (!is_string($line)) {
  146. return '';
  147. }
  148. $lc = substr($line, -1);
  149. if ("\r" === $lc) {
  150. return "\r";
  151. }
  152. if ("\n" !== $lc) {
  153. return '';
  154. }
  155. if (str_ends_with($line, "\r\n")) {
  156. return "\r\n";
  157. }
  158. return "\n";
  159. }
  160. private static function getArrayDiffParted(array &$from, array &$to): array
  161. {
  162. $start = [];
  163. $end = [];
  164. reset($to);
  165. foreach ($from as $k => $v) {
  166. $toK = key($to);
  167. if ($toK === $k && $v === $to[$k]) {
  168. $start[$k] = $v;
  169. unset($from[$k], $to[$k]);
  170. } else {
  171. break;
  172. }
  173. }
  174. end($from);
  175. end($to);
  176. do {
  177. $fromK = key($from);
  178. $toK = key($to);
  179. if (null === $fromK || null === $toK || current($from) !== current($to)) {
  180. break;
  181. }
  182. prev($from);
  183. prev($to);
  184. $end = [$fromK => $from[$fromK]] + $end;
  185. unset($from[$fromK], $to[$toK]);
  186. } while (true);
  187. return [$from, $to, $start, $end];
  188. }
  189. }