123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239 |
- <?php declare(strict_types=1);
- /*
- * This file is part of sebastian/diff.
- *
- * (c) Sebastian Bergmann <sebastian@phpunit.de>
- *
- * For the full copyright and license information, please view the LICENSE
- * file that was distributed with this source code.
- */
- namespace SebastianBergmann\Diff;
- use const PHP_INT_SIZE;
- use const PREG_SPLIT_DELIM_CAPTURE;
- use const PREG_SPLIT_NO_EMPTY;
- use function array_shift;
- use function array_unshift;
- use function array_values;
- use function count;
- use function current;
- use function end;
- use function is_string;
- use function key;
- use function min;
- use function preg_split;
- use function prev;
- use function reset;
- use function str_ends_with;
- use function substr;
- use SebastianBergmann\Diff\Output\DiffOutputBuilderInterface;
/**
 * Computes the difference between two token sequences and renders it through
 * an output builder.
 *
 * Inputs may be arrays of tokens or strings; strings are split into lines
 * (line terminators are kept as part of each line).
 */
final class Differ
{
    /** The token is present in both inputs. */
    public const OLD = 0;

    /** The token is only present in the "to" input. */
    public const ADDED = 1;

    /** The token is only present in the "from" input. */
    public const REMOVED = 2;

    /** Type of the warning entry diffToArray() prepends when line endings differ. */
    public const DIFF_LINE_END_WARNING = 3;

    /**
     * Not emitted by this class.
     *
     * NOTE(review): presumably used by output builders to flag a missing line
     * break at the end of the input - confirm against the builders.
     */
    public const NO_LINE_END_EOF_WARNING = 4;

    /**
     * @param DiffOutputBuilderInterface $outputBuilder builder used by diff() to render the computed diff
     */
    public function __construct(private DiffOutputBuilderInterface $outputBuilder)
    {
    }

    /**
     * Returns the diff between two arrays or strings, rendered by the output builder.
     *
     * @param array|string                            $from original tokens, or a string that is split into lines
     * @param array|string                            $to   changed tokens, or a string that is split into lines
     * @param null|LongestCommonSubsequenceCalculator $lcs  calculator to use; selected automatically when null
     */
    public function diff(array|string $from, array|string $to, ?LongestCommonSubsequenceCalculator $lcs = null): string
    {
        return $this->outputBuilder->getDiff(
            $this->diffToArray($from, $to, $lcs),
        );
    }

    /**
     * Returns the diff between two arrays or strings as a list of entries.
     *
     * Every entry is a two-element array: the token at index 0 and one of the
     * OLD / ADDED / REMOVED constants at index 1. When the old and the new side
     * use different line endings, a DIFF_LINE_END_WARNING entry is prepended.
     *
     * @param array|string                            $from original tokens, or a string that is split into lines
     * @param array|string                            $to   changed tokens, or a string that is split into lines
     * @param null|LongestCommonSubsequenceCalculator $lcs  calculator to use; selected automatically when null
     *
     * @return list<array{0: mixed, 1: int}>
     */
    public function diffToArray(array|string $from, array|string $to, ?LongestCommonSubsequenceCalculator $lcs = null): array
    {
        if (is_string($from)) {
            $from = $this->splitStringByLines($from);
        }

        if (is_string($to)) {
            $to = $this->splitStringByLines($to);
        }

        // Strip the common prefix and suffix so the LCS only has to look at
        // the part that actually differs.
        [$from, $to, $start, $end] = self::getArrayDiffParted($from, $to);

        // Keys carry no meaning from here on; re-index so the remainders can
        // be walked with plain integer cursors.
        $from = array_values($from);
        $to   = array_values($to);

        if ($from === [] || $to === []) {
            // The LCS of anything with an empty sequence is empty, so there is
            // no need to pick (or run) a calculator.
            $common = [];
        } else {
            $lcs ??= $this->selectLcsImplementation($from, $to);
            $common = $lcs->calculate($from, $to);
        }

        $diff = [];

        foreach ($start as $token) {
            $diff[] = [$token, self::OLD];
        }

        // Walk both remainders with cursors instead of array_shift(): this
        // avoids re-indexing the arrays on every step, cannot spin forever on
        // an exhausted array and does not mistake a null token for "empty".
        $fromCount = count($from);
        $toCount   = count($to);
        $fromIndex = 0;
        $toIndex   = 0;

        foreach ($common as $token) {
            while ($fromIndex < $fromCount && $from[$fromIndex] !== $token) {
                $diff[] = [$from[$fromIndex++], self::REMOVED];
            }

            while ($toIndex < $toCount && $to[$toIndex] !== $token) {
                $diff[] = [$to[$toIndex++], self::ADDED];
            }

            $diff[] = [$token, self::OLD];

            // Step over the common token on both sides.
            $fromIndex++;
            $toIndex++;
        }

        // Whatever is left after the last common token was removed / added.
        for (; $fromIndex < $fromCount; $fromIndex++) {
            $diff[] = [$from[$fromIndex], self::REMOVED];
        }

        for (; $toIndex < $toCount; $toIndex++) {
            $diff[] = [$to[$toIndex], self::ADDED];
        }

        foreach ($end as $token) {
            $diff[] = [$token, self::OLD];
        }

        if ($this->detectUnmatchedLineEndings($diff)) {
            array_unshift($diff, ["#Warning: Strings contain different line endings!\n", self::DIFF_LINE_END_WARNING]);
        }

        return $diff;
    }

    /**
     * Splits a string into lines; every line keeps the line break the pattern
     * matched at its end, and a trailing part without line break is kept as
     * the last element. Empty pieces are dropped.
     */
    private function splitStringByLines(string $input): array
    {
        return preg_split('/(.*\R)/', $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Picks the time-efficient LCS implementation unless its estimated memory
     * footprint exceeds the limit, in which case the memory-efficient one is used.
     */
    private function selectLcsImplementation(array $from, array $to): LongestCommonSubsequenceCalculator
    {
        // We do not want to use the time-efficient implementation if its memory
        // footprint will probably exceed this value. Note that the footprint
        // calculation is only an estimation for the matrix and the LCS method
        // will typically allocate a bit more memory than this.
        $memoryLimit = 100 * 1024 * 1024;

        if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
            return new MemoryEfficientLongestCommonSubsequenceCalculator;
        }

        return new TimeEfficientLongestCommonSubsequenceCalculator;
    }

    /**
     * Estimates the footprint as a per-item size (which depends on
     * PHP_INT_SIZE) multiplied by the squared length of the shorter input.
     */
    private function calculateEstimatedFootprint(array $from, array $to): float|int
    {
        $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;

        return $itemSize * (min(count($from), count($to)) ** 2);
    }

    /**
     * Tells whether the old side (OLD and REMOVED entries) and the new side
     * (OLD and ADDED entries) of a diff use different sets of line breaks.
     * Entries of any other type are ignored.
     */
    private function detectUnmatchedLineEndings(array $diff): bool
    {
        $newLineBreaks = ['' => true];
        $oldLineBreaks = ['' => true];

        foreach ($diff as $entry) {
            if (self::OLD === $entry[1]) {
                $ln                 = $this->getLinebreak($entry[0]);
                $oldLineBreaks[$ln] = true;
                $newLineBreaks[$ln] = true;
            } elseif (self::ADDED === $entry[1]) {
                $newLineBreaks[$this->getLinebreak($entry[0])] = true;
            } elseif (self::REMOVED === $entry[1]) {
                $oldLineBreaks[$this->getLinebreak($entry[0])] = true;
            }
        }

        // if either input or output is a single line without breaks then no warning should be raised
        if (['' => true] === $newLineBreaks || ['' => true] === $oldLineBreaks) {
            return false;
        }

        // two-way compare
        foreach ($newLineBreaks as $break => $set) {
            if (!isset($oldLineBreaks[$break])) {
                return true;
            }
        }

        foreach ($oldLineBreaks as $break => $set) {
            if (!isset($newLineBreaks[$break])) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns the line break ("\r\n", "\n" or "\r") a line ends with, or an
     * empty string when the value is not a string or has no trailing line break.
     */
    private function getLinebreak(mixed $line): string
    {
        if (!is_string($line)) {
            return '';
        }

        // Check the two-character break first so "\r\n" is not reported as "\n".
        if (str_ends_with($line, "\r\n")) {
            return "\r\n";
        }

        return match (substr($line, -1)) {
            "\r"    => "\r",
            "\n"    => "\n",
            default => '',
        };
    }

    /**
     * Strips the common leading and trailing elements from both arrays.
     *
     * Both arguments are modified in place; original keys are preserved in all
     * returned parts.
     *
     * @return array{0: array, 1: array, 2: array, 3: array} remaining "from", remaining "to", common start, common end
     */
    private static function getArrayDiffParted(array &$from, array &$to): array
    {
        $start = [];
        $end   = [];

        reset($to);

        // Common prefix: both the key and the value have to match.
        foreach ($from as $k => $v) {
            $toK = key($to);

            if ($toK !== $k || $v !== $to[$k]) {
                break;
            }

            $start[$k] = $v;

            unset($from[$k], $to[$k]);
        }

        end($from);
        end($to);

        // Common suffix: walk both arrays backwards while the values match.
        do {
            $fromK = key($from);
            $toK   = key($to);

            if (null === $fromK || null === $toK || current($from) !== current($to)) {
                break;
            }

            // Move the pointers before unsetting the elements they point at.
            prev($from);
            prev($to);

            // Prepend, so $end stays in original order.
            $end = [$fromK => $from[$fromK]] + $end;

            unset($from[$fromK], $to[$toK]);
        } while (true);

        return [$from, $to, $start, $end];
    }
}
|