3 * This file is part of sebastian/diff.
5 * (c) Sebastian Bergmann <sebastian@phpunit.de>
7 * For the full copyright and license information, please view the LICENSE
8 * file that was distributed with this source code.
11 namespace SebastianBergmann\Diff;
13 use SebastianBergmann\Diff\LCS\LongestCommonSubsequence;
14 use SebastianBergmann\Diff\LCS\TimeEfficientImplementation;
15 use SebastianBergmann\Diff\LCS\MemoryEfficientImplementation;
18 * Diff implementation.
30 private $showNonDiffLines;
33 * @param string $header
34 * @param bool $showNonDiffLines
36 public function __construct($header = "--- Original\n+++ New\n", $showNonDiffLines = true)
38 $this->header = $header;
39 $this->showNonDiffLines = $showNonDiffLines;
43 * Returns the diff between two arrays or strings as string.
45 * @param array|string $from
46 * @param array|string $to
47 * @param LongestCommonSubsequence $lcs
51 public function diff($from, $to, LongestCommonSubsequence $lcs = null)
53 $from = $this->validateDiffInput($from);
54 $to = $this->validateDiffInput($to);
55 $diff = $this->diffToArray($from, $to, $lcs);
56 $old = $this->checkIfDiffInOld($diff);
57 $start = isset($old[0]) ? $old[0] : 0;
60 if ($tmp = \array_search($end, $old)) {
64 return $this->getBuffer($diff, $old, $start, $end);
68 * Casts variable to string if it is not a string or array.
74 private function validateDiffInput($input)
76 if (!\is_array($input) && !\is_string($input)) {
77 return (string) $input;
84 * Takes input of the diff array and returns the old array.
85 * Iterates through diff line by line,
91 private function checkIfDiffInOld(array $diff)
97 foreach ($diff as $line) {
98 if ($line[1] === 0 /* OLD */) {
99 if ($inOld === false) {
102 } elseif ($inOld !== false) {
103 if (($i - $inOld) > 5) {
104 $old[$inOld] = $i - 1;
117 * Generates buffer in string format, returning the patch.
126 private function getBuffer(array $diff, array $old, $start, $end)
128 $buffer = $this->header;
130 if (!isset($old[$start])) {
131 $buffer = $this->getDiffBufferElementNew($diff, $buffer, $start);
135 for ($i = $start; $i < $end; $i++) {
136 if (isset($old[$i])) {
138 $buffer = $this->getDiffBufferElementNew($diff, $buffer, $i);
140 $buffer = $this->getDiffBufferElement($diff, $buffer, $i);
148 * Gets individual buffer element.
151 * @param string $buffer
152 * @param int $diffIndex
156 private function getDiffBufferElement(array $diff, $buffer, $diffIndex)
158 if ($diff[$diffIndex][1] === 1 /* ADDED */) {
159 $buffer .= '+' . $diff[$diffIndex][0] . "\n";
160 } elseif ($diff[$diffIndex][1] === 2 /* REMOVED */) {
161 $buffer .= '-' . $diff[$diffIndex][0] . "\n";
162 } elseif ($this->showNonDiffLines === true) {
163 $buffer .= ' ' . $diff[$diffIndex][0] . "\n";
170 * Gets individual buffer element with opening.
173 * @param string $buffer
174 * @param int $diffIndex
178 private function getDiffBufferElementNew(array $diff, $buffer, $diffIndex)
180 if ($this->showNonDiffLines === true) {
181 $buffer .= "@@ @@\n";
184 return $this->getDiffBufferElement($diff, $buffer, $diffIndex);
188 * Returns the diff between two arrays or strings as array.
190 * Each array element contains two elements:
191 * - [0] => mixed $token
194 * - 2: REMOVED: $token was removed from $from
195 * - 1: ADDED: $token was added to $from
196 * - 0: OLD: $token is not changed in $to
198 * @param array|string $from
199 * @param array|string $to
200 * @param LongestCommonSubsequence $lcs
204 public function diffToArray($from, $to, LongestCommonSubsequence $lcs = null)
206 if (\is_string($from)) {
207 $fromMatches = $this->getNewLineMatches($from);
208 $from = $this->splitStringByLines($from);
209 } elseif (\is_array($from)) {
210 $fromMatches = array();
212 throw new \InvalidArgumentException('"from" must be an array or string.');
215 if (\is_string($to)) {
216 $toMatches = $this->getNewLineMatches($to);
217 $to = $this->splitStringByLines($to);
218 } elseif (\is_array($to)) {
219 $toMatches = array();
221 throw new \InvalidArgumentException('"to" must be an array or string.');
224 list($from, $to, $start, $end) = self::getArrayDiffParted($from, $to);
227 $lcs = $this->selectLcsImplementation($from, $to);
230 $common = $lcs->calculate(\array_values($from), \array_values($to));
233 if ($this->detectUnmatchedLineEndings($fromMatches, $toMatches)) {
235 '#Warning: Strings contain different line endings!',
240 foreach ($start as $token) {
241 $diff[] = array($token, 0 /* OLD */);
247 foreach ($common as $token) {
248 while (($fromToken = \reset($from)) !== $token) {
249 $diff[] = array(\array_shift($from), 2 /* REMOVED */);
252 while (($toToken = \reset($to)) !== $token) {
253 $diff[] = array(\array_shift($to), 1 /* ADDED */);
256 $diff[] = array($token, 0 /* OLD */);
262 while (($token = \array_shift($from)) !== null) {
263 $diff[] = array($token, 2 /* REMOVED */);
266 while (($token = \array_shift($to)) !== null) {
267 $diff[] = array($token, 1 /* ADDED */);
270 foreach ($end as $token) {
271 $diff[] = array($token, 0 /* OLD */);
278 * Get new strings denoting new lines from a given string.
280 * @param string $string
284 private function getNewLineMatches($string)
286 \preg_match_all('(\r\n|\r|\n)', $string, $stringMatches);
288 return $stringMatches;
292 * Checks if input is string, if so it will split it line-by-line.
294 * @param string $input
298 private function splitStringByLines($input)
300 return \preg_split('(\r\n|\r|\n)', $input);
307 * @return LongestCommonSubsequence
309 private function selectLcsImplementation(array $from, array $to)
311 // We do not want to use the time-efficient implementation if its memory
312 // footprint will probably exceed this value. Note that the footprint
313 // calculation is only an estimation for the matrix and the LCS method
314 // will typically allocate a bit more memory than this.
315 $memoryLimit = 100 * 1024 * 1024;
317 if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
318 return new MemoryEfficientImplementation;
321 return new TimeEfficientImplementation;
325 * Calculates the estimated memory footprint for the DP-based method.
332 private function calculateEstimatedFootprint(array $from, array $to)
334 $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;
336 return $itemSize * \pow(\min(\count($from), \count($to)), 2);
340 * Returns true if line ends don't match on fromMatches and toMatches.
342 * @param array $fromMatches
343 * @param array $toMatches
347 private function detectUnmatchedLineEndings(array $fromMatches, array $toMatches)
349 return isset($fromMatches[0], $toMatches[0]) &&
350 \count($fromMatches[0]) === \count($toMatches[0]) &&
351 $fromMatches[0] !== $toMatches[0];
360 private static function getArrayDiffParted(array &$from, array &$to)
367 foreach ($from as $k => $v) {
370 if ($toK === $k && $v === $to[$k]) {
373 unset($from[$k], $to[$k]);
383 $fromK = \key($from);
386 if (null === $fromK || null === $toK || \current($from) !== \current($to)) {
393 $end = array($fromK => $from[$fromK]) + $end;
394 unset($from[$fromK], $to[$toK]);
397 return array($from, $to, $start, $end);