Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 197 |
|
0.00% |
0 / 5 |
CRAP | |
0.00% |
0 / 1 |
diff_engine | |
0.00% |
0 / 195 |
|
0.00% |
0 / 5 |
9900 | |
0.00% |
0 / 1 |
diff | |
0.00% |
0 / 69 |
|
0.00% |
0 / 1 |
1190 | |||
_diag | |
0.00% |
0 / 49 |
|
0.00% |
0 / 1 |
420 | |||
_lcs_pos | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
30 | |||
_compareseq | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
182 | |||
_shift_boundaries | |
0.00% |
0 / 42 |
|
0.00% |
0 / 1 |
756 |
1 | <?php |
2 | /** |
3 | * |
4 | * This file is part of the phpBB Forum Software package. |
5 | * |
6 | * @copyright (c) phpBB Limited <https://www.phpbb.com> |
7 | * @license GNU General Public License, version 2 (GPL-2.0) |
8 | * |
9 | * For full copyright and license information, please see |
10 | * the docs/CREDITS.txt file. |
11 | * |
12 | */ |
13 | |
14 | /** |
15 | * @ignore |
16 | */ |
17 | if (!defined('IN_PHPBB')) |
18 | { |
19 | exit; |
20 | } |
21 | |
22 | /** |
23 | * Code from pear.php.net, Text_Diff-1.1.0 package |
24 | * http://pear.php.net/package/Text_Diff/ (native engine) |
25 | * |
26 | * Modified by phpBB Limited to meet our coding standards |
27 | * and being able to integrate into phpBB |
28 | * |
29 | * Class used internally by Text_Diff to actually compute the diffs. This |
30 | * class is implemented using native PHP code. |
31 | * |
32 | * The algorithm used here is mostly lifted from the perl module |
33 | * Algorithm::Diff (version 1.06) by Ned Konz, which is available at: |
34 | * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip |
35 | * |
36 | * More ideas are taken from: http://www.ics.uci.edu/~eppstein/161/960229.html |
37 | * |
38 | * Some ideas (and a bit of code) are taken from analyze.c, of GNU |
39 | * diffutils-2.7, which can be found at: |
40 | * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz |
41 | * |
42 | * Some ideas (subdivision by NCHUNKS > 2, and some optimizations) are from |
43 | * Geoffrey T. Dairiki <dairiki@dairiki.org>. The original PHP version of this |
44 | * code was written by him, and is used/adapted with his permission. |
45 | * |
46 | * Copyright 2004-2008 The Horde Project (http://www.horde.org/) |
47 | * |
48 | * @author Geoffrey T. Dairiki <dairiki@dairiki.org> |
49 | * @package diff |
50 | * |
51 | * @access private |
52 | */ |
53 | class diff_engine |
54 | { |
55 | /** |
56 | * If set to true we trim all lines before we compare them. This ensures that sole space/tab changes do not trigger diffs. |
57 | */ |
58 | var $skip_whitespace_changes = true; |
59 | |
60 | function diff(&$from_lines, &$to_lines, $preserve_cr = true) |
61 | { |
62 | // Remove empty lines... |
63 | // If preserve_cr is true, we basically only change \r\n and bare \r to \n to get the same carriage returns for both files |
64 | // If it is false, we try to only use \n once per line and ommit all empty lines to be able to get a proper data diff |
65 | |
66 | if (is_array($from_lines)) |
67 | { |
68 | $from_lines = implode("\n", $from_lines); |
69 | } |
70 | |
71 | if (is_array($to_lines)) |
72 | { |
73 | $to_lines = implode("\n", $to_lines); |
74 | } |
75 | |
76 | if ($preserve_cr) |
77 | { |
78 | $from_lines = explode("\n", str_replace("\r", "\n", str_replace("\r\n", "\n", $from_lines))); |
79 | $to_lines = explode("\n", str_replace("\r", "\n", str_replace("\r\n", "\n", $to_lines))); |
80 | } |
81 | else |
82 | { |
83 | $from_lines = explode("\n", preg_replace('#[\n\r]+#', "\n", $from_lines)); |
84 | $to_lines = explode("\n", preg_replace('#[\n\r]+#', "\n", $to_lines)); |
85 | } |
86 | |
87 | $n_from = count($from_lines); |
88 | $n_to = count($to_lines); |
89 | |
90 | $this->xchanged = $this->ychanged = $this->xv = $this->yv = $this->xind = $this->yind = array(); |
91 | unset($this->seq, $this->in_seq, $this->lcs); |
92 | |
93 | // Skip leading common lines. |
94 | for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) |
95 | { |
96 | if (trim($from_lines[$skip]) !== trim($to_lines[$skip])) |
97 | { |
98 | break; |
99 | } |
100 | $this->xchanged[$skip] = $this->ychanged[$skip] = false; |
101 | } |
102 | |
103 | // Skip trailing common lines. |
104 | $xi = $n_from; |
105 | $yi = $n_to; |
106 | |
107 | for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) |
108 | { |
109 | if (trim($from_lines[$xi]) !== trim($to_lines[$yi])) |
110 | { |
111 | break; |
112 | } |
113 | $this->xchanged[$xi] = $this->ychanged[$yi] = false; |
114 | } |
115 | |
116 | // Ignore lines which do not exist in both files. |
117 | for ($xi = $skip; $xi < $n_from - $endskip; $xi++) |
118 | { |
119 | if ($this->skip_whitespace_changes) $xhash[trim($from_lines[$xi])] = 1; else $xhash[$from_lines[$xi]] = 1; |
120 | } |
121 | |
122 | for ($yi = $skip; $yi < $n_to - $endskip; $yi++) |
123 | { |
124 | $line = ($this->skip_whitespace_changes) ? trim($to_lines[$yi]) : $to_lines[$yi]; |
125 | |
126 | if (($this->ychanged[$yi] = empty($xhash[$line]))) |
127 | { |
128 | continue; |
129 | } |
130 | $yhash[$line] = 1; |
131 | $this->yv[] = $line; |
132 | $this->yind[] = $yi; |
133 | } |
134 | |
135 | for ($xi = $skip; $xi < $n_from - $endskip; $xi++) |
136 | { |
137 | $line = ($this->skip_whitespace_changes) ? trim($from_lines[$xi]) : $from_lines[$xi]; |
138 | |
139 | if (($this->xchanged[$xi] = empty($yhash[$line]))) |
140 | { |
141 | continue; |
142 | } |
143 | $this->xv[] = $line; |
144 | $this->xind[] = $xi; |
145 | } |
146 | |
147 | // Find the LCS. |
148 | $this->_compareseq(0, count($this->xv), 0, count($this->yv)); |
149 | |
150 | // Merge edits when possible. |
151 | if ($this->skip_whitespace_changes) |
152 | { |
153 | $from_lines_clean = array_map('trim', $from_lines); |
154 | $to_lines_clean = array_map('trim', $to_lines); |
155 | |
156 | $this->_shift_boundaries($from_lines_clean, $this->xchanged, $this->ychanged); |
157 | $this->_shift_boundaries($to_lines_clean, $this->ychanged, $this->xchanged); |
158 | |
159 | unset($from_lines_clean, $to_lines_clean); |
160 | } |
161 | else |
162 | { |
163 | $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged); |
164 | $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged); |
165 | } |
166 | |
167 | // Compute the edit operations. |
168 | $edits = array(); |
169 | $xi = $yi = 0; |
170 | |
171 | while ($xi < $n_from || $yi < $n_to) |
172 | { |
173 | // Skip matching "snake". |
174 | $copy = array(); |
175 | |
176 | while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) |
177 | { |
178 | $copy[] = $from_lines[$xi++]; |
179 | $yi++; |
180 | } |
181 | |
182 | if ($copy) |
183 | { |
184 | $edits[] = new diff_op_copy($copy); |
185 | } |
186 | |
187 | // Find deletes & adds. |
188 | $delete = array(); |
189 | while ($xi < $n_from && $this->xchanged[$xi]) |
190 | { |
191 | $delete[] = $from_lines[$xi++]; |
192 | } |
193 | |
194 | $add = array(); |
195 | while ($yi < $n_to && $this->ychanged[$yi]) |
196 | { |
197 | $add[] = $to_lines[$yi++]; |
198 | } |
199 | |
200 | if ($delete && $add) |
201 | { |
202 | $edits[] = new diff_op_change($delete, $add); |
203 | } |
204 | else if ($delete) |
205 | { |
206 | $edits[] = new diff_op_delete($delete); |
207 | } |
208 | else if ($add) |
209 | { |
210 | $edits[] = new diff_op_add($add); |
211 | } |
212 | } |
213 | |
214 | return $edits; |
215 | } |
216 | |
217 | /** |
218 | * Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, |
219 | * XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments. |
220 | * |
221 | * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an array of |
222 | * NCHUNKS+1 (X, Y) indexes giving the diving points between sub |
223 | * sequences. The first sub-sequence is contained in (X0, X1), (Y0, Y1), |
224 | * the second in (X1, X2), (Y1, Y2) and so on. Note that (X0, Y0) == |
225 | * (XOFF, YOFF) and (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM). |
226 | * |
227 | * This function assumes that the first lines of the specified portions of |
228 | * the two files do not match, and likewise that the last lines do not |
229 | * match. The caller must trim matching lines from the beginning and end |
230 | * of the portions it is going to specify. |
231 | */ |
232 | function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) |
233 | { |
234 | $flip = false; |
235 | |
236 | if ($xlim - $xoff > $ylim - $yoff) |
237 | { |
238 | // Things seems faster (I'm not sure I understand why) when the shortest sequence is in X. |
239 | $flip = true; |
240 | list($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim); |
241 | } |
242 | |
243 | if ($flip) |
244 | { |
245 | for ($i = $ylim - 1; $i >= $yoff; $i--) |
246 | { |
247 | $ymatches[$this->xv[$i]][] = $i; |
248 | } |
249 | } |
250 | else |
251 | { |
252 | for ($i = $ylim - 1; $i >= $yoff; $i--) |
253 | { |
254 | $ymatches[$this->yv[$i]][] = $i; |
255 | } |
256 | } |
257 | |
258 | $this->lcs = 0; |
259 | $this->seq[0]= $yoff - 1; |
260 | $this->in_seq = array(); |
261 | $ymids[0] = array(); |
262 | |
263 | $numer = $xlim - $xoff + $nchunks - 1; |
264 | $x = $xoff; |
265 | |
266 | for ($chunk = 0; $chunk < $nchunks; $chunk++) |
267 | { |
268 | if ($chunk > 0) |
269 | { |
270 | for ($i = 0; $i <= $this->lcs; $i++) |
271 | { |
272 | $ymids[$i][$chunk - 1] = $this->seq[$i]; |
273 | } |
274 | } |
275 | |
276 | $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $chunk) / $nchunks); |
277 | |
278 | for (; $x < $x1; $x++) |
279 | { |
280 | $line = $flip ? $this->yv[$x] : $this->xv[$x]; |
281 | if (empty($ymatches[$line])) |
282 | { |
283 | continue; |
284 | } |
285 | $matches = $ymatches[$line]; |
286 | |
287 | reset($matches); |
288 | while ($y = current($matches)) |
289 | { |
290 | next($matches); |
291 | if (empty($this->in_seq[$y])) |
292 | { |
293 | $k = $this->_lcs_pos($y); |
294 | $ymids[$k] = $ymids[$k - 1]; |
295 | break; |
296 | } |
297 | } |
298 | |
299 | // no reset() here |
300 | while ($y = current($matches)) |
301 | { |
302 | next($matches); |
303 | if ($y > $this->seq[$k - 1]) |
304 | { |
305 | // Optimization: this is a common case: next match is just replacing previous match. |
306 | $this->in_seq[$this->seq[$k]] = false; |
307 | $this->seq[$k] = $y; |
308 | $this->in_seq[$y] = 1; |
309 | } |
310 | else if (empty($this->in_seq[$y])) |
311 | { |
312 | $k = $this->_lcs_pos($y); |
313 | $ymids[$k] = $ymids[$k - 1]; |
314 | } |
315 | } |
316 | } |
317 | } |
318 | |
319 | $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff); |
320 | $ymid = $ymids[$this->lcs]; |
321 | |
322 | for ($n = 0; $n < $nchunks - 1; $n++) |
323 | { |
324 | $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks); |
325 | $y1 = $ymid[$n] + 1; |
326 | $seps[] = $flip ? array($y1, $x1) : array($x1, $y1); |
327 | } |
328 | $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim); |
329 | |
330 | return array($this->lcs, $seps); |
331 | } |
332 | |
333 | function _lcs_pos($ypos) |
334 | { |
335 | $end = $this->lcs; |
336 | |
337 | if ($end == 0 || $ypos > $this->seq[$end]) |
338 | { |
339 | $this->seq[++$this->lcs] = $ypos; |
340 | $this->in_seq[$ypos] = 1; |
341 | return $this->lcs; |
342 | } |
343 | |
344 | $beg = 1; |
345 | while ($beg < $end) |
346 | { |
347 | $mid = (int)(($beg + $end) / 2); |
348 | if ($ypos > $this->seq[$mid]) |
349 | { |
350 | $beg = $mid + 1; |
351 | } |
352 | else |
353 | { |
354 | $end = $mid; |
355 | } |
356 | } |
357 | |
358 | $this->in_seq[$this->seq[$end]] = false; |
359 | $this->seq[$end] = $ypos; |
360 | $this->in_seq[$ypos] = 1; |
361 | |
362 | return $end; |
363 | } |
364 | |
365 | /** |
366 | * Finds LCS of two sequences. |
367 | * |
368 | * The results are recorded in the vectors $this->{x,y}changed[], by |
369 | * storing a 1 in the element for each line that is an insertion or |
370 | * deletion (ie. is not in the LCS). |
371 | * |
372 | * The subsequence of file 0 is (XOFF, XLIM) and likewise for file 1. |
373 | * |
374 | * Note that XLIM, YLIM are exclusive bounds. All line numbers are |
375 | * origin-0 and discarded lines are not counted. |
376 | */ |
377 | function _compareseq($xoff, $xlim, $yoff, $ylim) |
378 | { |
379 | // Slide down the bottom initial diagonal. |
380 | while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) |
381 | { |
382 | ++$xoff; |
383 | ++$yoff; |
384 | } |
385 | |
386 | // Slide up the top initial diagonal. |
387 | while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) |
388 | { |
389 | --$xlim; |
390 | --$ylim; |
391 | } |
392 | |
393 | if ($xoff == $xlim || $yoff == $ylim) |
394 | { |
395 | $lcs = 0; |
396 | } |
397 | else |
398 | { |
399 | // This is ad hoc but seems to work well. |
400 | // $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); |
401 | // $nchunks = max(2,min(8,(int)$nchunks)); |
402 | $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1; |
403 | list($lcs, $seps) = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks); |
404 | } |
405 | |
406 | if ($lcs == 0) |
407 | { |
408 | // X and Y sequences have no common subsequence: mark all changed. |
409 | while ($yoff < $ylim) |
410 | { |
411 | $this->ychanged[$this->yind[$yoff++]] = 1; |
412 | } |
413 | |
414 | while ($xoff < $xlim) |
415 | { |
416 | $this->xchanged[$this->xind[$xoff++]] = 1; |
417 | } |
418 | } |
419 | else |
420 | { |
421 | // Use the partitions to split this problem into subproblems. |
422 | reset($seps); |
423 | $pt1 = $seps[0]; |
424 | |
425 | while ($pt2 = next($seps)) |
426 | { |
427 | $this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]); |
428 | $pt1 = $pt2; |
429 | } |
430 | } |
431 | } |
432 | |
433 | /** |
434 | * Adjusts inserts/deletes of identical lines to join changes as much as possible. |
435 | * |
436 | * We do something when a run of changed lines include a line at one end |
437 | * and has an excluded, identical line at the other. We are free to |
438 | * choose which identical line is included. 'compareseq' usually chooses |
439 | * the one at the beginning, but usually it is cleaner to consider the |
440 | * following identical line to be the "change". |
441 | * |
442 | * This is extracted verbatim from analyze.c (GNU diffutils-2.7). |
443 | */ |
444 | function _shift_boundaries($lines, &$changed, $other_changed) |
445 | { |
446 | $i = 0; |
447 | $j = 0; |
448 | |
449 | $len = count($lines); |
450 | $other_len = count($other_changed); |
451 | |
452 | while (1) |
453 | { |
454 | // Scan forward to find the beginning of another run of |
455 | // changes. Also keep track of the corresponding point in the other file. |
456 | // |
457 | // Throughout this code, $i and $j are adjusted together so that |
458 | // the first $i elements of $changed and the first $j elements of |
459 | // $other_changed both contain the same number of zeros (unchanged lines). |
460 | // |
461 | // Furthermore, $j is always kept so that $j == $other_len or $other_changed[$j] == false. |
462 | while ($j < $other_len && $other_changed[$j]) |
463 | { |
464 | $j++; |
465 | } |
466 | |
467 | while ($i < $len && ! $changed[$i]) |
468 | { |
469 | $i++; |
470 | $j++; |
471 | |
472 | while ($j < $other_len && $other_changed[$j]) |
473 | { |
474 | $j++; |
475 | } |
476 | } |
477 | |
478 | if ($i == $len) |
479 | { |
480 | break; |
481 | } |
482 | |
483 | $start = $i; |
484 | |
485 | // Find the end of this run of changes. |
486 | while (++$i < $len && $changed[$i]) |
487 | { |
488 | continue; |
489 | } |
490 | |
491 | do |
492 | { |
493 | // Record the length of this run of changes, so that we can later determine whether the run has grown. |
494 | $runlength = $i - $start; |
495 | |
496 | // Move the changed region back, so long as the previous unchanged line matches the last changed one. |
497 | // This merges with previous changed regions. |
498 | while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) |
499 | { |
500 | $changed[--$start] = 1; |
501 | $changed[--$i] = false; |
502 | |
503 | while ($start > 0 && $changed[$start - 1]) |
504 | { |
505 | $start--; |
506 | } |
507 | |
508 | while ($other_changed[--$j]) |
509 | { |
510 | continue; |
511 | } |
512 | } |
513 | |
514 | // Set CORRESPONDING to the end of the changed run, at the last point where it corresponds to a changed run in the |
515 | // other file. CORRESPONDING == LEN means no such point has been found. |
516 | $corresponding = $j < $other_len ? $i : $len; |
517 | |
518 | // Move the changed region forward, so long as the first changed line matches the following unchanged one. |
519 | // This merges with following changed regions. |
520 | // Do this second, so that if there are no merges, the changed region is moved forward as far as possible. |
521 | while ($i < $len && $lines[$start] == $lines[$i]) |
522 | { |
523 | $changed[$start++] = false; |
524 | $changed[$i++] = 1; |
525 | |
526 | while ($i < $len && $changed[$i]) |
527 | { |
528 | $i++; |
529 | } |
530 | |
531 | $j++; |
532 | if ($j < $other_len && $other_changed[$j]) |
533 | { |
534 | $corresponding = $i; |
535 | while ($j < $other_len && $other_changed[$j]) |
536 | { |
537 | $j++; |
538 | } |
539 | } |
540 | } |
541 | } |
542 | while ($runlength != $i - $start); |
543 | |
544 | // If possible, move the fully-merged run of changes back to a corresponding run in the other file. |
545 | while ($corresponding < $i) |
546 | { |
547 | $changed[--$start] = 1; |
548 | $changed[--$i] = 0; |
549 | |
550 | while ($other_changed[--$j]) |
551 | { |
552 | continue; |
553 | } |
554 | } |
555 | } |
556 | } |
557 | } |