Code Coverage for /data/phpBB/includes/diff/engine.php

	Code Coverage
	Classes and Traits			Functions and Methods				Lines
Total	0.00% covered (danger)	0.00%	0 / 1	0.00% covered (danger)	0.00%	0 / 5	CRAP	0.00% covered (danger)	0.00%	0 / 352
diff_engine	0.00% covered (danger)	0.00%	0 / 1	0.00% covered (danger)	0.00%	0 / 5	10100.00	0.00% covered (danger)	0.00%	0 / 348
diff				0.00% covered (danger)	0.00%	0 / 1	1190.00	0.00% covered (danger)	0.00%	0 / 119
_diag				0.00% covered (danger)	0.00%	0 / 1	420.00	0.00% covered (danger)	0.00%	0 / 84
_lcs_pos				0.00% covered (danger)	0.00%	0 / 1	30.00	0.00% covered (danger)	0.00%	0 / 26
_compareseq				0.00% covered (danger)	0.00%	0 / 1	182.00	0.00% covered (danger)	0.00%	0 / 42
_shift_boundaries				0.00% covered (danger)	0.00%	0 / 1	812.00	0.00% covered (danger)	0.00%	0 / 77

1	<?php
2	/**
3	*
4	* This file is part of the phpBB Forum Software package.
5	*
6	* @copyright (c) phpBB Limited <https://www.phpbb.com>
7	* @license GNU General Public License, version 2 (GPL-2.0)
8	*
9	* For full copyright and license information, please see
10	* the docs/CREDITS.txt file.
11	*
12	*/
13
14	/**
15	* @ignore
16	*/
17	if (!defined('IN_PHPBB'))
18	{
19	exit;
20	}
21
22	/**
23	* Code from pear.php.net, Text_Diff-1.1.0 package
24	* http://pear.php.net/package/Text_Diff/ (native engine)
25	*
26	* Modified by phpBB Limited to meet our coding standards
27	* and being able to integrate into phpBB
28	*
29	* Class used internally by Text_Diff to actually compute the diffs. This
30	* class is implemented using native PHP code.
31	*
32	* The algorithm used here is mostly lifted from the perl module
33	* Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
34	* http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
35	*
36	* More ideas are taken from: http://www.ics.uci.edu/~eppstein/161/960229.html
37	*
38	* Some ideas (and a bit of code) are taken from analyze.c, of GNU
39	* diffutils-2.7, which can be found at:
40	* ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
41	*
42	* Some ideas (subdivision by NCHUNKS > 2, and some optimizations) are from
43	* Geoffrey T. Dairiki <dairiki@dairiki.org>. The original PHP version of this
44	* code was written by him, and is used/adapted with his permission.
45	*
46	* Copyright 2004-2008 The Horde Project (http://www.horde.org/)
47	*
48	* @author Geoffrey T. Dairiki <dairiki@dairiki.org>
49	* @package diff
50	*
51	* @access private
52	*/
53	class diff_engine
54	{
55	/**
56	* If set to true we trim all lines before we compare them. This ensures that sole space/tab changes do not trigger diffs.
57	*/
58	var $skip_whitespace_changes = true;
59
60	function diff(&$from_lines, &$to_lines, $preserve_cr = true)
61	{
62	// Remove empty lines...
63	// If preserve_cr is true, we basically only change \r\n and bare \r to \n to get the same carriage returns for both files
64	// If it is false, we try to only use \n once per line and ommit all empty lines to be able to get a proper data diff
65
66	if (is_array($from_lines))
67	{
68	$from_lines = implode("\n", $from_lines);
69	}
70
71	if (is_array($to_lines))
72	{
73	$to_lines = implode("\n", $to_lines);
74	}
75
76	if ($preserve_cr)
77	{
78	$from_lines = explode("\n", str_replace("\r", "\n", str_replace("\r\n", "\n", $from_lines)));
79	$to_lines = explode("\n", str_replace("\r", "\n", str_replace("\r\n", "\n", $to_lines)));
80	}
81	else
82	{
83	$from_lines = explode("\n", preg_replace('#[\n\r]+#', "\n", $from_lines));
84	$to_lines = explode("\n", preg_replace('#[\n\r]+#', "\n", $to_lines));
85	}
86
87	$n_from = count($from_lines);
88	$n_to = count($to_lines);
89
90	$this->xchanged = $this->ychanged = $this->xv = $this->yv = $this->xind = $this->yind = array();
91	unset($this->seq, $this->in_seq, $this->lcs);
92
93	// Skip leading common lines.
94	for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++)
95	{
96	if (trim($from_lines[$skip]) !== trim($to_lines[$skip]))
97	{
98	break;
99	}
100	$this->xchanged[$skip] = $this->ychanged[$skip] = false;
101	}
102
103	// Skip trailing common lines.
104	$xi = $n_from;
105	$yi = $n_to;
106
107	for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++)
108	{
109	if (trim($from_lines[$xi]) !== trim($to_lines[$yi]))
110	{
111	break;
112	}
113	$this->xchanged[$xi] = $this->ychanged[$yi] = false;
114	}
115
116	// Ignore lines which do not exist in both files.
117	for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
118	{
119	if ($this->skip_whitespace_changes) $xhash[trim($from_lines[$xi])] = 1; else $xhash[$from_lines[$xi]] = 1;
120	}
121
122	for ($yi = $skip; $yi < $n_to - $endskip; $yi++)
123	{
124	$line = ($this->skip_whitespace_changes) ? trim($to_lines[$yi]) : $to_lines[$yi];
125
126	if (($this->ychanged[$yi] = empty($xhash[$line])))
127	{
128	continue;
129	}
130	$yhash[$line] = 1;
131	$this->yv[] = $line;
132	$this->yind[] = $yi;
133	}
134
135	for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
136	{
137	$line = ($this->skip_whitespace_changes) ? trim($from_lines[$xi]) : $from_lines[$xi];
138
139	if (($this->xchanged[$xi] = empty($yhash[$line])))
140	{
141	continue;
142	}
143	$this->xv[] = $line;
144	$this->xind[] = $xi;
145	}
146
147	// Find the LCS.
148	$this->_compareseq(0, count($this->xv), 0, count($this->yv));
149
150	// Merge edits when possible.
151	if ($this->skip_whitespace_changes)
152	{
153	$from_lines_clean = array_map('trim', $from_lines);
154	$to_lines_clean = array_map('trim', $to_lines);
155
156	$this->_shift_boundaries($from_lines_clean, $this->xchanged, $this->ychanged);
157	$this->_shift_boundaries($to_lines_clean, $this->ychanged, $this->xchanged);
158
159	unset($from_lines_clean, $to_lines_clean);
160	}
161	else
162	{
163	$this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
164	$this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
165	}
166
167	// Compute the edit operations.
168	$edits = array();
169	$xi = $yi = 0;
170
171	while ($xi < $n_from \|\| $yi < $n_to)
172	{
173	// Skip matching "snake".
174	$copy = array();
175
176	while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi])
177	{
178	$copy[] = $from_lines[$xi++];
179	$yi++;
180	}
181
182	if ($copy)
183	{
184	$edits[] = new diff_op_copy($copy);
185	}
186
187	// Find deletes & adds.
188	$delete = array();
189	while ($xi < $n_from && $this->xchanged[$xi])
190	{
191	$delete[] = $from_lines[$xi++];
192	}
193
194	$add = array();
195	while ($yi < $n_to && $this->ychanged[$yi])
196	{
197	$add[] = $to_lines[$yi++];
198	}
199
200	if ($delete && $add)
201	{
202	$edits[] = new diff_op_change($delete, $add);
203	}
204	else if ($delete)
205	{
206	$edits[] = new diff_op_delete($delete);
207	}
208	else if ($add)
209	{
210	$edits[] = new diff_op_add($add);
211	}
212	}
213
214	return $edits;
215	}
216
217	/**
218	* Divides the Largest Common Subsequence (LCS) of the sequences (XOFF,
219	* XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments.
220	*
221	* Returns (LCS, PTS). LCS is the length of the LCS. PTS is an array of
222	* NCHUNKS+1 (X, Y) indexes giving the diving points between sub
223	* sequences. The first sub-sequence is contained in (X0, X1), (Y0, Y1),
224	* the second in (X1, X2), (Y1, Y2) and so on. Note that (X0, Y0) ==
225	* (XOFF, YOFF) and (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
226	*
227	* This function assumes that the first lines of the specified portions of
228	* the two files do not match, and likewise that the last lines do not
229	* match. The caller must trim matching lines from the beginning and end
230	* of the portions it is going to specify.
231	*/
232	function _diag($xoff, $xlim, $yoff, $ylim, $nchunks)
233	{
234	$flip = false;
235
236	if ($xlim - $xoff > $ylim - $yoff)
237	{
238	// Things seems faster (I'm not sure I understand why) when the shortest sequence is in X.
239	$flip = true;
240	list($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
241	}
242
243	if ($flip)
244	{
245	for ($i = $ylim - 1; $i >= $yoff; $i--)
246	{
247	$ymatches[$this->xv[$i]][] = $i;
248	}
249	}
250	else
251	{
252	for ($i = $ylim - 1; $i >= $yoff; $i--)
253	{
254	$ymatches[$this->yv[$i]][] = $i;
255	}
256	}
257
258	$this->lcs = 0;
259	$this->seq[0]= $yoff - 1;
260	$this->in_seq = array();
261	$ymids[0] = array();
262
263	$numer = $xlim - $xoff + $nchunks - 1;
264	$x = $xoff;
265
266	for ($chunk = 0; $chunk < $nchunks; $chunk++)
267	{
268	if ($chunk > 0)
269	{
270	for ($i = 0; $i <= $this->lcs; $i++)
271	{
272	$ymids[$i][$chunk - 1] = $this->seq[$i];
273	}
274	}
275
276	$x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $chunk) / $nchunks);
277
278	for (; $x < $x1; $x++)
279	{
280	$line = $flip ? $this->yv[$x] : $this->xv[$x];
281	if (empty($ymatches[$line]))
282	{
283	continue;
284	}
285	$matches = $ymatches[$line];
286
287	reset($matches);
288	while ($y = current($matches))
289	{
290	next($matches);
291	if (empty($this->in_seq[$y]))
292	{
293	$k = $this->_lcs_pos($y);
294	$ymids[$k] = $ymids[$k - 1];
295	break;
296	}
297	}
298
299	// no reset() here
300	while ($y = current($matches))
301	{
302	next($matches);
303	if ($y > $this->seq[$k - 1])
304	{
305	// Optimization: this is a common case: next match is just replacing previous match.
306	$this->in_seq[$this->seq[$k]] = false;
307	$this->seq[$k] = $y;
308	$this->in_seq[$y] = 1;
309	}
310	else if (empty($this->in_seq[$y]))
311	{
312	$k = $this->_lcs_pos($y);
313	$ymids[$k] = $ymids[$k - 1];
314	}
315	}
316	}
317	}
318
319	$seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
320	$ymid = $ymids[$this->lcs];
321
322	for ($n = 0; $n < $nchunks - 1; $n++)
323	{
324	$x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
325	$y1 = $ymid[$n] + 1;
326	$seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
327	}
328	$seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
329
330	return array($this->lcs, $seps);
331	}
332
333	function _lcs_pos($ypos)
334	{
335	$end = $this->lcs;
336
337	if ($end == 0 \|\| $ypos > $this->seq[$end])
338	{
339	$this->seq[++$this->lcs] = $ypos;
340	$this->in_seq[$ypos] = 1;
341	return $this->lcs;
342	}
343
344	$beg = 1;
345	while ($beg < $end)
346	{
347	$mid = (int)(($beg + $end) / 2);
348	if ($ypos > $this->seq[$mid])
349	{
350	$beg = $mid + 1;
351	}
352	else
353	{
354	$end = $mid;
355	}
356	}
357
358	$this->in_seq[$this->seq[$end]] = false;
359	$this->seq[$end] = $ypos;
360	$this->in_seq[$ypos] = 1;
361
362	return $end;
363	}
364
365	/**
366	* Finds LCS of two sequences.
367	*
368	* The results are recorded in the vectors $this->{x,y}changed[], by
369	* storing a 1 in the element for each line that is an insertion or
370	* deletion (ie. is not in the LCS).
371	*
372	* The subsequence of file 0 is (XOFF, XLIM) and likewise for file 1.
373	*
374	* Note that XLIM, YLIM are exclusive bounds. All line numbers are
375	* origin-0 and discarded lines are not counted.
376	*/
377	function _compareseq($xoff, $xlim, $yoff, $ylim)
378	{
379	// Slide down the bottom initial diagonal.
380	while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff])
381	{
382	++$xoff;
383	++$yoff;
384	}
385
386	// Slide up the top initial diagonal.
387	while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1])
388	{
389	--$xlim;
390	--$ylim;
391	}
392
393	if ($xoff == $xlim \|\| $yoff == $ylim)
394	{
395	$lcs = 0;
396	}
397	else
398	{
399	// This is ad hoc but seems to work well.
400	// $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
401	// $nchunks = max(2,min(8,(int)$nchunks));
402	$nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
403	list($lcs, $seps) = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
404	}
405
406	if ($lcs == 0)
407	{
408	// X and Y sequences have no common subsequence: mark all changed.
409	while ($yoff < $ylim)
410	{
411	$this->ychanged[$this->yind[$yoff++]] = 1;
412	}
413
414	while ($xoff < $xlim)
415	{
416	$this->xchanged[$this->xind[$xoff++]] = 1;
417	}
418	}
419	else
420	{
421	// Use the partitions to split this problem into subproblems.
422	reset($seps);
423	$pt1 = $seps[0];
424
425	while ($pt2 = next($seps))
426	{
427	$this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
428	$pt1 = $pt2;
429	}
430	}
431	}
432
433	/**
434	* Adjusts inserts/deletes of identical lines to join changes as much as possible.
435	*
436	* We do something when a run of changed lines include a line at one end
437	* and has an excluded, identical line at the other. We are free to
438	* choose which identical line is included. 'compareseq' usually chooses
439	* the one at the beginning, but usually it is cleaner to consider the
440	* following identical line to be the "change".
441	*
442	* This is extracted verbatim from analyze.c (GNU diffutils-2.7).
443	*/
444	function _shift_boundaries($lines, &$changed, $other_changed)
445	{
446	$i = 0;
447	$j = 0;
448
449	$len = count($lines);
450	$other_len = count($other_changed);
451
452	while (1)
453	{
454	// Scan forward to find the beginning of another run of
455	// changes. Also keep track of the corresponding point in the other file.
456	//
457	// Throughout this code, $i and $j are adjusted together so that
458	// the first $i elements of $changed and the first $j elements of
459	// $other_changed both contain the same number of zeros (unchanged lines).
460	//
461	// Furthermore, $j is always kept so that $j == $other_len or $other_changed[$j] == false.
462	while ($j < $other_len && $other_changed[$j])
463	{
464	$j++;
465	}
466
467	while ($i < $len && ! $changed[$i])
468	{
469	$i++;
470	$j++;
471
472	while ($j < $other_len && $other_changed[$j])
473	{
474	$j++;
475	}
476	}
477
478	if ($i == $len)
479	{
480	break;
481	}
482
483	$start = $i;
484
485	// Find the end of this run of changes.
486	while (++$i < $len && $changed[$i])
487	{
488	continue;
489	}
490
491	do
492	{
493	// Record the length of this run of changes, so that we can later determine whether the run has grown.
494	$runlength = $i - $start;
495
496	// Move the changed region back, so long as the previous unchanged line matches the last changed one.
497	// This merges with previous changed regions.
498	while ($start > 0 && $lines[$start - 1] == $lines[$i - 1])
499	{
500	$changed[--$start] = 1;
501	$changed[--$i] = false;
502
503	while ($start > 0 && $changed[$start - 1])
504	{
505	$start--;
506	}
507
508	while ($other_changed[--$j])
509	{
510	continue;
511	}
512	}
513
514	// Set CORRESPONDING to the end of the changed run, at the last point where it corresponds to a changed run in the
515	// other file. CORRESPONDING == LEN means no such point has been found.
516	$corresponding = $j < $other_len ? $i : $len;
517
518	// Move the changed region forward, so long as the first changed line matches the following unchanged one.
519	// This merges with following changed regions.
520	// Do this second, so that if there are no merges, the changed region is moved forward as far as possible.
521	while ($i < $len && $lines[$start] == $lines[$i])
522	{
523	$changed[$start++] = false;
524	$changed[$i++] = 1;
525
526	while ($i < $len && $changed[$i])
527	{
528	$i++;
529	}
530
531	$j++;
532	if ($j < $other_len && $other_changed[$j])
533	{
534	$corresponding = $i;
535	while ($j < $other_len && $other_changed[$j])
536	{
537	$j++;
538	}
539	}
540	}
541	}
542	while ($runlength != $i - $start);
543
544	// If possible, move the fully-merged run of changes back to a corresponding run in the other file.
545	while ($corresponding < $i)
546	{
547	$changed[--$start] = 1;
548	$changed[--$i] = 0;
549
550	while ($other_changed[--$j])
551	{
552	continue;
553	}
554	}
555	}
556	}
557	}