Code Coverage for /data/phpBB/includes/diff/engine.php

	Code Coverage
	Classes and Traits			Functions and Methods				Lines
Total	0.00% covered (danger)	0.00%	0 / 1	0.00% covered (danger)	0.00%	0 / 5	CRAP	0.00% covered (danger)	0.00%	0 / 350
diff_engine	0.00% covered (danger)	0.00%	0 / 1	0.00% covered (danger)	0.00%	0 / 5	10100	0.00% covered (danger)	0.00%	0 / 346
diff				0.00% covered (danger)	0.00%	0 / 1	1190	0.00% covered (danger)	0.00%	0 / 119
_diag				0.00% covered (danger)	0.00%	0 / 1	420	0.00% covered (danger)	0.00%	0 / 82
_lcs_pos				0.00% covered (danger)	0.00%	0 / 1	30	0.00% covered (danger)	0.00%	0 / 26
_compareseq				0.00% covered (danger)	0.00%	0 / 1	182	0.00% covered (danger)	0.00%	0 / 42
_shift_boundaries				0.00% covered (danger)	0.00%	0 / 1	812	0.00% covered (danger)	0.00%	0 / 77

1	<?php
2	/**
3	*
4	* This file is part of the phpBB Forum Software package.
5	*
6	* @copyright (c) phpBB Limited <https://www.phpbb.com>
7	* @license GNU General Public License, version 2 (GPL-2.0)
8	*
9	* For full copyright and license information, please see
10	* the docs/CREDITS.txt file.
11	*
12	*/
13
14	/**
15	* @ignore
16	*/
17	if (!defined('IN_PHPBB'))
18	{
19	exit;
20	}
21
22	/**
23	* Code from pear.php.net, Text_Diff-1.1.0 package
24	* http://pear.php.net/package/Text_Diff/ (native engine)
25	*
26	* Modified by phpBB Limited to meet our coding standards
27	* and being able to integrate into phpBB
28	*
29	* Class used internally by Text_Diff to actually compute the diffs. This
30	* class is implemented using native PHP code.
31	*
32	* The algorithm used here is mostly lifted from the perl module
33	* Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
34	* http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
35	*
36	* More ideas are taken from: http://www.ics.uci.edu/~eppstein/161/960229.html
37	*
38	* Some ideas (and a bit of code) are taken from analyze.c, of GNU
39	* diffutils-2.7, which can be found at:
40	* ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
41	*
42	* Some ideas (subdivision by NCHUNKS > 2, and some optimizations) are from
43	* Geoffrey T. Dairiki <dairiki@dairiki.org>. The original PHP version of this
44	* code was written by him, and is used/adapted with his permission.
45	*
46	* Copyright 2004-2008 The Horde Project (http://www.horde.org/)
47	*
48	* @author Geoffrey T. Dairiki <dairiki@dairiki.org>
49	* @package diff
50	*
51	* @access private
52	*/
53	class diff_engine
54	{
55	/**
56	* If set to true we trim all lines before we compare them. This ensures that sole space/tab changes do not trigger diffs.
57	*/
58	var $skip_whitespace_changes = true;
59
60	function diff(&$from_lines, &$to_lines, $preserve_cr = true)
61	{
62	// Remove empty lines...
63	// If preserve_cr is true, we basically only change \r\n and bare \r to \n to get the same carriage returns for both files
64	// If it is false, we try to only use \n once per line and ommit all empty lines to be able to get a proper data diff
65
66	if (is_array($from_lines))
67	{
68	$from_lines = implode("\n", $from_lines);
69	}
70
71	if (is_array($to_lines))
72	{
73	$to_lines = implode("\n", $to_lines);
74	}
75
76	if ($preserve_cr)
77	{
78	$from_lines = explode("\n", str_replace("\r", "\n", str_replace("\r\n", "\n", $from_lines)));
79	$to_lines = explode("\n", str_replace("\r", "\n", str_replace("\r\n", "\n", $to_lines)));
80	}
81	else
82	{
83	$from_lines = explode("\n", preg_replace('#[\n\r]+#', "\n", $from_lines));
84	$to_lines = explode("\n", preg_replace('#[\n\r]+#', "\n", $to_lines));
85	}
86
87	$n_from = count($from_lines);
88	$n_to = count($to_lines);
89
90	$this->xchanged = $this->ychanged = $this->xv = $this->yv = $this->xind = $this->yind = array();
91	unset($this->seq, $this->in_seq, $this->lcs);
92
93	// Skip leading common lines.
94	for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++)
95	{
96	if (trim($from_lines[$skip]) !== trim($to_lines[$skip]))
97	{
98	break;
99	}
100	$this->xchanged[$skip] = $this->ychanged[$skip] = false;
101	}
102
103	// Skip trailing common lines.
104	$xi = $n_from;
105	$yi = $n_to;
106
107	for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++)
108	{
109	if (trim($from_lines[$xi]) !== trim($to_lines[$yi]))
110	{
111	break;
112	}
113	$this->xchanged[$xi] = $this->ychanged[$yi] = false;
114	}
115
116	// Ignore lines which do not exist in both files.
117	for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
118	{
119	if ($this->skip_whitespace_changes) $xhash[trim($from_lines[$xi])] = 1; else $xhash[$from_lines[$xi]] = 1;
120	}
121
122	for ($yi = $skip; $yi < $n_to - $endskip; $yi++)
123	{
124	$line = ($this->skip_whitespace_changes) ? trim($to_lines[$yi]) : $to_lines[$yi];
125
126	if (($this->ychanged[$yi] = empty($xhash[$line])))
127	{
128	continue;
129	}
130	$yhash[$line] = 1;
131	$this->yv[] = $line;
132	$this->yind[] = $yi;
133	}
134
135	for ($xi = $skip; $xi < $n_from - $endskip; $xi++)
136	{
137	$line = ($this->skip_whitespace_changes) ? trim($from_lines[$xi]) : $from_lines[$xi];
138
139	if (($this->xchanged[$xi] = empty($yhash[$line])))
140	{
141	continue;
142	}
143	$this->xv[] = $line;
144	$this->xind[] = $xi;
145	}
146
147	// Find the LCS.
148	$this->_compareseq(0, count($this->xv), 0, count($this->yv));
149
150	// Merge edits when possible.
151	if ($this->skip_whitespace_changes)
152	{
153	$from_lines_clean = array_map('trim', $from_lines);
154	$to_lines_clean = array_map('trim', $to_lines);
155
156	$this->_shift_boundaries($from_lines_clean, $this->xchanged, $this->ychanged);
157	$this->_shift_boundaries($to_lines_clean, $this->ychanged, $this->xchanged);
158
159	unset($from_lines_clean, $to_lines_clean);
160	}
161	else
162	{
163	$this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
164	$this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
165	}
166
167	// Compute the edit operations.
168	$edits = array();
169	$xi = $yi = 0;
170
171	while ($xi < $n_from \|\| $yi < $n_to)
172	{
173	// Skip matching "snake".
174	$copy = array();
175
176	while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi])
177	{
178	$copy[] = $from_lines[$xi++];
179	$yi++;
180	}
181
182	if ($copy)
183	{
184	$edits[] = new diff_op_copy($copy);
185	}
186
187	// Find deletes & adds.
188	$delete = array();
189	while ($xi < $n_from && $this->xchanged[$xi])
190	{
191	$delete[] = $from_lines[$xi++];
192	}
193
194	$add = array();
195	while ($yi < $n_to && $this->ychanged[$yi])
196	{
197	$add[] = $to_lines[$yi++];
198	}
199
200	if ($delete && $add)
201	{
202	$edits[] = new diff_op_change($delete, $add);
203	}
204	else if ($delete)
205	{
206	$edits[] = new diff_op_delete($delete);
207	}
208	else if ($add)
209	{
210	$edits[] = new diff_op_add($add);
211	}
212	}
213
214	return $edits;
215	}
216
217	/**
218	* Divides the Largest Common Subsequence (LCS) of the sequences (XOFF,
219	* XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments.
220	*
221	* Returns (LCS, PTS). LCS is the length of the LCS. PTS is an array of
222	* NCHUNKS+1 (X, Y) indexes giving the diving points between sub
223	* sequences. The first sub-sequence is contained in (X0, X1), (Y0, Y1),
224	* the second in (X1, X2), (Y1, Y2) and so on. Note that (X0, Y0) ==
225	* (XOFF, YOFF) and (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
226	*
227	* This function assumes that the first lines of the specified portions of
228	* the two files do not match, and likewise that the last lines do not
229	* match. The caller must trim matching lines from the beginning and end
230	* of the portions it is going to specify.
231	*/
232	function _diag($xoff, $xlim, $yoff, $ylim, $nchunks)
233	{
234	$flip = false;
235
236	if ($xlim - $xoff > $ylim - $yoff)
237	{
238	// Things seems faster (I'm not sure I understand why) when the shortest sequence is in X.
239	$flip = true;
240	list($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
241	}
242
243	if ($flip)
244	{
245	for ($i = $ylim - 1; $i >= $yoff; $i--)
246	{
247	$ymatches[$this->xv[$i]][] = $i;
248	}
249	}
250	else
251	{
252	for ($i = $ylim - 1; $i >= $yoff; $i--)
253	{
254	$ymatches[$this->yv[$i]][] = $i;
255	}
256	}
257
258	$this->lcs = 0;
259	$this->seq[0]= $yoff - 1;
260	$this->in_seq = array();
261	$ymids[0] = array();
262
263	$numer = $xlim - $xoff + $nchunks - 1;
264	$x = $xoff;
265
266	for ($chunk = 0; $chunk < $nchunks; $chunk++)
267	{
268	if ($chunk > 0)
269	{
270	for ($i = 0; $i <= $this->lcs; $i++)
271	{
272	$ymids[$i][$chunk - 1] = $this->seq[$i];
273	}
274	}
275
276	$x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $chunk) / $nchunks);
277
278	for (; $x < $x1; $x++)
279	{
280	$line = $flip ? $this->yv[$x] : $this->xv[$x];
281	if (empty($ymatches[$line]))
282	{
283	continue;
284	}
285	$matches = $ymatches[$line];
286
287	reset($matches);
288	while (list(, $y) = each($matches))
289	{
290	if (empty($this->in_seq[$y]))
291	{
292	$k = $this->_lcs_pos($y);
293	$ymids[$k] = $ymids[$k - 1];
294	break;
295	}
296	}
297
298	// no reset() here
299	while (list(, $y) = each($matches))
300	{
301	if ($y > $this->seq[$k - 1])
302	{
303	// Optimization: this is a common case: next match is just replacing previous match.
304	$this->in_seq[$this->seq[$k]] = false;
305	$this->seq[$k] = $y;
306	$this->in_seq[$y] = 1;
307	}
308	else if (empty($this->in_seq[$y]))
309	{
310	$k = $this->_lcs_pos($y);
311	$ymids[$k] = $ymids[$k - 1];
312	}
313	}
314	}
315	}
316
317	$seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
318	$ymid = $ymids[$this->lcs];
319
320	for ($n = 0; $n < $nchunks - 1; $n++)
321	{
322	$x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
323	$y1 = $ymid[$n] + 1;
324	$seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
325	}
326	$seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
327
328	return array($this->lcs, $seps);
329	}
330
331	function _lcs_pos($ypos)
332	{
333	$end = $this->lcs;
334
335	if ($end == 0 \|\| $ypos > $this->seq[$end])
336	{
337	$this->seq[++$this->lcs] = $ypos;
338	$this->in_seq[$ypos] = 1;
339	return $this->lcs;
340	}
341
342	$beg = 1;
343	while ($beg < $end)
344	{
345	$mid = (int)(($beg + $end) / 2);
346	if ($ypos > $this->seq[$mid])
347	{
348	$beg = $mid + 1;
349	}
350	else
351	{
352	$end = $mid;
353	}
354	}
355
356	$this->in_seq[$this->seq[$end]] = false;
357	$this->seq[$end] = $ypos;
358	$this->in_seq[$ypos] = 1;
359
360	return $end;
361	}
362
363	/**
364	* Finds LCS of two sequences.
365	*
366	* The results are recorded in the vectors $this->{x,y}changed[], by
367	* storing a 1 in the element for each line that is an insertion or
368	* deletion (ie. is not in the LCS).
369	*
370	* The subsequence of file 0 is (XOFF, XLIM) and likewise for file 1.
371	*
372	* Note that XLIM, YLIM are exclusive bounds. All line numbers are
373	* origin-0 and discarded lines are not counted.
374	*/
375	function _compareseq($xoff, $xlim, $yoff, $ylim)
376	{
377	// Slide down the bottom initial diagonal.
378	while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff])
379	{
380	++$xoff;
381	++$yoff;
382	}
383
384	// Slide up the top initial diagonal.
385	while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1])
386	{
387	--$xlim;
388	--$ylim;
389	}
390
391	if ($xoff == $xlim \|\| $yoff == $ylim)
392	{
393	$lcs = 0;
394	}
395	else
396	{
397	// This is ad hoc but seems to work well.
398	// $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
399	// $nchunks = max(2,min(8,(int)$nchunks));
400	$nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
401	list($lcs, $seps) = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
402	}
403
404	if ($lcs == 0)
405	{
406	// X and Y sequences have no common subsequence: mark all changed.
407	while ($yoff < $ylim)
408	{
409	$this->ychanged[$this->yind[$yoff++]] = 1;
410	}
411
412	while ($xoff < $xlim)
413	{
414	$this->xchanged[$this->xind[$xoff++]] = 1;
415	}
416	}
417	else
418	{
419	// Use the partitions to split this problem into subproblems.
420	reset($seps);
421	$pt1 = $seps[0];
422
423	while ($pt2 = next($seps))
424	{
425	$this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
426	$pt1 = $pt2;
427	}
428	}
429	}
430
431	/**
432	* Adjusts inserts/deletes of identical lines to join changes as much as possible.
433	*
434	* We do something when a run of changed lines include a line at one end
435	* and has an excluded, identical line at the other. We are free to
436	* choose which identical line is included. 'compareseq' usually chooses
437	* the one at the beginning, but usually it is cleaner to consider the
438	* following identical line to be the "change".
439	*
440	* This is extracted verbatim from analyze.c (GNU diffutils-2.7).
441	*/
442	function _shift_boundaries($lines, &$changed, $other_changed)
443	{
444	$i = 0;
445	$j = 0;
446
447	$len = count($lines);
448	$other_len = count($other_changed);
449
450	while (1)
451	{
452	// Scan forward to find the beginning of another run of
453	// changes. Also keep track of the corresponding point in the other file.
454	//
455	// Throughout this code, $i and $j are adjusted together so that
456	// the first $i elements of $changed and the first $j elements of
457	// $other_changed both contain the same number of zeros (unchanged lines).
458	//
459	// Furthermore, $j is always kept so that $j == $other_len or $other_changed[$j] == false.
460	while ($j < $other_len && $other_changed[$j])
461	{
462	$j++;
463	}
464
465	while ($i < $len && ! $changed[$i])
466	{
467	$i++;
468	$j++;
469
470	while ($j < $other_len && $other_changed[$j])
471	{
472	$j++;
473	}
474	}
475
476	if ($i == $len)
477	{
478	break;
479	}
480
481	$start = $i;
482
483	// Find the end of this run of changes.
484	while (++$i < $len && $changed[$i])
485	{
486	continue;
487	}
488
489	do
490	{
491	// Record the length of this run of changes, so that we can later determine whether the run has grown.
492	$runlength = $i - $start;
493
494	// Move the changed region back, so long as the previous unchanged line matches the last changed one.
495	// This merges with previous changed regions.
496	while ($start > 0 && $lines[$start - 1] == $lines[$i - 1])
497	{
498	$changed[--$start] = 1;
499	$changed[--$i] = false;
500
501	while ($start > 0 && $changed[$start - 1])
502	{
503	$start--;
504	}
505
506	while ($other_changed[--$j])
507	{
508	continue;
509	}
510	}
511
512	// Set CORRESPONDING to the end of the changed run, at the last point where it corresponds to a changed run in the
513	// other file. CORRESPONDING == LEN means no such point has been found.
514	$corresponding = $j < $other_len ? $i : $len;
515
516	// Move the changed region forward, so long as the first changed line matches the following unchanged one.
517	// This merges with following changed regions.
518	// Do this second, so that if there are no merges, the changed region is moved forward as far as possible.
519	while ($i < $len && $lines[$start] == $lines[$i])
520	{
521	$changed[$start++] = false;
522	$changed[$i++] = 1;
523
524	while ($i < $len && $changed[$i])
525	{
526	$i++;
527	}
528
529	$j++;
530	if ($j < $other_len && $other_changed[$j])
531	{
532	$corresponding = $i;
533	while ($j < $other_len && $other_changed[$j])
534	{
535	$j++;
536	}
537	}
538	}
539	}
540	while ($runlength != $i - $start);
541
542	// If possible, move the fully-merged run of changes back to a corresponding run in the other file.
543	while ($corresponding < $i)
544	{
545	$changed[--$start] = 1;
546	$changed[--$i] = 0;
547
548	while ($other_changed[--$j])
549	{
550	continue;
551	}
552	}
553	}
554	}
555	}