Diffクラス修正

本家で修正済みかもしれないけどPukiWikiAdvanceのDiffクラスが正常に動いていなかったのを修正
修正箇所は覚えていないのでソースコードを貼り付けます
2014年6月8日現在、以前出ていなかったNGが出てることを確認したため、下記コードは特定の条件下で正常に動作していません
再度1万回テスト実行して見たところNGが出なかったため、なぜNGが出たのか不明。
2014年6月13日現在
3万回テスト実行するもNGが見受けられず、以前テストしたキャッシュの可能性。
もしくはテストケースに問題がある可能性。

修正済みDiff

<?php
// An O(NP) Sequence Comparison Algorithm" for PHP
// Copyright (c) 2012 Logue <logue@hotmail.co.jp> All rights reserved.
// License: BSD license
// based on https://github.com/cubicdaiya/onp
/**
 * The algorithm implemented here is based on "An O(NP) Sequence Comparison Algorithm"
 * by described by Sun Wu, Udi Manber and Gene Myers
 */
class Diff{
	const SES_DELETE = '-';
	const SES_COMMON = ' ';
	const SES_ADD    = '+';
	private $a, $b, $m, $n;
	private $editdis = 0;
	private $reverse = false;
	private $pathposi = array();
	private $path = array();
	private $ses = array();
	private $lcs = '';
	/**
	 * コンストラクタ
	 * @param array $a 元データ
	 * @param array $b 新しいデータ
	 */
	public function __construct(/* array */$a, /* array */$b){
		$this->a = is_array($a) ? $a : explode("\n", $a);
		$this->b = is_array($a) ? $b : explode("\n", $b);
		$this->m = count($this->a);
		$this->n = count($this->b);
		if ($this->m > $this->n){
			$this->a = is_array($b) ? $b : explode("\n", $b);
			$this->b = is_array($a) ? $a : explode("\n", $a);
			$this->m = count($this->b);
			$this->n = count($this->a);
			$this->reverse = true;
		}
		self::compose();
	}
	private function compose(){
		$p = -1;
		$delta  = $this->n - $this->m; // Must be >=0;
		$size   = $this->m + $this->n + 3;
		$offset = $this->m + 1;
		$fp = array_fill(0, $size, -1);
		$this->path = array_fill(0, $size, -1);
		do {
			++$p;
			for ($k = -$p; $k < $delta; $k++) {
				$fp[$k+$offset] = self::snake($k, $fp[$k-1+$offset] + 1, $fp[$k+1+$offset]);
			}
			for ($k=$delta+$p; $k > $delta; --$k) {
				$fp[$k+$offset] = self::snake($k, $fp[$k-1+$offset] + 1, $fp[$k+1+$offset]);
			}
			$fp[$delta+$offset] = self::snake($delta, $fp[$delta-1+$offset] + 1, $fp[$delta+1+$offset]);
		} while($fp[$delta+$offset] !== $this->n);
		$this->editdis = $delta + 2 * $p;
		$r = $this->path[$delta+$offset];
		$epc = array();
		while ($r !== -1) {
			$_pathposi = $this->pathposi[$r];
			$epc[] = array(
				'x'=>$_pathposi['x'],
				'y'=>$_pathposi['y'],
				'k'=>null
			);
			$r = $_pathposi['k'];
		}
		self::recordseq($epc);
	}
	private function snake($k, $p, $pp){
		$offset = $this->m + 1;
		$r = ($p > $pp) ? $this->path[$k-1+$offset] : $this->path[$k+1+$offset];
		$y = max($p, $pp);
		$x = $y - $k;
		while ($x < $this->m && $y < $this->n &&
			 (isset($this->a[$x]) && isset($this->b[$y]) && $this->a[$x] === $this->b[$y])) {
			$x++;
			$y++;
		}
		$this->path[$k+$offset] = count($this->pathposi);
		$this->pathposi[] = array('x'=>$x, 'y'=>$y, 'k'=>$r);
		return $y;
	}
	private function recordseq ($epc) {
		if ($this->reverse) {
			$tmp = $this->b;
			$this->b = $this->a;
			$this->a = $tmp;
		}
		$px_idx = $py_idx = 0;
		for ($i = count($epc) - 1; $i>=0; --$i) {
			while($px_idx < $epc[$i]['x'] || $py_idx < $epc[$i]['y']) {
				if ($epc[$i]['y'] - $epc[$i]['x'] < $py_idx - $px_idx) {
					if (isset($this->a[$px_idx])){
						$str = isset($this->a[$px_idx]) ? rtrim($this->a[$px_idx]) : '';
						$this->ses[] = array(self::SES_DELETE, $str);
					}
					++$px_idx;
				} else if ($epc[$i]['y'] - $epc[$i]['x'] > $py_idx - $px_idx) {
					if (isset($this->b[$py_idx])){
						$str = isset($this->b[$py_idx]) ? rtrim($this->b[$py_idx]) : '';
						$this->ses[] = array(self::SES_ADD,    $str);
					}
					++$py_idx;
				} else {
					$str = isset($this->a[$px_idx]) ? rtrim($this->a[$px_idx]) : '';
					if (isset($this->a[$px_idx])) {
						$this->ses[] =     array(self::SES_COMMON, $str);
						$this->lcs += $this->a[$px_idx];
					}
					++$px_idx;
					++$py_idx;
				}
				unset($str);
			}
		}
	}
	public function getEditDistance(){
		return $this->editdis;
	}
	public function getLcs() {
		return $this->lcs;
	}
	public function getSes() {
		return $this->ses;
	}
	public function getDiff(){
		foreach ($this->ses as $k=>$v){
			$ret[$k] = $v[0] . $v[1];
		}
		return $ret;
	}
	public function getDiffOnly(){
		$ret = array();
		foreach ($this->ses as $k=>$v){
			if ($v[0] === self::SES_ADD || $v[0] === self::SES_DELETE) {
				$ret[$k] = $v[0] . $v[1];
			}
		}
		return $ret;
	}
	// test function
	public function getBefore(){
		$ret = array();
		foreach ($this->ses as $k=>$v){
			if ($v[0] === self::SES_COMMON || $v[0] === self::SES_DELETE) {
				$ret[$k] = $v[1];
			}
		}
		return $ret;
	}
	// test function
	public function getAfter(){
		$ret = array();
		foreach ($this->ses as $k=>$v){
			if ($v[0] === self::SES_ADD || $v[0] === self::SES_COMMON) {
				$ret[$k] = $v[1];
			}
		}
		return $ret;
	}
	public function getHtml(){
		foreach ($this->ses as $k=>$v){
			$str = Utility::htmlsc($v[1]);
			switch($v[0]){
				case self::SES_ADD:
					$ret[] = '+<ins class="diff_added">' . $str . '</ins>';
					break;
				case self::SES_DELETE:
					$ret[] = '-<del class="diff_removed">' . $str . '</del>';
					break;
				default:
					$ret[] = ' ' . $str;
					break;
			}
		}
		//return '<pre class="sh" data-brush="diff">' . "\n" . join("\n", $ret) . '</pre>' . "\n";
		return '<pre>' . "\n" . join("\n", $ret) . '</pre>' . "\n";
	}
	public function __toString(){
		return join("\n",self::getDiff());
	}
}
class DiffLine
{
	private $text;
	private $status;
	public function __construct($text)
	{
		$this->text   = $text . "\n";
		$this->status = array();
	}
	public function compare($obj)
	{
		return $this->text == $obj->text;
	}
	public function set($key, $status)
	{
		$this->status[$key] = $status;
	}
	public function get($key)
	{
		return isset($this->status[$key]) ? $this->status[$key] : '';
	}
	public function merge($obj)
	{
		$this->status += $obj->status;
	}
	public function text()
	{
		return $this->text;
	}
}
class LineDiff
{
	private $arr1, $arr2, $m, $n, $pos, $key, $plus, $minus, $equal, $reverse;
	public function __construct($plus = '+', $minus = '-', $equal = ' ')
	{
		$this->plus  = $plus;
		$this->minus = $minus;
		$this->equal = $equal;
	}
	public function arr_compare($key, $arr1, $arr2)
	{
		$this->key  = $key;
		$this->arr1 = $arr1;
		$this->arr2 = $arr2;
		$this->compare();
		$arr = $this->toArray();
		return $arr;
	}
	public function set_str($key, $str1, $str2)
	{
		$this->key  = $key;
		$this->arr1 = array();
		$this->arr2 = array();
		$str1 = str_replace("\r", '', $str1);
		$str2 = str_replace("\r", '', $str2);
		foreach (explode("\n", $str1) as $line) {
			$this->arr1[] = new DiffLine($line);
		}
		foreach (explode("\n", $str2) as $line) {
			$this->arr2[] = new DiffLine($line);
		}
	}
	public function str_compare($str1, $str2)
	{
		$this->set_str('diff', $str1, $str2);
		$this->compare();
		$str = '';
		foreach ($this->toArray() as $obj) {
			$str .= $obj->get('diff') . $obj->text();
		}
		return $str;
	}
	public function compare()
	{
		$this->m = count($this->arr1);
		$this->n = count($this->arr2);
		if ($this->m == 0 || $this->n == 0) { // No need to compare
			$this->result = array(array('x'=>0, 'y'=>0));
			return;
		}
		// Sentinel
		array_unshift($this->arr1, new DiffLine(''));
		$this->m++;
		array_unshift($this->arr2, new DiffLine(''));
		$this->n++;
		$this->reverse = ($this->n < $this->m);
		if ($this->reverse) {
			// Swap
			$tmp = $this->m; $this->m = $this->n; $this->n = $tmp;
			$tmp = $this->arr1; $this->arr1 = $this->arr2; $this->arr2 = $tmp;
			unset($tmp);
		}
		$delta = $this->n - $this->m; // Must be >=0;
		$fp = array();
		$this->path = array();
		for ($p = -($this->m + 1); $p <= ($this->n + 1); $p++) {
			$fp[$p] = -1;
			$this->path[$p] = array();
		}
		for ($p = 0;; $p++) {
			for ($k = -$p; $k <= $delta - 1; $k++) {
				$fp[$k] = $this->snake($k, $fp[$k - 1], $fp[$k + 1]);
			}
			for ($k = $delta + $p; $k >= $delta + 1; $k--) {
				$fp[$k] = $this->snake($k, $fp[$k - 1], $fp[$k + 1]);
			}
			$fp[$delta] = $this->snake($delta, $fp[$delta - 1], $fp[$delta + 1]);
			if ($fp[$delta] >= $this->n) {
				$this->pos = $this->path[$delta]; // 経路を決定
				return;
			}
		}
	}
	public function snake($k, $y1, $y2)
	{
		if ($y1 >= $y2) {
			$_k = $k - 1;
			$y = $y1 + 1;
		} else {
			$_k = $k + 1;
			$y = $y2;
		}
		$this->path[$k] = $this->path[$_k];// ここまでの経路をコピー
		$x = $y - $k;
		while ((($x + 1) < $this->m) && (($y + 1) < $this->n)
			and $this->arr1[$x + 1]->compare($this->arr2[$y + 1]))
		{
			++$x; ++$y;
			$this->path[$k][] = array('x'=>$x, 'y'=>$y); // 経路を追加
		}
		return $y;
	}
	public function toArray()
	{
		$arr = array();
		if ($this->reverse) { // 姑息な…
			$_x = 'y'; $_y = 'x'; $_m = $this->n; $arr1 =& $this->arr2; $arr2 =& $this->arr1;
		} else {
			$_x = 'x'; $_y = 'y'; $_m = $this->m; $arr1 =& $this->arr1; $arr2 =& $this->arr2;
		}
		$x = $y = 1;
		$this->add_count = $this->delete_count = 0;
		$this->pos[] = array('x'=>$this->m, 'y'=>$this->n); // Sentinel
		foreach ($this->pos as $pos) {
			$this->delete_count += ($pos[$_x] - $x);
			$this->add_count    += ($pos[$_y] - $y);
			while ($pos[$_x] > $x) {
				$arr1[$x]->set($this->key, $this->minus);
				$arr[] = $arr1[$x++];
			}
			while ($pos[$_y] > $y) {
				$arr2[$y]->set($this->key, $this->plus);
				$arr[] =  $arr2[$y++];
			}
			if ($x < $_m) {
				$arr1[$x]->merge($arr2[$y]);
				$arr1[$x]->set($this->key, $this->equal);
				$arr[] = $arr1[$x];
			}
			++$x; ++$y;
		}
		return $arr;
	}
}

Diffテストコード

正常に動作しているかを確認するのに利用したテストコード
Utility.phpとDiff.php依存

<style type="text/css">
/* diff.inc.php */
.diff_added {
	color: blue;
}
.diff_removed {
	color: red;
}
</style>
<?php
include "Utility.php";
include "Diff.php";
define('SOURCE_ENCODING', 'UTF-8');
// 正常にDIFFが生成されているか比較
function makeDiff($a, $b) {
	$diff = new Diff($a, $b);
	if ($a == implode("\n", $diff->getBefore())) {
		echo "TEST OK";
	} else {
		echo "TEST NG";
	}
	if ($b == implode("\n", $diff->getAfter())) {
		echo "TEST OK <br>\n";
	} else {
		echo "TEST NG";
	}
	echo $diff->getHtml();
}
// ランダム文字列の生成
function create_random_string($length) {
    $keys = array_flip(array_merge(
        range('0', '9'),
        range('a', 'z'),
        range('A', 'Z')
    ));
    $s = '';
    for ($i = 0; $i < $length; $i++) {
        $s .= array_rand($keys);
    }
    return $s;
}
function createDiffText() {
	$texta = "";
	$textb = "";
	// ランダムな文字列textaとtextbを作成する
	for ($i = 0; $i < 20; $i++) {
		$n = rand(0,10);
		if ($n == 1) {
			// aとbに共通文字列追加
			$sameword = create_random_string(10) ."\n";
			$texta .= $sameword;
			$textb .= $sameword;
		} else if ($n == 3) {
			// bにのみ文字列追加
			$textb .= create_random_string(10) ."\n";
		} else if ($n == 5){
			// aにのみ文字列追加
			$texta .= create_random_string(10) ."\n";
		} else {
			// それ以外はaとbにランダムな文字列を追加
			$texta .= create_random_string(10) ."\n";
			$textb .= create_random_string(10) ."\n";
		}
	}
	// aとbを比較する
	makeDiff($texta, $textb);
}
// createDiffText関数を100回繰り返す
for ($i = 0; $i < 100; $i++) {
	createDiffText();
}

コメント

スポンサー