Diffクラス修正
本家では修正済みかもしれないが、PukiWikiAdvanceのDiffクラスが正常に動いていなかったので修正した。
修正箇所は覚えていないので、ソースコードをそのまま貼り付けます。
2014年6月8日現在：以前は出ていなかったNGが出ることを確認したため、下記コードは特定の条件下で正常に動作していません。
再度1万回テストを実行してみたところNGが出なかったため、なぜNGが出たのかは不明。
2014年6月13日現在
3万回テストを実行してもNGは見受けられなかった。以前のNGはテスト時のキャッシュが原因だった可能性がある。
もしくはテストケースに問題がある可能性。
修正済みDiff
<?php
// "An O(NP) Sequence Comparison Algorithm" for PHP
// Copyright (c) 2012 Logue <logue@hotmail.co.jp> All rights reserved.
// License: BSD license
// based on https://github.com/cubicdaiya/onp
/**
* The algorithm implemented here is based on "An O(NP) Sequence Comparison Algorithm"
* described by Sun Wu, Udi Manber and Gene Myers
*/
/**
 * Line-oriented diff based on the O(NP) sequence comparison algorithm.
 *
 * The constructor computes the shortest edit script (SES) between two
 * sequences of lines; the getters expose the result in several formats.
 * Every line stored in the edit script has had rtrim() applied to it.
 */
class Diff{
    const SES_DELETE = '-';
    const SES_COMMON = ' ';
    const SES_ADD = '+';

    // $a is always the shorter sequence (length $m), $b the longer one (length $n).
    private $a, $b, $m, $n;
    private $editdis = 0;
    // true when the constructor swapped its arguments so that $m <= $n holds.
    private $reverse = false;
    // Every snake end point ever visited: array('x' =>, 'y' =>, 'k' => index of the previous point or -1).
    private $pathposi = array();
    // For each diagonal (shifted by the offset): index into $pathposi of its latest point, or -1.
    private $path = array();
    // Edit script: list of array(operation, line).
    private $ses = array();
    private $lcs = '';

    /**
     * Builds the diff between two texts.
     *
     * @param array|string $a Original data: a list of lines, or a string that is split on "\n".
     * @param array|string $b New data: a list of lines, or a string that is split on "\n".
     */
    public function __construct(/* array */$a, /* array */$b){
        // Each argument is normalised on its own; array_values() guarantees 0..n-1 keys.
        $old = is_array($a) ? array_values($a) : explode("\n", (string) $a);
        $new = is_array($b) ? array_values($b) : explode("\n", (string) $b);

        // The algorithm requires the first sequence to be the shorter one.
        if (count($old) > count($new)) {
            $this->a = $new;
            $this->b = $old;
            $this->reverse = true;
        } else {
            $this->a = $old;
            $this->b = $new;
        }
        // The lengths must describe the sequences as they are stored now.
        $this->m = count($this->a);
        $this->n = count($this->b);

        $this->compose();
    }

    /**
     * Runs the O(NP) search, then rebuilds the edit path and records the edit script.
     */
    private function compose(){
        $p = -1;
        $delta = $this->n - $this->m; // Must be >= 0
        $size = $this->m + $this->n + 3;
        $offset = $this->m + 1; // shifts diagonal numbers into non-negative array indexes
        $fp = array_fill(0, $size, -1);
        $this->path = array_fill(0, $size, -1);

        do {
            ++$p;
            // Diagonals below delta, walking upwards.
            for ($k = -$p; $k < $delta; $k++) {
                $fp[$k+$offset] = $this->snake($k, $fp[$k-1+$offset] + 1, $fp[$k+1+$offset]);
            }
            // Diagonals above delta, walking downwards.
            for ($k = $delta + $p; $k > $delta; --$k) {
                $fp[$k+$offset] = $this->snake($k, $fp[$k-1+$offset] + 1, $fp[$k+1+$offset]);
            }
            // Finally the diagonal that contains the end point (m, n).
            $fp[$delta+$offset] = $this->snake($delta, $fp[$delta-1+$offset] + 1, $fp[$delta+1+$offset]);
        } while ($fp[$delta+$offset] !== $this->n);

        $this->editdis = $delta + 2 * $p;

        // Follow the back-links from the end point to collect the path (end to start).
        $r = $this->path[$delta+$offset];
        $epc = array();
        while ($r !== -1) {
            $_pathposi = $this->pathposi[$r];
            $epc[] = array(
                'x' => $_pathposi['x'],
                'y' => $_pathposi['y'],
                'k' => null
            );
            $r = $_pathposi['k'];
        }
        $this->recordseq($epc);
    }

    /**
     * Extends diagonal $k as far as the two sequences match and records the end point.
     *
     * @param int $k  Diagonal number (y - x).
     * @param int $p  Furthest y on diagonal $k - 1, plus one.
     * @param int $pp Furthest y on diagonal $k + 1.
     * @return int Furthest y reached on diagonal $k.
     */
    private function snake($k, $p, $pp){
        $offset = $this->m + 1;
        // Remember which neighbouring diagonal this point was reached from.
        $r = ($p > $pp) ? $this->path[$k-1+$offset] : $this->path[$k+1+$offset];
        $y = max($p, $pp);
        $x = $y - $k;
        while ($x < $this->m && $y < $this->n &&
            (isset($this->a[$x]) && isset($this->b[$y]) && $this->a[$x] === $this->b[$y])) {
            $x++;
            $y++;
        }
        $this->path[$k+$offset] = count($this->pathposi);
        $this->pathposi[] = array('x' => $x, 'y' => $y, 'k' => $r);
        return $y;
    }

    /**
     * Converts the edit path into the edit script and the LCS string.
     *
     * x indexes $this->a and y indexes $this->b exactly as during the search.
     * When the inputs were swapped, $this->a holds the new text, so a step along
     * x is an addition and a step along y is a deletion.
     *
     * @param array $epc Path points ordered from the end point back to the start.
     */
    private function recordseq ($epc) {
        $x_op = $this->reverse ? self::SES_ADD : self::SES_DELETE;
        $y_op = $this->reverse ? self::SES_DELETE : self::SES_ADD;

        $px_idx = $py_idx = 0;
        for ($i = count($epc) - 1; $i >= 0; --$i) {
            $target = $epc[$i]['y'] - $epc[$i]['x']; // diagonal of the next path point
            while ($px_idx < $epc[$i]['x'] || $py_idx < $epc[$i]['y']) {
                $current = $py_idx - $px_idx;
                if ($target < $current) {
                    // Step along x: consume one element of $this->a.
                    if (isset($this->a[$px_idx])) {
                        $this->ses[] = array($x_op, rtrim($this->a[$px_idx]));
                    }
                    ++$px_idx;
                } else if ($target > $current) {
                    // Step along y: consume one element of $this->b.
                    if (isset($this->b[$py_idx])) {
                        $this->ses[] = array($y_op, rtrim($this->b[$py_idx]));
                    }
                    ++$py_idx;
                } else {
                    // On the target diagonal: both sequences hold the same line here.
                    if (isset($this->a[$px_idx])) {
                        $this->ses[] = array(self::SES_COMMON, rtrim($this->a[$px_idx]));
                        // String concatenation; "+=" would be numeric addition.
                        $this->lcs .= $this->a[$px_idx];
                    }
                    ++$px_idx;
                    ++$py_idx;
                }
            }
        }
    }

    /**
     * @return int Number of added plus deleted lines.
     */
    public function getEditDistance(){
        return $this->editdis;
    }

    /**
     * @return string The common lines concatenated without a separator.
     */
    public function getLcs() {
        return $this->lcs;
    }

    /**
     * @return array Edit script as a list of array(operation, line).
     */
    public function getSes() {
        return $this->ses;
    }

    /**
     * @return array Every line of the edit script, prefixed with its operation character.
     */
    public function getDiff(){
        $ret = array(); // stays an array even when the edit script is empty
        foreach ($this->ses as $k => $v){
            $ret[$k] = $v[0] . $v[1];
        }
        return $ret;
    }

    /**
     * @return array Only the added and deleted lines, prefixed; keys are edit script positions.
     */
    public function getDiffOnly(){
        $ret = array();
        foreach ($this->ses as $k => $v){
            if ($v[0] === self::SES_ADD || $v[0] === self::SES_DELETE) {
                $ret[$k] = $v[0] . $v[1];
            }
        }
        return $ret;
    }

    /**
     * Test helper: rebuilds the original text from the common and deleted lines.
     *
     * @return array Lines keyed by their edit script position.
     */
    public function getBefore(){
        $ret = array();
        foreach ($this->ses as $k => $v){
            if ($v[0] === self::SES_COMMON || $v[0] === self::SES_DELETE) {
                $ret[$k] = $v[1];
            }
        }
        return $ret;
    }

    /**
     * Test helper: rebuilds the new text from the common and added lines.
     *
     * @return array Lines keyed by their edit script position.
     */
    public function getAfter(){
        $ret = array();
        foreach ($this->ses as $k => $v){
            if ($v[0] === self::SES_ADD || $v[0] === self::SES_COMMON) {
                $ret[$k] = $v[1];
            }
        }
        return $ret;
    }

    /**
     * Renders the edit script as a <pre> block, one line per entry.
     * Each line is passed through Utility::htmlsc() before it is wrapped.
     *
     * @return string HTML fragment.
     */
    public function getHtml(){
        $ret = array(); // stays an array even when the edit script is empty
        foreach ($this->ses as $k => $v){
            $str = Utility::htmlsc($v[1]);
            switch($v[0]){
                case self::SES_ADD:
                    $ret[] = '+<ins class="diff_added">' . $str . '</ins>';
                    break;
                case self::SES_DELETE:
                    $ret[] = '-<del class="diff_removed">' . $str . '</del>';
                    break;
                default:
                    $ret[] = ' ' . $str;
                    break;
            }
        }
        //return '<pre class="sh" data-brush="diff">' . "\n" . join("\n", $ret) . '</pre>' . "\n";
        return '<pre>' . "\n" . join("\n", $ret) . '</pre>' . "\n";
    }

    /**
     * @return string The lines of getDiff() joined with "\n".
     */
    public function __toString(){
        return join("\n", $this->getDiff());
    }
}
/**
 * One line of text plus a set of per-key status marks.
 */
class DiffLine
{
    // The line's text with "\n" appended.
    private $text;
    // Map of key => status mark.
    private $status;

    /**
     * @param string $text Line content without the trailing newline.
     */
    public function __construct($text)
    {
        $this->text = $text . "\n";
        $this->status = array();
    }

    /**
     * Tells whether another line has exactly the same text.
     *
     * Uses strict comparison: loose "==" compares numeric-looking strings
     * by value, which would make different lines compare equal.
     *
     * @param DiffLine $obj Line to compare with.
     * @return bool
     */
    public function compare($obj)
    {
        return $this->text === $obj->text;
    }

    /**
     * Stores a status mark under the given key.
     */
    public function set($key, $status)
    {
        $this->status[$key] = $status;
    }

    /**
     * @return mixed The mark stored under $key, or '' when none is set.
     */
    public function get($key)
    {
        return isset($this->status[$key]) ? $this->status[$key] : '';
    }

    /**
     * Copies the marks of another line; marks already present on this line win.
     */
    public function merge($obj)
    {
        $this->status += $obj->status;
    }

    /**
     * @return string The text including its trailing "\n".
     */
    public function text()
    {
        return $this->text;
    }
}
/**
 * Diff over arrays of DiffLine objects that marks every line as added,
 * deleted or unchanged under a caller-chosen status key.
 */
class LineDiff
{
    private $arr1, $arr2, $m, $n, $pos, $key, $plus, $minus, $equal, $reverse;
    // Per diagonal: list of matched (x, y) points on the best path found so far.
    private $path = array();
    // Line counters filled in by toArray(); kept public because they used to be
    // created as dynamic (and therefore public) properties.
    public $add_count = 0;
    public $delete_count = 0;

    /**
     * @param string $plus  Mark for added lines.
     * @param string $minus Mark for deleted lines.
     * @param string $equal Mark for unchanged lines.
     */
    public function __construct($plus = '+', $minus = '-', $equal = ' ')
    {
        $this->plus = $plus;
        $this->minus = $minus;
        $this->equal = $equal;
    }

    /**
     * Compares two arrays of DiffLine objects.
     *
     * @param mixed $key  Status key under which the marks are stored.
     * @param array $arr1 Original lines.
     * @param array $arr2 New lines.
     * @return array Merged list of DiffLine objects carrying their marks.
     */
    public function arr_compare($key, $arr1, $arr2)
    {
        $this->key = $key;
        $this->arr1 = $arr1;
        $this->arr2 = $arr2;
        $this->compare();
        $arr = $this->toArray();
        return $arr;
    }

    /**
     * Loads two strings as line arrays; "\r" is removed and the text is split on "\n".
     */
    public function set_str($key, $str1, $str2)
    {
        $this->key = $key;
        $this->arr1 = array();
        $this->arr2 = array();
        $str1 = str_replace("\r", '', $str1);
        $str2 = str_replace("\r", '', $str2);
        foreach (explode("\n", $str1) as $line) {
            $this->arr1[] = new DiffLine($line);
        }
        foreach (explode("\n", $str2) as $line) {
            $this->arr2[] = new DiffLine($line);
        }
    }

    /**
     * Compares two strings and returns every line prefixed with its mark.
     *
     * @return string
     */
    public function str_compare($str1, $str2)
    {
        $this->set_str('diff', $str1, $str2);
        $this->compare();
        $str = '';
        foreach ($this->toArray() as $obj) {
            $str .= $obj->get('diff') . $obj->text();
        }
        return $str;
    }

    /**
     * Searches for the edit path between the loaded arrays and stores it in $this->pos.
     *
     * Empty inputs take the same route as everything else: with only the
     * sentinel present the search ends with an empty path, and toArray()
     * then reports the other side as wholly added or deleted.
     */
    public function compare()
    {
        $this->m = count($this->arr1);
        $this->n = count($this->arr2);
        // Sentinel: real lines start at index 1.
        array_unshift($this->arr1, new DiffLine(''));
        $this->m++;
        array_unshift($this->arr2, new DiffLine(''));
        $this->n++;
        $this->reverse = ($this->n < $this->m);
        if ($this->reverse) {
            // Swap so that arr1 is the shorter array.
            $tmp = $this->m; $this->m = $this->n; $this->n = $tmp;
            $tmp = $this->arr1; $this->arr1 = $this->arr2; $this->arr2 = $tmp;
            unset($tmp);
        }
        $delta = $this->n - $this->m; // Must be >= 0
        $fp = array();
        $this->path = array();
        for ($p = -($this->m + 1); $p <= ($this->n + 1); $p++) {
            $fp[$p] = -1;
            $this->path[$p] = array();
        }
        for ($p = 0;; $p++) {
            for ($k = -$p; $k <= $delta - 1; $k++) {
                $fp[$k] = $this->snake($k, $fp[$k - 1], $fp[$k + 1]);
            }
            for ($k = $delta + $p; $k >= $delta + 1; $k--) {
                $fp[$k] = $this->snake($k, $fp[$k - 1], $fp[$k + 1]);
            }
            $fp[$delta] = $this->snake($delta, $fp[$delta - 1], $fp[$delta + 1]);
            if ($fp[$delta] >= $this->n) {
                $this->pos = $this->path[$delta]; // the path is decided
                return;
            }
        }
    }

    /**
     * Extends diagonal $k while the lines match, recording each matched point.
     *
     * @param int $k  Diagonal number (y - x).
     * @param int $y1 Furthest y on diagonal $k - 1.
     * @param int $y2 Furthest y on diagonal $k + 1.
     * @return int Furthest y reached on diagonal $k.
     */
    public function snake($k, $y1, $y2)
    {
        if ($y1 >= $y2) {
            $_k = $k - 1;
            $y = $y1 + 1;
        } else {
            $_k = $k + 1;
            $y = $y2;
        }
        $this->path[$k] = $this->path[$_k]; // copy the path travelled so far
        $x = $y - $k;
        while ((($x + 1) < $this->m) && (($y + 1) < $this->n)
            and $this->arr1[$x + 1]->compare($this->arr2[$y + 1]))
        {
            ++$x; ++$y;
            $this->path[$k][] = array('x' => $x, 'y' => $y); // append to the path
        }
        return $y;
    }

    /**
     * Turns the path found by compare() into a flat list of marked lines
     * and updates add_count / delete_count.
     *
     * @return array List of DiffLine objects.
     */
    public function toArray()
    {
        $arr = array();
        if ($this->reverse) { // makeshift: swap the roles back instead of the data
            $_x = 'y'; $_y = 'x'; $_m = $this->n; $arr1 =& $this->arr2; $arr2 =& $this->arr1;
        } else {
            $_x = 'x'; $_y = 'y'; $_m = $this->m; $arr1 =& $this->arr1; $arr2 =& $this->arr2;
        }
        $x = $y = 1;
        $this->add_count = $this->delete_count = 0;
        // Work on a copy so that repeated calls do not keep appending end markers.
        $positions = $this->pos;
        $positions[] = array('x' => $this->m, 'y' => $this->n); // Sentinel
        foreach ($positions as $pos) {
            $this->delete_count += ($pos[$_x] - $x);
            $this->add_count += ($pos[$_y] - $y);
            while ($pos[$_x] > $x) {
                $arr1[$x]->set($this->key, $this->minus);
                $arr[] = $arr1[$x++];
            }
            while ($pos[$_y] > $y) {
                $arr2[$y]->set($this->key, $this->plus);
                $arr[] = $arr2[$y++];
            }
            if ($x < $_m) {
                $arr1[$x]->merge($arr2[$y]);
                $arr1[$x]->set($this->key, $this->equal);
                $arr[] = $arr1[$x];
            }
            ++$x; ++$y;
        }
        return $arr;
    }
}
Diffテストコード
正常に動作しているかを確認するのに利用したテストコード
Utility.phpとDiff.php依存
<style type="text/css">
/* diff.inc.php */
/* Added lines: class carried by the <ins> elements that Diff::getHtml() outputs. */
.diff_added {
color: blue;
}
/* Removed lines: class carried by the <del> elements that Diff::getHtml() outputs. */
.diff_removed {
color: red;
}
</style>
<?php
// Load the helper class and the Diff implementation under test.
include "Utility.php";
include "Diff.php";
// NOTE(review): presumably the charset read by Utility::htmlsc() — confirm against Utility.php.
define('SOURCE_ENCODING', 'UTF-8');
/**
 * Diffs two texts and checks that the diff reproduces both of them.
 *
 * Prints "TEST OK" / "TEST NG" once for the original text (rebuilt from
 * getBefore()) and once for the new text (rebuilt from getAfter()), then
 * prints the HTML rendering of the diff.
 *
 * @param string $a Original text, lines separated by "\n".
 * @param string $b New text, lines separated by "\n".
 */
function makeDiff($a, $b) {
    $diff = new Diff($a, $b);
    // Strict comparison: loose "==" compares numeric-looking strings by value.
    if ($a === implode("\n", $diff->getBefore())) {
        echo "TEST OK";
    } else {
        echo "TEST NG";
    }
    if ($b === implode("\n", $diff->getAfter())) {
        echo "TEST OK <br>\n";
    } else {
        echo "TEST NG";
    }
    echo $diff->getHtml();
}
/**
 * Builds a random string made of the characters 0-9, a-z and A-Z.
 *
 * @param int $length Number of characters to generate.
 * @return string
 */
function create_random_string($length) {
    // Digits first, then lower case, then upper case.
    $alphabet = array_merge(
        range('0', '9'),
        range('a', 'z'),
        range('A', 'Z')
    );
    $result = '';
    $remaining = $length;
    while ($remaining > 0) {
        // array_rand() picks a random position; the value stored there is the character.
        $result .= $alphabet[array_rand($alphabet)];
        --$remaining;
    }
    return $result;
}
/**
 * Generates one random pair of texts and hands it to makeDiff().
 *
 * Twenty rounds are played; each round draws a number from 0 to 10 that
 * decides which of the two texts receives a new random 10-character line.
 */
function createDiffText() {
    $before = "";
    $after = "";
    for ($round = 0; $round < 20; ++$round) {
        switch (rand(0, 10)) {
            case 1:
                // The same line goes into both texts.
                $shared = create_random_string(10) . "\n";
                $before .= $shared;
                $after .= $shared;
                break;
            case 3:
                // A line that exists only in the new text.
                $after .= create_random_string(10) . "\n";
                break;
            case 5:
                // A line that exists only in the original text.
                $before .= create_random_string(10) . "\n";
                break;
            default:
                // Every other draw: an independent random line for each text.
                $before .= create_random_string(10) . "\n";
                $after .= create_random_string(10) . "\n";
                break;
        }
    }
    // Compare the two generated texts.
    makeDiff($before, $after);
}
// Run createDiffText() 100 times.
$i = 0;
while ($i < 100) {
    createDiffText();
    $i++;
}