| <?php |
|
|
| |
| |
| |
| |
| function diff_analysis() { |
| global $task,$user,$setup,$id,$id2,$set; |
| global $comment,$dir; |
|
|
| head("Comparative Analysis: $task ($user), Set $set"); |
|
|
| $c = $comment["$setup-$id"]->name; |
| $c2 = $comment["$setup-$id2"]->name; |
| if (substr($c2,0,strlen($id)+1) == $id."+") { |
| print "Run $id2 vs $id (".substr($c2,strlen($id)).")"; |
| } |
| else { |
| print "Run $id2 ($c2) vs $id ($c)"; |
| } |
| print "</h4>"; |
|
|
| ?><script language="javascript" src="javascripts/prototype.js"></script> |
| <script language="javascript" src="javascripts/scriptaculous.js"></script> |
| <script> |
| function diff(field,sort,count) { |
| var url = '?analysis_diff=' + field + '_diff' |
| + '&setup=<?php print $setup ?>' |
| + '&id=<?php print $id ?>' |
| + '&id2=<?php print $id2 ?>' |
| + '&set=<?php print $set ?>' |
| + '&sort=' + sort |
| + '&count=' + count; |
| new Ajax.Updater(field, url, { method: 'get' }); |
| } |
| function ngram_diff(type,order,count,sort,smooth) { |
| var url = '?analysis_diff=ngram_' + type + '_diff' |
| + '&setup=<?php print $setup ?>' |
| + '&id=<?php print $id ?>' |
| + '&id2=<?php print $id2 ?>' |
| + '&set=<?php print $set ?>' |
| + '&order=' + order |
| + '&smooth=' + smooth |
| + '&sort=' + sort |
| + '&count=' + count; |
| var field = (type == "precision" ? "nGramPrecision" : "nGramRecall") + order; |
| new Ajax.Updater(field, url, { method: 'get' }); |
| } |
| function generic_show_diff(field,parameters) { |
| var url = '?analysis=' + field + '_show' |
| + '&setup=<?php print $setup ?>' |
| + '&id=<?php print $id ?>' |
| + '&id2=<?php print $id2 ?>' |
| + '&set=<?php print $set ?>' |
| + '&' + parameters; |
| new Ajax.Updater(field, url, { method: 'get', evalScripts: true }); |
| } |
| </script> |
| </head> |
| <body> |
| <div id="nGramSummary"><?php ngram_summary_diff() ?></div> |
| <div id="PrecisionByCoverageDiff"></div> |
| <div id="PrecisionRecallDetailsDiff"></div> |
| <div id="bleu">(loading...)</div> |
| <script> |
| diff('bleu','',5); |
| </script> |
| </body></html> |
| <?php |
| } |
|
|
| function precision_by_coverage_diff() { |
| global $experiment,$evalset,$dir,$set,$id,$id2; |
| $img_width = 1000; |
|
|
| print "<h3>Precision by Coverage</h3>"; |
| print "The graphs display what ratio of words of a specific type are translated correctly (yellow), and what ratio is deleted (blue)."; |
| print " The extend of the boxes is scaled on the x-axis by the number of tokens of the displayed type."; |
| |
| $data = file(get_current_analysis_filename2("precision","precision-by-corpus-coverage")); |
| $total = 0; |
| $log_info = array(); |
| for($i=0;$i<count($data);$i++) { |
| $item = split("\t",$data[$i]); |
| $info[$item[0]]["precision"] = $item[1]; |
| $info[$item[0]]["delete"] = $item[2]; |
| $info[$item[0]]["length"] = $item[3]; |
| $info[$item[0]]["total"] = $item[4]; |
| $total += $item[4]; |
| $log_count = -1; |
| if ($item[0]>0) { |
| $log_count = (int) (log($item[0])/log(2)); |
| } |
| if (!array_key_exists($log_count,$log_info)) { |
| $log_info[$log_count]["precision"] = 0; |
| $log_info[$log_count]["delete"] = 0; |
| $log_info[$log_count]["length"] = 0; |
| $log_info[$log_count]["total"] = 0; |
| } |
| $log_info[$log_count]["precision"] += $item[1]; |
| $log_info[$log_count]["delete"] += $item[2]; |
| $log_info[$log_count]["length"] += $item[3]; |
| $log_info[$log_count]["total"] += $item[4]; |
| } |
| $log_info_new = $log_info; |
|
|
| |
| $data = file(get_current_analysis_filename("precision","precision-by-corpus-coverage")); |
| for($i=0;$i<count($data);$i++) { |
| $item = split("\t",$data[$i]); |
| $info[$item[0]]["precision"] -= $item[1]; |
| $info[$item[0]]["delete"] -= $item[2]; |
| $info[$item[0]]["length"] -= $item[3]; |
| $log_count = -1; |
| if ($item[0]>0) { |
| $log_count = (int) (log($item[0])/log(2)); |
| } |
| $log_info[$log_count]["precision"] -= $item[1]; |
| $log_info[$log_count]["delete"] -= $item[2]; |
| $log_info[$log_count]["length"] -= $item[3]; |
| } |
|
|
|
|
| print "<h4>By log<sub>2</sub>-count in the training corpus</h4>"; |
| precision_by_coverage_diff_graph("byCoverage",$log_info,$log_info_new,$total,$img_width,SORT_NUMERIC); |
| precision_by_coverage_diff_matrix(); |
|
|
| |
| $d = dir("$dir/evaluation/$set.analysis.".get_precision_analysis_version($dir,$set,$id)); |
| while (false !== ($file = $d->read())) { |
| if (preg_match('/precision-by-corpus-coverage.(.+)$/',$file, $match) && |
| file_exists(get_current_analysis_filename2("precision","precision-by-corpus-coverage.$match[1]"))) { |
| precision_by_coverage_diff_factored($img_width,$total,$file,$match[1]); |
| } |
| } |
| } |
|
|
| function precision_by_coverage_diff_factored($img_width,$total,$file,$factor_id) { |
| global $dir,$set,$id,$id2; |
| $data = file(get_current_analysis_filename2("precision",$file)); |
| for($i=0;$i<count($data);$i++) { |
| $item = split("\t",$data[$i]); |
| $factor = $item[0]; |
| $count = $item[1]; |
| $info_factored[$factor][$count]["precision"] = $item[2]; |
| $info_factored[$factor][$count]["delete"] = $item[3]; |
| $info_factored[$factor][$count]["length"] = $item[4]; |
| $info_factored[$factor][$count]["total"] = $item[5]; |
| $info_factored_sum[$factor]["precision"] += $item[2]; |
| $info_factored_sum[$factor]["delete"] += $item[3]; |
| $info_factored_sum[$factor]["length"] += $item[4]; |
| $info_factored_sum[$factor]["total"] += $item[5]; |
| $total_factored[$factor] += $item[5]; |
| $log_count = -1; |
| if ($count>0) { |
| $log_count = (int) (log($count)/log(2)); |
| } |
| $log_info_factored[$factor][$log_count]["precision"] += $item[2]; |
| $log_info_factored[$factor][$log_count]["delete"] += $item[3]; |
| $log_info_factored[$factor][$log_count]["length"] += $item[4]; |
| $log_info_factored[$factor][$log_count]["total"] += $item[5]; |
| } |
| $info_factored_new = $info_factored; |
| $info_factored_sum_new = $info_factored_sum; |
| $log_info_factored_new = $log_info_factored; |
|
|
| |
| $data = file(get_current_analysis_filename("precision",$file)); |
| for($i=0;$i<count($data);$i++) { |
| $item = split("\t",$data[$i]); |
| $factor = $item[0]; |
| $count = $item[1]; |
| $info_factored[$factor][$count]["precision"] -= $item[2]; |
| $info_factored[$factor][$count]["delete"] -= $item[3]; |
| $info_factored[$factor][$count]["length"] -= $item[4]; |
| $info_factored_sum[$factor]["precision"] -= $item[2]; |
| $info_factored_sum[$factor]["delete"] -= $item[3]; |
| $info_factored_sum[$factor]["length"] -= $item[4]; |
| $log_count = -1; |
| if ($count>0) { |
| $log_count = (int) (log($count)/log(2)); |
| } |
| $log_info_factored[$factor][$log_count]["precision"] -= $item[2]; |
| $log_info_factored[$factor][$log_count]["delete"] -= $item[3]; |
| $log_info_factored[$factor][$log_count]["length"] -= $item[4]; |
| } |
| print "<h4>By factor ".factor_name("input",$factor_id)."</h4>"; |
| precision_by_coverage_diff_graph("byFactor",$info_factored_sum,$info_factored_sum_new,$total,$img_width,SORT_STRING); |
|
|
| print "<h4>For each factor, by log<sub>2</sub>-count in the corpus</h4>"; |
| foreach ($log_info_factored as $factor => $info) { |
| if ($total_factored[$factor]/$total > 0.01) { |
| print "<table style=\"display:inline;\"><tr><td align=center><font size=-2><b>$factor</b></font>"; |
| precision_by_coverage_diff_graph("byCoverageFactor$factor",$info,$log_info_factored_new[$factor],$total,10+2*$img_width*$total_factored[$factor]/$total,SORT_NUMERIC); |
| print "</td></tr></table>"; |
| } |
| } |
| } |
|
|
| function precision_by_word_diff($type) { |
| global $dir,$set,$id,$id2; |
| $byCoverage = -2; |
| $byFactor = "false"; |
| if ($type == "byCoverage") { |
| $byCoverage = (int) $_GET["type"]; |
| } |
| else if ($type == "byFactor") { |
| $byFactor = $_GET["type"]; |
| } |
| else if (preg_match("/byCoverageFactor(.+)/",$type,$match)) { |
| $byCoverage = (int) $_GET["type"]; |
| $byFactor = $match[1]; |
| } |
|
|
| $data = file(get_current_analysis_filename2("precision","precision-by-input-word")); |
| $total = 0; |
| $info = array(); |
| for($i=0;$i<count($data);$i++) { |
| $line = rtrim($data[$i]); |
| $item = split("\t",$line); |
| $total += $item[3]; |
|
|
| |
| $count = $item[4]; |
| $log_count = -1; |
| if ($count>0) { |
| $log_count = (int) (log($count)/log(2)); |
| } |
| if ($byCoverage != -2 && $byCoverage != $log_count) { |
| continue; |
| } |
|
|
| |
| $word = $item[5]; |
| if ($byFactor != "false" && $byFactor != $item[6]) { |
| continue; |
| } |
| if (!array_key_exists($word,$info)) { |
| $info[$word]["precision"] = 0; |
| $info[$word]["delete"] = 0; |
| $info[$word]["length"] = 0; |
| $info[$word]["total"] = 0; |
| } |
| $info[$word]["precision"] += $item[0]; |
| $info[$word]["delete"] += $item[1]; |
| $info[$word]["length"] += $item[2]; |
| $info[$word]["total"] += $item[3]; |
| } |
| $info_new = $info; |
|
|
| $data = file(get_current_analysis_filename("precision","precision-by-input-word")); |
| for($i=0;$i<count($data);$i++) { |
| $line = rtrim($data[$i]); |
| $item = split("\t",$line); |
|
|
| |
| $count = $item[4]; |
| $log_count = -1; |
| if ($count>0) { |
| $log_count = (int) (log($count)/log(2)); |
| } |
| if ($byCoverage != -2 && $byCoverage != $log_count) { |
| continue; |
| } |
|
|
| |
| $word = $item[5]; |
| if ($byFactor != "false" && $byFactor != $item[6]) { |
| continue; |
| } |
| if (!array_key_exists($word,$info)) { |
| $info[$word]["precision"] = 0; |
| $info[$word]["delete"] = 0; |
| $info[$word]["length"] = 0; |
| $info_new[$word]["length"] = 0; |
| $info_new[$word]["delete"] = 0; |
| $info_new[$word]["precision"] = 0; |
| $info_new[$word]["total"] = 0; |
| $info[$word]["total"] = -$item[3]; |
| } |
| $info[$word]["precision"] -= $item[0]; |
| $info[$word]["delete"] -= $item[1]; |
| $info[$word]["length"] -= $item[2]; |
| } |
|
|
| print "<table border=1><tr><td align=center> </td><td align=center colspan=3>Precision</td><td align=center colspan=2>Precision Impact</td><td align=center colspan=3>Delete</td><td align=center colspan=2>Delete Impact</td><td align=center>Length</td></tr>\n"; |
| foreach ($info as $word => $wordinfo) { |
| print "<tr><td align=center>$word</td>"; |
| printf("<td align=right>%.1f%s</td><td align=right>%+.1f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>",$info_new[$word]["precision"]/$wordinfo["total"]*100,"%",$wordinfo["precision"]/$wordinfo["total"]*100,"%",$wordinfo["precision"],$wordinfo["total"]); |
| printf("<td align=right>%+.2f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>",$wordinfo["precision"]/$total*100,"%",$wordinfo["precision"],$total); |
| printf("<td align=right>%.1f%s</td><td align=right>%+.1f%s</td><td align=right><font size=-1>%+d/%d</font></td>",$info_new[$word]["delete"]/$wordinfo["total"]*100,"%",$wordinfo["delete"]/$wordinfo["total"]*100,"%",$wordinfo["delete"],$wordinfo["total"]); |
| printf("<td align=right>%+.2f%s</td><td align=right><font size=-1>%+d/%d</font></td>",$wordinfo["delete"]/$total*100,"%",$wordinfo["delete"],$total); |
| printf("<td align=right>%+.3f</td>",$wordinfo["length"]/$wordinfo["total"]); |
| print "</tr>"; |
| } |
| print "</table>\n"; |
| } |
|
|
| function precision_by_coverage_diff_matrix() { |
| global $id,$id2; |
| print "<h4>Impact of Change in Coverage</h4>"; |
| print "Coverage in run $id is the x-axis, change in coverage in run $id2 is the y-axis. Size of box reflects how many output words are produced, yellow is the number of correct translations, green indicates increase, green decrease. The bleu rectangle below each box indicates number of words dropped, and increase (cyan) or decrease (purple).<p>("; |
| $scale = 30; |
| for($i=1; $i<=5; $i++) { |
| $size = (int)(sqrt($i*$scale)); |
| $name = "size-$i"; |
| print "<canvas id=\"$name\" width=\"$size\" height=\"$size\"></canvas><script language=\"javascript\"> |
| var canvas = document.getElementById(\"$name\"); |
| var ctx = canvas.getContext(\"2d\"); |
| ctx.fillStyle = \"rgb(0,0,0)\"; |
| ctx.fillRect (0, 0, $size, $size); |
| </script> = $i word"; |
| if ($i>1) { print "s"; } |
| if ($i<5) { print ", "; } |
| } |
| print ")<p>"; |
| |
| $data = file(get_current_analysis_filename("precision","precision-by-input-word")); |
| $word = array(); $class = array(); |
| for($i=0;$i<count($data);$i++) { |
| $line = rtrim($data[$i]); |
| $item = split("\t",$line); |
| $surface = $item[5]; |
| $word[$surface] = array(); |
| $word[$surface]["precision"] = $item[0]; |
| $word[$surface]["delete"] = $item[1]; |
| $word[$surface]["total"] = $item[2]; |
| $word[$surface]["coverage"] = $item[4]; |
| if ($item[4] == 0) { $log_count = -1; } |
| else { $log_count = (int) (log($item[4])/log(2)); } |
| $word[$surface]["log_count"] = $log_count; |
| if (!array_key_exists($log_count,$class)) { $class[$log_count] = array(); } |
| $class[$log_count][] = $surface; |
| } |
|
|
| |
| $matrix = array(); |
| $max_base_log_count = -1; |
| $min_alt_log_count = 99; |
| $max_alt_log_count = -1; |
| foreach(array_keys($class) as $log_count) { |
| $matrix[$log_count] = array(); |
| if ($log_count > $max_base_log_count) { $max_base_log_count = $log_count; } |
| } |
|
|
| |
| $data = file(get_current_analysis_filename2("precision","precision-by-input-word")); |
| for($i=0;$i<count($data);$i++) { |
| $line = rtrim($data[$i]); |
| $item = split("\t",$line); |
| $surface = $item[5]; |
| if ($item[4] == 0) { $alt = -1; } |
| else { $alt = (int) (log($item[4])/log(2)); } |
|
|
| $base = -1; |
| if (array_key_exists($surface,$word)) { |
| $base = $word[$surface]["log_count"]; |
| } |
|
|
| $alt -= $base; |
| if ($alt > $max_alt_log_count) { $max_alt_log_count = $alt; } |
| if ($alt < $min_alt_log_count) { $min_alt_log_count = $alt; } |
|
|
| if (!array_key_exists($alt,$matrix[$base])) { |
| $matrix[$base][$alt] = array(); |
| $matrix[$base][$alt]["precision1"] = 0; |
| $matrix[$base][$alt]["delete1"] = 0; |
| $matrix[$base][$alt]["total1"] = 0; |
| $matrix[$base][$alt]["coverage1"] = 0; |
| $matrix[$base][$alt]["precision2"] = 0; |
| $matrix[$base][$alt]["delete2"] = 0; |
| $matrix[$base][$alt]["total2"] = 0; |
| $matrix[$base][$alt]["coverage2"] = 0; |
| } |
| |
| if (array_key_exists($surface,$word)) { |
| $matrix[$base][$alt]["precision1"] += $word[$surface]["precision"]; |
| $matrix[$base][$alt]["delete1"] += $word[$surface]["delete"]; |
| $matrix[$base][$alt]["total1"] += $word[$surface]["total"]; |
| $matrix[$base][$alt]["coverage1"] += $word[$surface]["count"]; |
| $matrix[$base][$alt]["precision2"] += $item[0]; |
| $matrix[$base][$alt]["delete2"] += $item[1]; |
| $matrix[$base][$alt]["total2"] += $item[2]; |
| $matrix[$base][$alt]["coverage2"] += $item[4]; |
| } |
| } |
|
|
| |
| print "<table border=1 cellspacing=0 cellpadding=0><tr><td> </td>"; |
| for($base=-1;$base<=$max_base_log_count;$base++) { |
| print "<td align=center>$base</td>"; |
| } |
| print "<td></td></tr>"; |
| for($alt=$max_alt_log_count;$alt>=$min_alt_log_count;$alt--) { |
| print "<tr><td>$alt</td>"; |
| for($base=-1;$base<=$max_base_log_count;$base++) { |
| print "<td align=center valign=center>"; |
| if (array_key_exists($base,$matrix) && |
| array_key_exists($alt,$matrix[$base])) { |
| |
| |
| |
| |
| |
| |
| $scale = 30; |
| $total = $matrix[$base][$alt]["total1"]; |
| if ($matrix[$base][$alt]["total2"] > $total) { |
| $total = $matrix[$base][$alt]["total2"]; |
| } |
| $total = (int)(sqrt($total*$scale)); |
| if ($total>0) { |
| $prec1 = $matrix[$base][$alt]["precision1"]*$scale; |
| $prec2 = $matrix[$base][$alt]["precision2"]*$scale; |
| if ($prec1 > $prec2) { |
| $prec_base = (int)(sqrt($prec1)); |
| $prec_imp = (int)(sqrt($prec1-$prec2)); |
| $prec_color = "255,100,100"; |
| } |
| else { |
| $prec_base = (int)(sqrt($prec2)); |
| $prec_imp = (int)(sqrt($prec2-$prec1)); |
| $prec_color = "100,255,100"; |
| } |
| $prec_base_top = (int)(($total-$prec_base)/2); |
| $prec_imp_top = (int)(($total-$prec_imp)/2); |
|
|
| $del1 = $matrix[$base][$alt]["delete1"]*$scale; |
| $del2 = $matrix[$base][$alt]["delete2"]*$scale; |
| if ($del1 > $del2) { |
| $del_base = $del1; |
| $del_imp = $del1-$del2; |
| $del_color = "150,100,255"; |
| } |
| else { |
| $del_base = $del2; |
| $del_imp = $del2-$del1; |
| $del_color = "100,200,200"; |
| } |
| $del_base_height = (int)($del_base/$total); |
| $del_imp_height = (int)($del_imp/$total); |
|
|
| $name = "matrix-$base-$alt"; |
| |
| print "<a href=\"javascript:generic_show_diff('CoverageMatrixDetails','base=$base&alt=$alt')\">"; |
| print "<canvas id=\"$name\" width=\"$total\" height=\"".($total+$del_base_height+$del_imp_height)."\"></canvas></a>"; |
| print "<script language=\"javascript\"> |
| var canvas = document.getElementById(\"$name\"); |
| var ctx = canvas.getContext(\"2d\"); |
| ctx.fillStyle = \"rgb(200,200,200)\"; |
| ctx.fillRect (0, 0, $total, $total); |
| ctx.fillStyle = \"rgb(200,200,0)\"; |
| ctx.fillRect ($prec_base_top, $prec_base_top, $prec_base, $prec_base); |
| ctx.fillStyle = \"rgb($prec_color)\"; |
| ctx.fillRect ($prec_imp_top, $prec_imp_top, $prec_imp, $prec_imp); |
| ctx.fillStyle = \"rgb(100,100,255)\"; |
| ctx.fillRect (0, $total, $total, $del_base_height); |
| ctx.fillStyle = \"rgb($del_color)\"; |
| ctx.fillRect (0, ".($total+$del_base_height).", $total, $del_imp_height); |
| </script>"; |
| } |
| } |
| print "</td>"; |
| } |
| print "<td>$alt</td></tr>"; |
| } |
| print "<tr><td></td>"; |
| for($base=-1;$base<=$max_base_log_count;$base++) { |
| print "<td align=center>$base</td>"; |
| } |
| print "<td></td></tr></table><div id=\"CoverageMatrixDetails\"></div>"; |
| } |
|
|
| function precision_by_coverage_diff_matrix_details() { |
| $alt = $_GET["alt"]; |
| $base = $_GET["base"]; |
|
|
| $impact_total = 0; |
| $data = file(get_current_analysis_filename("precision","precision-by-input-word")); |
| $word = array(); $class = array(); |
| for($i=0;$i<count($data);$i++) { |
| $line = rtrim($data[$i]); |
| $item = split("\t",$line); |
| if ($item[4] == 0) { $log_count = -1; } |
| else { $log_count = (int) (log($item[4])/log(2)); } |
| if ($log_count == $base) { |
| $surface = $item[5]; |
| $word[$surface] = array(); |
| $word[$surface]["precision"] = $item[0]; |
| $word[$surface]["delete"] = $item[1]; |
| $word[$surface]["total"] = $item[2]; |
| $word[$surface]["coverage"] = $item[4]; |
| } |
| $impact_total += $item[2]; |
| } |
|
|
| print "<table border=1><tr><td align=center> </td><td align=center colspan=3>Precision</td><td align=center colspan=2>Precision Impact</td><td align=center colspan=3>Delete</td><td align=center colspan=2>Delete Impact</td></tr>\n"; |
|
|
| |
| $data = file(get_current_analysis_filename2("precision","precision-by-input-word")); |
| for($i=0;$i<count($data);$i++) { |
| $line = rtrim($data[$i]); |
| $item = split("\t",$line); |
| if ($item[4] == 0) { $log_count = -1; } |
| else { $log_count = (int) (log($item[4])/log(2)); } |
| $surface = $item[5]; |
| if ($log_count-$base == $alt && array_key_exists($surface,$word)) { |
| $precision = $item[0]; |
| $delete = $item[1]; |
| $total = $item[3]; |
| $coverage = $item[4]; |
| $surface = $item[5]; |
| $out = sprintf("%07d", (1-($precision-$word[$surface]["precision"])/$impact_total)*1000000); |
| $out .= "\t<tr><td align=cente>$surface</td>"; |
| $out .= sprintf("<td align=right>%.1f%s</td><td align=right>%+.1f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>", |
| $precision/$total*100,"%", |
| ($precision-$word[$surface]["precision"])/$total*100,"%", |
| $precision-$word[$surface]["precision"],$total); |
| $out .= sprintf("<td align=right>%+.2f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>", |
| ($precision-$word[$surface]["precision"])/$impact_total*100,"%", |
| $precision-$word[$surface]["precision"],$impact_total); |
| $out .= sprintf("<td align=right>%.1f%s</td><td align=right>%+.1f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>", |
| $delete/$total*100,"%", |
| ($delete-$word[$surface]["delete"])/$total*100,"%", |
| $delete-$word[$surface]["delete"],$total); |
| $out .= sprintf("<td align=right>%+.2f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>", |
| ($delete-$word[$surface]["delete"])/$impact_total*100,"%", |
| $delete-$word[$surface]["delete"],$impact_total); |
| $out .= "</tr>"; |
| $all_out[] = $out; |
| } |
| } |
| sort($all_out); |
| foreach($all_out as $out) { $o = explode("\t",$out); print $o[1]; } |
| print "</table>"; |
| } |
|
|
| function precision_by_coverage_diff_graph($name,$log_info,$log_info_new,$total,$img_width,$sort_type) { |
| $keys = array_keys($log_info); |
| sort($keys,$sort_type); |
|
|
| print "<div id=\"Toggle$name\" onClick=\"document.getElementById('Table$name').style.display = 'none'; this.style.display = 'none';\" style=\"display:none;\"><font size=-2>(hide table)</font></div>\n"; |
| print "<div id=\"Table$name\" style=\"display:none;\">\n"; |
| print "<table border=1><tr><td align=center> </td><td align=center colspan=3>Precision</td><td align=center colspan=2>Precision Impact</td><td align=center colspan=3>Delete</td><td align=center colspan=2>Delete Impact</td><td align=center>Length</td></tr>\n"; |
| foreach ($keys as $i) { |
| if (array_key_exists($i,$log_info)) { |
| print "<tr><td align=center>$i</td>"; |
| printf("<td align=right>%.1f%s</td><td align=right>%.1f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>",$log_info_new[$i]["precision"]/$log_info[$i]["total"]*100,"%",$log_info[$i]["precision"]/$log_info[$i]["total"]*100,"%",$log_info[$i]["precision"],$log_info[$i]["total"]); |
| printf("<td align=right>%+.2f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>",$log_info[$i]["precision"]/$total*100,"%",$log_info[$i]["precision"],$total); |
| printf("<td align=right>%.1f%s</td><td align=right>%+.1f%s</td><td align=right><font size=-1>%+d/%d</font></td>",$log_info_new[$i]["delete"]/$log_info[$i]["total"]*100,"%",$log_info[$i]["delete"]/$log_info[$i]["total"]*100,"%",$log_info[$i]["delete"],$log_info[$i]["total"]); |
| printf("<td align=right>%+.2f%s</td><td align=right><font size=-1>%+d/%d</font></td>",$log_info[$i]["delete"]/$total*100,"%",$log_info[$i]["delete"],$total); |
| printf("<td align=right>%+.3f</td>",$log_info[$i]["length"]/$log_info[$i]["total"]); |
| print "<td><A HREF=\"javascript:generic_show_diff('PrecisionByWordDiff$name','type=$i')\">Ⓘ</A></td>"; |
| print "</tr>"; |
| } |
| } |
| print "</table><div id=\"PrecisionByWordDiff$name\"></div></div>"; |
|
|
| print "<div id=\"Graph$name\" onClick=\"document.getElementById('Table$name').style.display = 'block'; document.getElementById('Toggle$name').style.display = 'block';\"><canvas id=\"$name\" width=$img_width height=300></canvas></div>"; |
| print "<script language=\"javascript\"> |
| var canvas = document.getElementById(\"$name\"); |
| var ctx = canvas.getContext(\"2d\"); |
| ctx.lineWidth = 0.5; |
| ctx.font = '9px serif'; |
| "; |
| for($line=-1;$line<=0.8;$line+=.2) { |
| $height = 90-$line/2*180; |
| print "ctx.moveTo(20, $height);\n"; |
| print "ctx.lineTo($img_width, $height);\n"; |
| print "ctx.fillText(\"".sprintf("%d",10 * $line * 1.001)."\%\", 0, $height+4);"; |
| } |
| for($line=-0.4;$line<=0.4;$line+=.2) { |
| $height = 250+$line/2*180; |
| print "ctx.moveTo(20, $height);\n"; |
| print "ctx.lineTo($img_width, $height);\n"; |
| if ($line != 0) { |
| print "ctx.fillText(\"".sprintf("%d",10 * $line * 1.001)."\%\", 0, $height+4);"; |
| } |
| } |
| print "ctx.strokeStyle = \"rgb(100,100,100)\"; ctx.stroke();\n"; |
|
|
| $total = 0; |
| foreach ($keys as $i) { |
| $total += $log_info[$i]["total"]; |
| } |
| $total_so_far = 0; |
| foreach ($keys as $i) { |
| $prec_ratio = $log_info[$i]["precision"]/$log_info[$i]["total"]; |
| $x = (int)(20+($img_width-20) * $total_so_far / $total); |
| $y = (int)(90-($prec_ratio*180*5)); |
| $width = (int)($img_width * $log_info[$i]["total"]/$total); |
| $height = (int)($prec_ratio*180*5); |
| print "ctx.fillStyle = \"rgb(200,200,0)\";"; |
| print "ctx.fillRect ($x, $y, $width, $height);"; |
|
|
| $del_ratio = $log_info[$i]["delete"]/$log_info[$i]["total"]; |
| $height = (int)($del_ratio*180*5); |
| print "ctx.fillStyle = \"rgb(100,100,255)\";"; |
| print "ctx.fillRect ($x, 250, $width, $height);"; |
|
|
| $total_so_far += $log_info[$i]["total"]; |
|
|
| if ($width>3) { |
| print "ctx.fillStyle = \"rgb(0,0,0)\";"; |
| |
| print "ctx.fillText(\"$i\", $x+$width/2-3, 190);"; |
| |
| } |
| } |
| print "</script>"; |
| } |
|
|
|
|
| |
| function precision_recall_details_diff() { |
| ?> |
| <table width=100%> |
| <tr> |
| <td width=25% valign=top><div id="nGramPrecision1">(loading...)</div></td> |
| <td width=25% valign=top><div id="nGramPrecision2">(loading...)</div></td> |
| <td width=25% valign=top><div id="nGramPrecision3">(loading...)</div></td> |
| <td width=25% valign=top><div id="nGramPrecision4">(loading...)</div></td> |
| </tr><tr> |
| <td width=25% valign=top><div id="nGramRecall1">(loading...)</div></td> |
| <td width=25% valign=top><div id="nGramRecall2">(loading...)</div></td> |
| <td width=25% valign=top><div id="nGramRecall3">(loading...)</div></td> |
| <td width=25% valign=top><div id="nGramRecall4">(loading...)</div></td> |
| </tr></table> |
| <script language="javascript"> |
| ngram_diff('precision',1,5,'',0); |
| ngram_diff('precision',2,5,'',0); |
| ngram_diff('precision',3,5,'',0); |
| ngram_diff('precision',4,5,'',0); |
| ngram_diff('recall',1,5,'',0); |
| ngram_diff('recall',2,5,'',0); |
| ngram_diff('recall',3,5,'',0); |
| ngram_diff('recall',4,5,'',0); |
| </script> |
| <?php |
| } |
|
|
| function ngram_summary_diff() { |
| global $experiment,$evalset,$dir,$set,$id,$id2; |
|
|
| |
| for($idx=0;$idx<2;$idx++) { |
| $data = file(get_analysis_filename($dir,$set,$idx?$id2:$id,"basic","summary")); |
| for($i=0;$i<count($data);$i++) { |
| $item = split(": ",$data[$i]); |
| $info[$idx][$item[0]] = $item[1]; |
| } |
| } |
|
|
| print "<table cellspacing=5 width=100%><tr><td valign=top align=center bgcolor=#eeeeee>"; |
| print "<b>Precision of Output</b><br>"; |
| |
| $type = "precision"; |
| print "<table><tr><td>$type</td><td>1-gram</td><td>2-gram</td><td>3-gram</td><td>4-gram</td></tr>\n"; |
| printf("<tr><td>correct</td><td>%d (%+d)</td><td>%d (%+d)</td><td>%d (%+d)</td><td>%d (%+d)</td></tr>\n", |
| $info[1]["$type-1-correct"],$info[1]["$type-1-correct"]-$info[0]["$type-1-correct"], |
| $info[1]["$type-2-correct"],$info[1]["$type-2-correct"]-$info[0]["$type-2-correct"], |
| $info[1]["$type-3-correct"],$info[1]["$type-3-correct"]-$info[0]["$type-3-correct"], |
| $info[1]["$type-4-correct"],$info[1]["$type-4-correct"]-$info[0]["$type-4-correct"]); |
| printf("<tr><td> </td><td>%.1f%s (%+.1f%s)</td><td>%.1f%s (%+.1f%s)</td><td>%.1f%s (%+.1f%s)</td><td>%.1f%s (%+.1f%s)</td></tr>\n", |
| $info[1]["$type-1-correct"]/$info[1]["$type-1-total"]*100,'%',$info[1]["$type-1-correct"]/$info[1]["$type-1-total"]*100-$info[0]["$type-1-correct"]/$info[0]["$type-1-total"]*100,'%', |
| $info[1]["$type-2-correct"]/$info[1]["$type-2-total"]*100,'%',$info[1]["$type-2-correct"]/$info[1]["$type-2-total"]*100-$info[0]["$type-2-correct"]/$info[0]["$type-2-total"]*100,'%', |
| $info[1]["$type-3-correct"]/$info[1]["$type-3-total"]*100,'%',$info[1]["$type-3-correct"]/$info[1]["$type-3-total"]*100-$info[0]["$type-3-correct"]/$info[0]["$type-3-total"]*100,'%', |
| $info[1]["$type-4-correct"]/$info[1]["$type-4-total"]*100,'%',$info[1]["$type-4-correct"]/$info[1]["$type-4-total"]*100-$info[0]["$type-4-correct"]/$info[0]["$type-4-total"]*100,'%'); |
| printf("<tr><td>wrong</td><td>%d (%+d)</td><td>%d (%+d)</td><td>%d (%+d)</td><td>%d (%+d)</td></tr>\n", |
| $info[1]["$type-1-total"]-$info[1]["$type-1-correct"],($info[1]["$type-1-total"]-$info[1]["$type-1-correct"])-($info[0]["$type-1-total"]-$info[0]["$type-1-correct"]), |
| $info[1]["$type-2-total"]-$info[1]["$type-2-correct"],($info[1]["$type-2-total"]-$info[1]["$type-2-correct"])-($info[0]["$type-2-total"]-$info[0]["$type-2-correct"]), |
| $info[1]["$type-3-total"]-$info[1]["$type-3-correct"],($info[1]["$type-3-total"]-$info[1]["$type-3-correct"])-($info[0]["$type-3-total"]-$info[0]["$type-3-correct"]), |
| $info[1]["$type-4-total"]-$info[1]["$type-4-correct"],($info[1]["$type-4-total"]-$info[1]["$type-4-correct"])-($info[0]["$type-4-total"]-$info[0]["$type-4-correct"])); |
| print "</table>"; |
| |
|
|
| print "<A HREF=\"javascript:generic_show_diff('PrecisionRecallDetailsDiff','')\">details</A> "; |
| if (file_exists(get_current_analysis_filename("precision","precision-by-corpus-coverage")) && |
| file_exists(get_current_analysis_filename2("precision","precision-by-corpus-coverage"))) { |
| print "| <A HREF=\"javascript:generic_show_diff('PrecisionByCoverageDiff','')\">precision of input by coverage</A> "; |
| } |
|
|
| print "</td><td valign=top align=center bgcolor=#eeeeee>"; |
| print "<b>Metrics</b><br>\n"; |
|
|
| for($idx=0;$idx<2;$idx++) { |
| $each_score = explode(" ; ",$experiment[$idx?$id2:$id]->result[$set]); |
| for($i=0;$i<count($each_score);$i++) { |
| if (preg_match('/([\d\(\)\.\s]+) (BLEU[\-c]*)/',$each_score[$i],$match) || |
| preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match) || |
| preg_match('/([\d\(\)\.\s]+) (METEOR[\-c]*)/',$each_score[$i],$match)) { |
| $score[$match[2]][$idx] = $match[1]; |
| } |
| } |
| } |
| $header = ""; $score_line = ""; $diff_line = ""; |
| foreach ($score as $name => $value) { |
| $header .= "<td>$name</td>"; |
| $score_line .= "<td>".$score[$name][1]."</td>"; |
| $diff_line .= sprintf("<td>%+.2f</td>",$score[$name][1]-$score[$name][0]); |
| } |
| print "<table border=1><tr>".$header."</tr><tr>".$score_line."</tr><tr>".$diff_line."</tr></table>"; |
| printf("length-diff<br>%d (%+d)",$info[1]["precision-1-total"]-$info[1]["recall-1-total"],$info[1]["precision-1-total"]-$info[0]["precision-1-total"]); |
|
|
| print "</td><tr><table>"; |
| } |
|
|
| function bleu_diff() { |
| $count = $_GET['count']; |
| if ($count == 0) { $count = 5; } |
|
|
| print "<b>annotated sentences</b><br>"; |
| print "<font size=-1>sorted by "; |
|
|
| if ($_GET['sort'] == "order" || $_GET['sort'] == "") { |
| print "order "; |
| } |
| else { |
| print "<A HREF=\"javascript:diff('bleu','order',$count)\">order</A> "; |
| } |
|
|
| if ($_GET['sort'] == "better") { |
| print "order "; |
| } |
| else { |
| print "<A HREF=\"javascript:diff('bleu','better',$count)\">better</A> "; |
| } |
|
|
| if ($_GET['sort'] == "worse") { |
| print "order "; |
| } |
| else { |
| print "<A HREF=\"javascript:diff('bleu','worse',$count)\">worse</A> "; |
| } |
|
|
| print "display <A HREF=\"\">fullscreen</A> "; |
|
|
| $count = $_GET['count']; |
| if ($count == 0) { $count = 5; } |
| print "showing $count "; |
| print "<A HREF=\"javascript:diff('bleu','" . $_GET['sort'] . "',5+$count)\">more</A> "; |
| if ($count > 5) { |
| print "<A HREF=\"javascript:diff('bleu','" . $_GET['sort'] . "',$count-5)\">less</A> "; |
| } |
| print "<A HREF=\"javascript:diff('bleu','" . $_GET['sort'] . "',9999)\">all</A> "; |
|
|
| print "</font><BR>\n"; |
|
|
| bleu_diff_annotation(); |
| print "<font size=-1>"; |
| print "<A HREF=\"javascript:diff('bleu','" . $_GET['sort'] . "',5+$count)\">more</A> "; |
| print "</font><BR>\n"; |
| } |
|
|
| function bleu_diff_annotation() { |
| global $set,$id,$id2,$dir; |
|
|
| |
| $input_annotation = file(get_analysis_filename($dir,$set,$id,"coverage","input-annotation")); |
| for($i=0;$i<count($input_annotation);$i++) { |
| $item = split("\t",$input_annotation[$i]); |
| $input[$i] = $item[0]; |
| } |
|
|
| |
| for($idx=0;$idx<2;$idx++) { |
| $data = file(get_analysis_filename($dir,$set,$idx?$id2:$id,"basic","bleu-annotation")); |
| for($i=0;$i<count($data);$i++) { |
| $item = split("\t",$data[$i]); |
| $annotation[$item[1]]["bleu$idx"] = $item[0]; |
| $annotation[$item[1]]["system$idx"] = $item[2]; |
| $annotation[$item[1]]["reference"] = $item[3]; |
| $annotation[$item[1]]["id"] = $item[1]; |
| } |
| } |
| $data = array(); |
|
|
| $identical=0; $same=0; $better=0; $worse=0; |
| for($i=0;$i<count($annotation);$i++) { |
| if ($annotation[$i]["system1"] == $annotation[$i]["system0"]) { |
| $identical++; |
| } |
| else if ($annotation[$i]["bleu1"] == $annotation[$i]["bleu0"]) { |
| $same++; |
| } |
| else if ($annotation[$i]["bleu1"] > $annotation[$i]["bleu0"]) { |
| $better++; |
| } |
| else { |
| $worse++; |
| } |
| } |
|
|
| print "<table><tr><td>identical</td><td>same</td><td>better</td><td>worse</td></tr>\n"; |
| printf("<tr><td>%d</td><td>%d</td><td>%d</td><td>%d</td></tr>\n", $identical, $same, $better, $worse); |
| printf("<tr><td>%d%s</td><td>%d%s</td><td>%d%s</td><td>%d%s</td></tr></table>\n", $identical*100/count($annotation)+.5, '%', $same*100/count($annotation)+.5, '%', $better*100/count($annotation)+.5, '%', $worse*100/count($annotation)+.5, '%'); |
|
|
| |
|
|
| |
| global $sort; |
| $sort = $_GET['sort']; |
| if ($sort == '') { |
| $sort = "order"; |
| } |
| function cmp($a, $b) { |
| global $sort; |
| if ($sort == "worse") { |
| $a_idx = $a["bleu1"]-$a["bleu0"]; |
| $b_idx = $b["bleu1"]-$b["bleu0"]; |
| } |
| else if ($sort == "better") { |
| $a_idx = -$a["bleu1"]+$a["bleu0"]; |
| $b_idx = -$b["bleu1"]+$b["bleu0"]; |
| } |
|
|
| if ($a_idx == $b_idx) { |
| return 0; |
| } |
| return ($a_idx < $b_idx) ? -1 : 1; |
| } |
|
|
| if ($sort != 'order') { |
| usort($annotation, 'cmp'); |
| } |
|
|
| $count = $_GET['count']; |
| if ($count == 0) { $count = 5; } |
|
|
| |
| for($i=0;$i<$count && $i<count($annotation);$i++) { |
| $line = $annotation[$i]; |
| print "<font size=-2>[src]</font> ".$input[$line["id"]]."<br>"; |
|
|
| $word_with_score1 = split(" ",$line["system1"]); |
| $word_with_score0 = split(" ",$line["system0"]); |
| $word1 = split(" ",preg_replace("/\|\d/","",$line["system1"])); |
| $word0 = split(" ",preg_replace("/\|\d/","",$line["system0"])); |
| $matched_with_score = string_edit_distance($word_with_score0,$word_with_score1); |
| $matched = string_edit_distance($word0,$word1); |
|
|
| print "<font size=-2>[".$id2."-".$line["id"].":".$line["bleu1"]."]</font> "; |
| $matched1 = preg_replace('/D/',"",$matched); |
| $matched_with_score1 = preg_replace('/D/',"",$matched_with_score); |
| bleu_line_diff( $word_with_score1, $matched1, $matched_with_score1 ); |
|
|
| print "<font size=-2>[".$id."-".$line["id"].":".$line["bleu0"]."]</font> "; |
| $matched0 = preg_replace('/I/',"",$matched); |
| $matched_with_score0 = preg_replace('/I/',"",$matched_with_score); |
| bleu_line_diff( $word_with_score0, $matched0, $matched_with_score0 ); |
|
|
| print "<font size=-2>[ref]</font> ".$line["reference"]."<hr>"; |
| } |
| } |
|
|
| function bleu_line_diff( $word,$matched,$matched_with_score ) { |
| $color = array("#FFC0C0","#FFC0FF","#C0C0FF","#C0FFFF","#C0FFC0"); |
| $lcolor = array("#FFF0F0","#FFF0FF","#F0F0FF","#F0FFFF","#F0FFF0"); |
| for($j=0;$j<count($word);$j++) { |
| list($surface,$correct) = split("\|", $word[$j]); |
| if (substr($matched_with_score,$j,1) == "M") { |
| $style = "background-color: $lcolor[$correct];"; |
| } |
| else { |
| $style = "background-color: $color[$correct];"; |
| } |
| if (substr($matched,$j,1) == "M") { |
| $style .= "color: #808080;"; |
| } |
| print "<span style=\"$style\">$surface</span> "; |
| } |
| print "<br>"; |
| } |
|
|
| function ngram_diff($type) { |
| global $set,$id,$id2,$dir; |
|
|
| ini_set('memory_limit',1e9); |
|
|
| |
| $order = $_GET['order']; |
|
|
| for($idx=0;$idx<2;$idx++) { |
| $data = file(get_analysis_filename($dir,$set,$idx?$id2:$id,"basic","n-gram-$type.$order")); |
| for($i=0;$i<count($data);$i++) { |
| $item = split("\t",$data[$i]); |
| $ngram_hash[$item[2]]["total$idx"] = $item[0]; |
| $ngram_hash[$item[2]]["correct$idx"] = $item[1]; |
| } |
| unset($data); |
| } |
|
|
| |
| $sort = $_GET['sort']; |
| $smooth = $_GET['smooth']; |
| if ($sort == '') { |
| $sort = 'ratio_worse'; |
| $smooth = 1; |
| } |
|
|
| error_reporting(E_ERROR); |
|
|
| |
| foreach ($ngram_hash as $n => $value) { |
| $item = $value; |
| |
| |
| |
| |
| $item["ngram"] = $n; |
|
|
| if ($sort == "abs_worse") { |
| $item["index"] = (2*$item["correct1"] - $item["total1"]) |
| - (2*$item["correct0"] - $item["total0"]); |
| } |
| else if ($sort == "abs_better") { |
| $item["index"] = - (2*$item["correct1"] - $item["total1"]) |
| + (2*$item["correct0"] - $item["total0"]); |
| } |
| else if ($sort == "ratio_worse") { |
| $item["index"] = |
| ($item["correct1"] + $smooth) / ($item["total1"] + $smooth) |
| - ($item["correct0"] + $smooth) / ($item["total0"] + $smooth); |
| } |
| else if ($sort == "ratio_better") { |
| $item["index"] = |
| - ($item["correct1"] + $smooth) / ($item["total1"] + $smooth) |
| + ($item["correct0"] + $smooth) / ($item["total0"] + $smooth); |
| } |
| $ngram[] = $item; |
| unset($ngram_hash[$n]); |
| } |
| unset($ngram_hash); |
|
|
| |
| function cmp($a, $b) { |
| if ($a["index"] == $b["index"]) { |
| return 0; |
| } |
| return ($a["index"] < $b["index"]) ? -1 : 1; |
| } |
|
|
| usort($ngram, 'cmp'); |
|
|
| |
| $count = $_GET['count']; |
| if ($count == 0) { $count = 5; } |
|
|
| print "<B>$order-gram $type</B><br><font size=-1>sorted by<br>"; |
| if ($sort == "ratio_worse") { |
| print "ratio worse "; |
| print "smooth-$smooth "; |
| print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'ratio_worse',$smooth+1)\">+</A> "; |
| print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'ratio_worse',$smooth-1)\">-</A>,"; |
| } |
| else { |
| print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'ratio_worse',1)\">ratio worse</A>, "; |
| } |
| if ($sort == "abs_worse") { |
| print "absolute worse, "; |
| } |
| else { |
| print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'abs_worse',0)\">absolute worse</A>, "; |
| } |
|
|
| print "<br>"; |
| if ($sort == "ratio_better") { |
| print "ratio better "; |
| print "smooth-$smooth "; |
| print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'ratio_better',$smooth+1)\">+</A> "; |
| print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'ratio_better',$smooth-1)\">-</A>,"; |
| } |
| else { |
| print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'ratio_better',1)\">ratio better</A>, "; |
| } |
| if ($sort == "abs_better") { |
| print "absolute better, "; |
| } |
| else { |
| print "<A HREF=\"javascript:ngram_diff('$type',$order,$count,'abs_better',0)\">absolute better</A>, "; |
| } |
|
|
| print "<br>showing $count "; |
| if ($count < 9999) { |
| print "<A HREF=\"javascript:ngram_diff('$type',$order,$count+5,'$sort',$smooth)\">more</A> "; |
| if ($count > 5) { |
| print "<A HREF=\"javascript:ngram_diff('$type',$order,$count-5,'$sort',$smooth)\">less</A> "; |
| } |
| print "<A HREF=\"javascript:ngram_diff('$type',$order,9999,'$sort',$smooth)\">all</A> "; |
| } |
| else { |
| print "<A HREF=\"javascript:ngram_diff('$type',$order,5,'$sort',$smooth)\">top5</A> "; |
| } |
|
|
| print "<br>\n"; |
|
|
| print "<table width=100%>\n"; |
| print "<tr><td>$order-gram</td>"; |
| if ($type == 'recall') { |
| print "<td>Δ</td><td>ok</td><td>x</td></tr>\n"; |
| } |
| else { |
| print "<td align=right>Δ</td><td>ok</td><td align=right>Δ</td><td>x</td></tr>\n"; |
| } |
| for($i=0;$i<$count && $i<count($ngram);$i++) { |
| $line = $ngram[$i]; |
| print "<tr><td>".$line["ngram"]."</td>"; |
| $ok = $line["correct1"]; |
| $ok_diff = $ok - $line["correct0"]; |
| $wrong = $line["total1"] - $line["correct1"]; |
| $wrong_diff = $wrong - ($line["total0"]-$line["correct0"]); |
| if ($type == 'recall') { |
| printf("<td>%+d</td><td>%d</td><td>%d</td></tr>", $ok_diff,$ok,$wrong); |
| } |
| else { |
| printf("<td align=right>%+d</td><td>(%d)</td><td align=right>%+d</td><td>(%d)</td></tr>", $ok_diff,$ok,$wrong_diff,$wrong); |
| } |
| } |
| print "</table>\n"; |
| } |
|
|
| function string_edit_distance($a,$b) { |
| $cost = array( array( 0 ) ); |
| $back = array( array( "" ) ); |
|
|
| |
| for($i=0;$i<count($a);$i++) { |
| $cost[$i+1][0] = $i+1; |
| } |
| for($j=0;$j<count($b);$j++) { |
| $cost[0][$j+1] = $j+1; |
| } |
|
|
| |
| for($i=1;$i<=count($a);$i++) { |
| for($j=1;$j<=count($b);$j++) { |
| $match_cost = ($a[$i-1] == $b[$j-1]) ? 0 : 1; |
| $c = $match_cost + $cost[$i-1][$j-1]; |
| $p = $match_cost ? "S" : "M"; |
| if ($cost[$i-1][$j]+1 < $c) { |
| $c = $cost[$i-1][$j]+1; |
| $p = "D"; |
| } |
| if ($cost[$i][$j-1]+1 < $c) { |
| $c = $cost[$i][$j-1]+1; |
| $p = "I"; |
| } |
| $cost[$i][$j] = $c; |
| $back[$i][$j] = $p; |
| } |
| } |
|
|
| |
| $i=count($a); |
| $j=count($b); |
| $path = ""; |
| while($i>0 || $j>0) { |
| if ($back[$i][$j] == "M" || $back[$i][$j] == "S") { |
| $path = $back[$i][$j] . $path; |
| $i--; $j--; |
| } |
| else if($i==0 || $back[$i][$j] == "I") { |
| $path = "I".$path; |
| $j--; |
| } |
| else { |
| $path = "D".$path; |
| $i--; |
| } |
| } |
| return $path; |
| } |
|
|