|
|
|
|
|
|
|
|
|
|
|
/**
 * Issue an HTTP GET through XMLHttpRequest and expose the outcome as a Promise.
 *
 * @param {string} url - Address handed to `XMLHttpRequest#open`.
 * @returns {Promise<*>} Fulfils with the request's `response` when the status
 *   equals 200. Rejects with an Error carrying the request's `statusText` for
 *   any other status, or with "Network Error" when the error handler fires.
 */
function get(url) {
  const executor = (fulfil, fail) => {
    const xhr = new XMLHttpRequest();
    xhr.open("GET", url);

    // The load handler fires for every completed response, so the status
    // still has to be inspected to tell success from failure.
    xhr.onload = () => {
      if (xhr.status != 200) {
        fail(Error(xhr.statusText));
        return;
      }
      fulfil(xhr.response);
    };

    xhr.onerror = () => {
      fail(Error("Network Error"));
    };

    xhr.send();
  };

  return new Promise(executor);
}
|
|
|
|
|
|
|
|
/**
 * Download a word-frequency list and parse it into a lookup object.
 *
 * The file is split on "\n" and every line on a single space; the first field
 * is the word and the second is converted with `Number`. Lines containing any
 * uppercase letter (Unicode property `Lu`) are skipped. When a word occurs on
 * several lines the last occurrence wins. Entries with an empty word or a
 * count that is not a number are removed before returning.
 *
 * @param {string} filename - URL of the word list, fetched with `get`.
 * @returns {Promise<Object<string, number>|undefined>} Map of word to count,
 *   or `undefined` (after showing an alert) when `filename` is falsy.
 */
async function read_wordlists(filename) {
  if (!filename) {
    alert("Error: No file is listed for the language");
    return;
  }

  const rawdata = await get(filename);
  const has_uppercase = /\p{Lu}/u;
  const word_list = {};

  // All loop variables are block scoped: the previous implicit globals threw
  // in strict mode and could be clobbered by any other code on the page.
  for (const line of rawdata.split("\n")) {
    if (has_uppercase.test(line)) {
      continue;
    }
    const [word, count] = line.split(" ");
    word_list[word] = Number(count);
  }

  // Pruning happens after the whole file is read so that a later malformed
  // duplicate still removes the word, exactly as assignment order dictates.
  delete word_list[""];
  for (const word of Object.keys(word_list)) {
    if (Number.isNaN(word_list[word])) {
      delete word_list[word];
    }
  }

  return word_list;
}
|
|
|
|
|
|
|
|
/**
 * Read the text of the element with id "in1" and count its words.
 *
 * The text is lower-cased, stripped of the punctuation and ASCII digits
 * listed in the pattern below, and split on runs of whitespace. Empty tokens
 * are ignored, so leading, trailing or whitespace-only input never produces
 * an entry for the empty string.
 *
 * When the element's value is empty its background is set to "#ff0000" and
 * reset to "white" 250 ms later, and the empty string is returned.
 *
 * @returns {Object<string, number>|string} Plain object mapping each word to
 *   its number of occurrences, with keys inserted in descending order of
 *   count (ties keep first-seen order); or "" when the input is empty.
 */
function read_input() {
  const input_box = document.getElementById("in1");
  const raw_text = input_box.value;

  if (raw_text === "") {
    input_box.style.background = "#ff0000";
    setTimeout(function() {
      input_box.style.background = "white";
    }, 250);
    return "";
  }

  const words = raw_text
    .toLowerCase()
    .replace(/[.,\/#!$%\^&\*;:{}=0-9\-_`~()\'\"\[\]]/g, "")
    .replace(/\n+|\s+/g, " ")
    .split(" ")
    .filter(function(token) {
      return token !== "";
    });

  // A Map keeps the tally independent of Object.prototype, so a word such as
  // "constructor" starts from zero instead of from an inherited property.
  const tally = new Map();
  for (const token of words) {
    tally.set(token, (tally.get(token) || 0) + 1);
  }

  // Array#sort is stable, so equally frequent words stay in first-seen order.
  const ranked = Array.from(tally.entries()).sort(function(a, b) {
    return b[1] - a[1];
  });

  const freqs = {};
  for (const [token, count] of ranked) {
    freqs[token] = count;
  }
  return freqs;
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Compare the word frequencies of the user's text with a reference word list.
 *
 * Steps:
 *  1. Read the list URL from the element with id "rule_dropdown", the input
 *     counts from `read_input()` and the reference counts from
 *     `read_wordlists(url)`.
 *  2. Order the input words so that those present in the reference list come
 *     first; words missing from it are added to the reference with count 0.
 *  3. Turn both count tables into add-one smoothed probabilities:
 *       input:     (count + 1) / (input total + number of input words + 1)
 *       reference: (count + 1) / (reference total + original list size + 1)
 *     where "original list size" is measured before step 2 adds words.
 *  4. For every input word compute p * ln(p / q), with p the input and q the
 *     reference probability (the `kl` value; it has the form of one summand
 *     of a Kullback-Leibler divergence).
 *
 * All working state is local, so overlapping calls cannot overwrite each
 * other while the word list is being downloaded.
 *
 * @returns {Promise<Array>} Six arrays, in this order:
 *   `kl_pos`       - `{word, kl}` objects whose value is > 0,
 *   `kl_posvals`   - those positive values,
 *   `kl_neg`       - `{word, kl}` objects whose value is <= 0,
 *   `kl_negvals`   - absolute values of those entries,
 *   `wc_word_list` - `{word, kl}` for every input word,
 *   `kl_vals`      - every value.
 *   All six are empty when `read_wordlists` yields no list.
 */
async function comp() {
  const has_own = Object.prototype.hasOwnProperty;
  const sum_counts = (table) =>
    Object.values(table).reduce((num_tot, num_new) => num_tot + num_new, 0);

  const wordlist_url = document.getElementById("rule_dropdown").value;
  // read_input() returns "" for an empty box; treat that as "no words".
  const word_list_new = read_input() || {};
  const word_list_pre = await read_wordlists(wordlist_url);

  const kl_vals = [];
  const kl_pos = [];
  const kl_posvals = [];
  const kl_neg = [];
  const kl_negvals = [];
  const wc_word_list = [];

  // read_wordlists() returns undefined (after alerting) when no file is
  // configured; return an empty result instead of throwing a TypeError.
  if (!word_list_pre) {
    return [kl_pos, kl_posvals, kl_neg, kl_negvals, wc_word_list, kl_vals];
  }

  // Known words first, unknown words after, matching the original ordering.
  const input_words = Object.keys(word_list_new);
  const known_words = input_words.filter((word) => has_own.call(word_list_pre, word));
  const unknown_words = input_words.filter((word) => !has_own.call(word_list_pre, word));

  const word_list_input = {};
  for (const word of known_words) {
    word_list_input[word] = word_list_new[word];
  }

  // Must be taken before the unknown words are appended to the reference.
  const initial_prelength = Object.keys(word_list_pre).length;

  for (const word of unknown_words) {
    word_list_input[word] = word_list_new[word];
    word_list_pre[word] = 0;
  }

  const input_total = sum_counts(word_list_input);
  const fixed_total = sum_counts(word_list_pre);

  // Denominators are loop invariant; compute them once.
  const input_denominator = input_total + Object.keys(word_list_input).length + 1;
  const pre_denominator = fixed_total + initial_prelength + 1;

  const input_list_probs = {};
  for (const word of Object.keys(word_list_input)) {
    input_list_probs[word] = (word_list_input[word] + 1) / input_denominator;
  }

  const pre_list_probs = {};
  for (const word of Object.keys(word_list_pre)) {
    pre_list_probs[word] = (word_list_pre[word] + 1) / pre_denominator;
  }

  for (const word of Object.keys(input_list_probs)) {
    if (!has_own.call(pre_list_probs, word)) {
      continue;
    }

    const p = input_list_probs[word];
    const kl_value = p * Math.log(p / pre_list_probs[word]);

    kl_vals.push(kl_value);
    wc_word_list.push({word: word, kl: kl_value});

    if (kl_value > 0) {
      kl_pos.push({word: word, kl: kl_value});
      kl_posvals.push(kl_value);
    } else {
      kl_neg.push({word: word, kl: kl_value});
      kl_negvals.push(Math.abs(kl_value));
    }
  }

  return [kl_pos, kl_posvals, kl_neg, kl_negvals, wc_word_list, kl_vals];
}
|
|
|