|
|
|
|
|
|
|
|
/**
 * Replace every match of `search` in this string with `replacement`.
 *
 * NOTE: this assignment overrides the built-in `String.prototype.replaceAll`
 * where the engine provides one, and its semantics differ from the built-in:
 *   - a string `search` is compiled as a regular-expression pattern, it is
 *     NOT matched literally (so "." matches any character);
 *   - a RegExp without the `g` flag is accepted and treated as global.
 * Both behaviors are kept because code in this file calls `replaceAll` with a
 * non-global RegExp.
 *
 * @param {string|RegExp} search - Pattern source string, or a RegExp.
 * @param {string|Function} replacement - Same meaning as in `String.prototype.replace`.
 * @returns {string} The string with all matches replaced.
 */
String.prototype.replaceAll = function(search, replacement) {
  var target = this;
  var pattern;

  if (search instanceof RegExp) {
    // Keep the caller's own flags (i, m, s, u, ...) and only add `g` when it
    // is missing; passing just 'g' would silently drop e.g. case-insensitivity.
    var flags = search.flags.indexOf('g') === -1 ? search.flags + 'g' : search.flags;
    pattern = new RegExp(search.source, flags);
  } else {
    pattern = new RegExp(search, 'g');
  }

  return target.replace(pattern, replacement);
};
|
|
|
|
|
|
|
|
/**
 * Write a single value to the console.
 *
 * @param {*} val - Value handed to `console.log` as its only argument.
 */
function print(val) {
  console.log(val);
}
|
|
|
|
|
/**
 * Substitute `{name}` placeholders in `s` with values from `valueDict`.
 *
 * A placeholder is `{` followed by one or more non-`}` characters and `}`.
 * The lookup key is the placeholder text with every brace removed.
 *
 * Every placeholder is always replaced: a key that is not an own property of
 * `valueDict` is replaced by the text "undefined". The code in this file calls
 * this function in a loop until no braces remain, so leaving an unknown
 * placeholder in place would keep that loop spinning.
 *
 * Values are inserted literally; `$&`, `$1` and similar sequences inside a
 * value are not treated as replacement patterns.
 *
 * @param {string} s - Template text.
 * @param {Object} valueDict - Map from placeholder name to replacement value.
 * @returns {string} `s` with all placeholders substituted; `s` unchanged when
 *   it contains no placeholder.
 */
function dictFormat(s, valueDict) {
  var hasOwn = Object.prototype.hasOwnProperty;

  // A replacer function is used (instead of a replacement string) so that "$"
  // sequences in the inserted value are copied verbatim.
  return s.replace(/\{[^}]+\}/g, function(placeholder) {
    var key = placeholder.replace(/[{}]/g, "");
    // Own-property check keeps names such as "constructor" from resolving to
    // members inherited from Object.prototype.
    return hasOwn.call(valueDict, key) ? String(valueDict[key]) : "undefined";
  });
}
|
|
|
|
|
/**
 * Fetch `url` with an XMLHttpRequest GET and expose the outcome as a Promise.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<*>} Resolves with `req.response` when the status is 200.
 *   Rejects with an Error otherwise: the message is the response's status text
 *   (or "HTTP <status>" when the status text is empty), or "Network Error"
 *   when the request's `onerror` handler fires.
 */
function get(url) {
  return new Promise(function(resolve, reject) {
    var req = new XMLHttpRequest();
    req.open('GET', url);

    req.onload = function() {
      // onload also fires for error statuses such as 404, so check explicitly.
      if (req.status === 200) {
        resolve(req.response);
        return;
      }
      // statusText can be an empty string (it always is over HTTP/2); fall
      // back to the numeric status so the rejection still says what happened.
      reject(new Error(req.statusText || ('HTTP ' + req.status)));
    };

    req.onerror = function() {
      reject(new Error("Network Error"));
    };

    req.send();
  });
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Load the rule table and parse it into a list of row objects.
 *
 * Source of the text:
 *   - the element with id "x", when it has a non-empty `value` and `filename`
 *     is "";
 *   - otherwise the result of `get(filename)`.
 *
 * Format, as parsed here: one row per line, fields separated by ",". Empty
 * lines and lines starting with "#" are ignored. The first remaining line
 * supplies the column names. Both "\n" and "\r\n" line endings are accepted.
 * Fields are split on every comma; quoted commas are not supported.
 *
 * @param {string} filename - Location passed to `get`, or "" to prefer the
 *   text held by element "x".
 * @returns {Promise<Object[]>} One object per data row, keyed by column name.
 *   A row shorter than the header gets `undefined` for its missing columns.
 *   Rows whose first field equals the first column name (i.e. the header row
 *   itself) are skipped.
 */
async function read_rules(filename) {
  const hidden = document.getElementById('x');
  // The element may be absent, or may not carry a string value.
  const inlineRules = (hidden && typeof hidden.value === "string") ? hidden.value : "";

  let rawdata;
  // Loose comparison kept on purpose so the same `filename` values as before
  // count as "no file name given".
  if (inlineRules.length === 0 || filename != "") {
    rawdata = await get(filename);
  } else {
    rawdata = inlineRules;
  }
  console.log(rawdata);

  // Splitting on \r?\n keeps a stray "\r" off the last column name and off the
  // last field of every row when the text uses Windows line endings.
  const data = rawdata.split(/\r?\n/);
  const isRuleLine = function(line) {
    return line.length > 0 && line[0] !== "#";
  };

  // Column names come from the first non-empty, non-comment line; when there
  // is none, fall back to the first line, as before.
  const headerLine = data.find(isRuleLine);
  const headers = (headerLine !== undefined ? headerLine : data[0]).split(",");

  const rule_list = [];
  for (const line of data) {
    if (!isRuleLine(line)) {
      continue;
    }
    const split_rule = line.split(",");
    if (split_rule[0] === headers[0]) {
      continue; // the header row itself
    }
    const rule_dict = {};
    for (let j = 0; j < headers.length; j++) {
      rule_dict[headers[j]] = split_rule[j];
    }
    rule_list.push(rule_dict);
  }

  return rule_list;
}
|
|
|
|
|
/**
 * One substitution rule: a pattern to match (`sfrom`), its replacement
 * (`sto`), patterns that the text before (`precede`) and after (`follow`) must
 * satisfy, and a numeric `weight`.
 *
 * Instance properties after construction:
 *   - `sfrom`      RegExp compiled from the rule's `sfrom`
 *   - `sfrom_save` the `sfrom` pattern source string before compilation
 *   - `sto`        replacement string, with `\N` back-references rewritten to `$N`
 *   - `precede`    RegExp anchored at the end of the preceding text
 *   - `follow`     RegExp anchored at the start of the following text
 *   - `weight`     result of `parseFloat` on the rule's `weight`
 */
class SubRule {
  /**
   * @param {Object} rule - Row with "sfrom", "sto", "precede", "follow" and
   *   "weight" entries. A missing entry is treated as the empty string.
   * @param {Object} classes - Name-to-text map handed to `dictFormat` to
   *   expand `{name}` placeholders found in any of the five entries.
   * @throws {Error} When placeholder expansion does not finish, e.g. because
   *   two classes reference each other.
   */
  constructor(rule, classes) {
    const headers = ["sfrom", "sto", "precede", "follow", "weight"];
    const placeholder = /{.*}/;
    const maxExpansionPasses = 100;

    for (const key of headers) {
      let value = rule[key];
      if (value === undefined || value === null) {
        // A missing column would otherwise be compiled as the literal text
        // "undefined" (e.g. /undefined$/ for `precede`).
        value = "";
      }

      if (key === "sto" && !(value === "@" || value === " @")) {
        // Rewrite \1..\9 back-references to the $1..$9 form understood by
        // String.prototype.replace. "@" is used as an intermediate marker, so
        // any other "@" in the replacement also becomes "$" (kept as-is for
        // compatibility with existing rule files). Plain `replace` with /g is
        // used so this does not depend on a patched `replaceAll`.
        value = value.replace(/\\([1-9])/g, "@$1").replace(/@/g, "$");
      }

      if (key === "sfrom" && value.includes("\\b")) {
        // A leading/trailing \b is replaced by an explicit "whitespace or
        // string edge" assertion.
        value = value.replace(/\\b$/, "(?=\\s|$)");
        value = value.replace(/^\\b/, "(?<=^|\\s)");
      }

      // Expand {name} placeholders until none remain. Stop when a pass makes
      // no progress, and give up after a fixed number of passes so a cyclic
      // class definition cannot hang the page.
      let passes = 0;
      while (placeholder.test(value)) {
        const expanded = dictFormat(value, classes);
        if (expanded === value) {
          break;
        }
        passes += 1;
        if (passes > maxExpansionPasses) {
          throw new Error("SubRule: class expansion did not terminate for '" + key + "'");
        }
        value = expanded;
      }

      this[key] = value;
    }

    this.weight = parseFloat(this.weight);
    this.sfrom_save = this.sfrom;
    this.sfrom = new RegExp(this.sfrom);
    // Group the pattern before anchoring: without the group, "a|b" + "$"
    // parses as "a" OR "b$" and the anchor applies to one alternative only.
    this.precede = new RegExp("(?:" + this.precede + ")$");
    this.follow = new RegExp("^(?:" + this.follow + ")");
  }

  /**
   * Score this rule against a piece of text and its context.
   *
   * @param {string} sfrom - Text to translate.
   * @param {string} precede - Text before it.
   * @param {string} follow - Text after it.
   * @returns {?number} `weight` when all three patterns match, else `null`.
   */
  sub_score(sfrom, precede, follow) {
    const applies =
      this.sfrom.test(sfrom) &&
      this.precede.test(precede) &&
      this.follow.test(follow);
    return applies ? this.weight : null;
  }

  /**
   * Apply the substitution to `x`.
   *
   * @param {string} x - Text to rewrite.
   * @returns {string} `x` with the `sfrom` match replaced by `sto`.
   */
  sub(x) {
    return x.replace(this.sfrom, this.sto);
  }
}
|
|
|
|
|
/**
 * Rule-driven translator from written text to a space-separated symbol string.
 *
 * Rules are loaded asynchronously by the constructor via `read_rules`; use the
 * `ready` promise to know when `translate` may be called.
 *
 * Rule types handled by `init` (selected by each rule's "type" entry):
 *   - "pre":    literal text replacement applied to the input first
 *   - "class":  named text fragment, referenced as `{name}` in other rules
 *   - "match":  direct character-to-output mapping
 *   - "sub":    context-sensitive per-character substitution (`SubRule`)
 *   - "ipasub": substitution applied globally to the assembled output string
 *   - "word":   whole-input override
 */
class AlphabetToIpa {
  /**
   * @param {string} rule_filepath - Passed to `read_rules` as the rule source.
   */
  constructor(rule_filepath) {
    this.rulenames = rule_filepath;
    // Resolves with this instance once the rules are loaded and indexed;
    // rejects when loading or indexing fails.
    this.ready = read_rules(this.rulenames).then((rules) => {
      this.rule_list = rules;
      this.init();
      return this;
    });
  }

  /**
   * Expand `{name}` placeholders in `value` using `this.classes`, repeating
   * until none remain.
   *
   * @param {*} value - Text to expand.
   * @returns {*} Expanded text; `value` itself when it has no placeholder.
   * @throws {Error} When expansion does not finish (cyclic class references).
   */
  expandClasses(value) {
    const placeholder = /{.*}/;
    const maxExpansionPasses = 100;
    let passes = 0;
    while (placeholder.test(value)) {
      const expanded = dictFormat(value, this.classes);
      if (expanded === value) {
        break; // no progress: nothing more can be substituted
      }
      passes += 1;
      if (passes > maxExpansionPasses) {
        throw new Error("AlphabetToIpa: class expansion did not terminate");
      }
      value = expanded;
    }
    return value;
  }

  /**
   * Build the lookup tables from `this.rule_list`.
   *
   * Field values that start with a double quote have one leading and one
   * trailing double quote removed; the cleaned value is written back into the
   * rule object. Rules are processed in list order, so a "class" rule must
   * appear before the rules that reference it.
   */
  init() {
    this.classes = {};
    this.subs = new Set([]);
    this.ipasubs = new Set([]);
    this.words = {};
    this.matches = {};
    this.pre = [];
    this.NO_TRANSLATE = "@";

    for (const rule of this.rule_list) {
      for (const key of Object.keys(rule)) {
        const item = rule[key];
        if (typeof item === "string" && item.startsWith("\"")) {
          rule[key] = item.replace(/^\"/, "").replace(/\"$/, "");
        }
      }

      const type = rule["type"];
      if (type === "pre") {
        this.pre.push([rule["sfrom"], rule["sto"]]);
      } else if (type === "class") {
        this.classes[rule["sfrom"]] = rule["sto"];
      } else if (type === "match") {
        this.matches[rule["sfrom"]] = this.expandClasses(rule["sto"]);
      } else if (type === "sub") {
        this.subs.add(new SubRule(rule, this.classes));
      } else if (type === "ipasub") {
        const ipasubrule = new SubRule(rule, this.classes);
        // ipasub rules rewrite every occurrence in the assembled output.
        ipasubrule.sfrom = new RegExp(ipasubrule.sfrom, "g");
        this.ipasubs.add(ipasubrule);
      } else if (type === "word") {
        // split() without a separator yields a one-element array holding the
        // whole string. NOTE(review): possibly meant to split on whitespace;
        // kept so the value has the same shape as translate()'s other result.
        this.words[rule["sfrom"]] = rule["sto"].split();
      } else {
        console.log("Unrecognized rule type.");
      }
    }
  }

  /**
   * Translate `source`.
   *
   * Steps: return the "word" entry when `source` is one (checked before any
   * other processing, case-sensitively); apply the "pre" replacements;
   * lower-case; apply "match" rules; translate each remaining character with
   * the highest-weighted applicable "sub" rule (or `NO_TRANSLATE` when none
   * applies); join the pieces with spaces; finally apply the "ipasub" rules in
   * descending weight order. Intermediate strings are written to the console.
   *
   * @param {string} source - Text to translate.
   * @returns {string[]} For a "word" entry, the stored array; otherwise a
   *   one-element array holding the translated string.
   * @throws {Error} When called before the rules have been loaded.
   */
  translate(source) {
    if (!this.words) {
      throw new Error("AlphabetToIpa: rules are not loaded yet; wait for `ready` before calling translate()");
    }
    const hasOwn = Object.prototype.hasOwnProperty;

    // Own-property check: `in` would also accept inherited names such as
    // "constructor" or "toString" and return a function instead of a result.
    if (hasOwn.call(this.words, source)) {
      return this.words[source];
    }

    for (const [from, to] of this.pre) {
      // Replace every occurrence, literally. An empty or missing pattern has
      // nothing to replace and is skipped.
      if (from) {
        source = source.split(from).join(String(to));
      }
    }
    source = source.toLowerCase();

    const source_list = source.split("");
    const target_list = [];

    // NOTE(review): match rules are only consulted when MORE than one is
    // defined, and each hit removes the LAST remaining character rather than
    // the matched one. Both look unintended but are kept unchanged because the
    // intended behavior cannot be determined from this file.
    if (Object.keys(this.matches).length > 1) {
      // Inputs longer than one character are checked character by character;
      // shorter inputs are checked as a whole.
      const units = source.length > 1 ? source.split("") : [source];
      for (const unit of units) {
        if (hasOwn.call(this.matches, unit)) {
          target_list.push(this.matches[unit]);
          source_list.pop();
        }
      }
    }

    for (let i = 0; i < source_list.length; i++) {
      const sfrom = source_list[i];
      const precede = source_list.slice(0, i).join("");
      const follow = source_list.slice(i + 1).join("");

      // Pick the applicable rule with the highest score; on a tie the rule
      // added first wins. Falsy scores (null, and also 0 or NaN weights) count
      // as "does not apply", as before.
      let best = null;
      let bestScore = null;
      for (const subrule of this.subs) {
        const score = subrule.sub_score(sfrom, precede, follow);
        if (!score) {
          continue;
        }
        if (best === null || score > bestScore) {
          best = subrule;
          bestScore = score;
        }
      }

      if (best === null) {
        target_list.push(this.NO_TRANSLATE);
        continue;
      }
      const translation = best.sub(sfrom);
      if (translation.length > 0) {
        // An empty translation drops the character from the output.
        target_list.push(translation);
      }
    }

    let target_string = target_list.join(" ");
    print(target_string);

    const ordered = Array.from(this.ipasubs).sort(function(a, b) {
      return b.weight - a.weight;
    });
    for (const ipasubrule of ordered) {
      console.log("ipasub", ipasubrule, "from:", target_string);
      target_string = target_string.replace(ipasubrule.sfrom, ipasubrule.sto);
      console.log("\tresult:", target_string);
    }

    print(target_string);
    // split() without a separator wraps the whole string in a one-element array.
    return target_string.split();
  }
}
|
|
|
|
|
|
|
|
|