| const Tesseract = require("tesseract.js"); |
| const fs = require("fs"); |
| const path = require("path"); |
| const sharp = require("sharp"); |
|
|
/**
 * Decodes a base64 image string into a Buffer.
 *
 * An optional leading data-URL header (`data:image/<subtype>[;params];base64,`)
 * is removed first. The subtype may contain non-word characters such as `+`,
 * `-` or `.` (e.g. `image/svg+xml`, `image/x-icon`); a bare base64 string
 * without a header is decoded as-is.
 *
 * @param {string} b - Base64 payload, with or without a data-URL header.
 * @returns {Buffer} The decoded bytes.
 */
const buf = b => Buffer.from(b.replace(/^data:image\/[^,]*;base64,/i, ""), "base64");
|
|
| |
| |
| |
// Substitution table used by `leetize`: each key is a lowercase letter or a
// symbol, each value is the digit it is folded into.
const LEET = {
  a: "4",
  e: "3",
  g: "9",
  i: "1",
  l: "1",
  o: "0",
  s: "5",
  t: "7",
  b: "8",
  z: "2",
  "@": "4",
  "$": "5",
  "&": "8"
};

/**
 * Normalises a string to its "leet" form: the input is lowercased and every
 * character that has an entry in `LEET` is replaced by that entry; all other
 * characters are kept unchanged.
 *
 * @param {string} str - Text to normalise.
 * @returns {string} The lowercased, digit-substituted text.
 */
function leetize(str) {
  const lowered = str.toLowerCase();
  let converted = "";
  for (let pos = 0; pos < lowered.length; pos++) {
    const ch = lowered[pos];
    converted += LEET[ch] ?? ch;
  }
  return converted;
}
|
|
| |
| |
| |
/**
 * Computes the Levenshtein edit distance between two sequences: the minimum
 * number of single-element insertions, deletions and substitutions needed to
 * turn `a` into `b`. Elements are compared by index with `===`.
 *
 * @param {string} a - First sequence.
 * @param {string} b - Second sequence.
 * @returns {number} The edit distance (0 when the inputs are equal).
 */
function levenshtein(a, b) {
  const width = b.length;

  // Row for the empty prefix of `a`: turning "" into b[0..j) costs j inserts.
  let above = [];
  for (let col = 0; col <= width; col++) {
    above.push(col);
  }

  for (let row = 1; row <= a.length; row++) {
    // First cell: deleting the first `row` elements of `a`.
    const current = [row];
    for (let col = 1; col <= width; col++) {
      const changeCost = a[row - 1] === b[col - 1] ? 0 : 1;
      const viaDelete = above[col] + 1;
      const viaInsert = current[col - 1] + 1;
      const viaChange = above[col - 1] + changeCost;
      current[col] = Math.min(viaDelete, viaInsert, viaChange);
    }
    above = current;
  }

  return above[width];
}
|
|
/**
 * Scores how alike two strings are, based on their Levenshtein distance
 * relative to the longer string's length.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} A value in [0, 1]: 1 for identical strings, 0 when every
 *   position differs.
 */
function similarity(a, b) {
  const longest = Math.max(a.length, b.length);
  // Two empty strings are identical; without this guard the ratio below is
  // 0 / 0 and the function would return NaN.
  if (longest === 0) {
    return 1;
  }
  return 1 - levenshtein(a, b) / longest;
}
|
|
| |
| |
| |
/**
 * Prepares the challenge ("soal") image for OCR.
 *
 * Runs the input through `sharp`, applying in order: resize to a width of
 * 1000, grayscale, normalize, sharpen with sigma 2 and a median filter of
 * size 2, then returns the pipeline's output buffer.
 *
 * @param {Buffer} b - Image input handed straight to `sharp`.
 * @returns {Promise<Buffer>} The value `toBuffer()` resolves to.
 */
async function preprocessSoal(b) {
  const resized = sharp(b).resize({ width: 1000 });
  const contrasted = resized.grayscale().normalize();
  const cleaned = contrasted.sharpen({ sigma: 2 }).median(2);
  return cleaned.toBuffer();
}
|
|
/**
 * Prepares a candidate ("bot") image for OCR.
 *
 * Runs the input through `sharp`, applying in order: resize to a width of
 * 800, grayscale, normalize, sharpen with sigma 1.5 and a median filter of
 * size 1, then returns the pipeline's output buffer.
 *
 * @param {Buffer} b - Image input handed straight to `sharp`.
 * @returns {Promise<Buffer>} The value `toBuffer()` resolves to.
 */
async function preprocessBot(b) {
  const resized = sharp(b).resize({ width: 800 });
  const contrasted = resized.grayscale().normalize();
  const cleaned = contrasted.sharpen({ sigma: 1.5 }).median(1);
  return cleaned.toBuffer();
}
|
|
/**
 * Runs Tesseract OCR over an image and returns the recognised text.
 *
 * The image is written to a uniquely named temporary PNG next to this module,
 * recognised with the "eng" language, and the temporary file is removed
 * afterwards whether recognition succeeded or failed.
 *
 * @param {Buffer} buf - Encoded image bytes to recognise.
 * @param {string} [mode="soal"] - "soal" selects page segmentation mode "7";
 *   any other value selects "8".
 * @returns {Promise<string>} The trimmed text, or "" when recognition fails
 *   (the error is logged, not rethrown).
 * @throws If the temporary file cannot be written.
 */
async function ocr(buf, mode = "soal") {
  // A timestamp alone is not unique: two calls starting in the same
  // millisecond would share one file and clobber each other's image.
  const suffix = `${Date.now()}_${process.pid}_${Math.random().toString(36).slice(2)}`;
  const tmp = path.join(__dirname, `tmp_${suffix}.png`);
  // "wx" makes the write fail instead of silently overwriting an existing file.
  fs.writeFileSync(tmp, buf, { flag: "wx" });

  const whitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789@$&";

  try {
    // NOTE(review): in recent tesseract.js releases the third argument of
    // `recognize` is the worker options object, and engine parameters like
    // these are applied through `worker.setParameters` — confirm against the
    // installed version that they actually take effect here.
    const r = await Tesseract.recognize(tmp, "eng", {
      tessedit_char_whitelist: whitelist,
      tessedit_pageseg_mode: mode === "soal" ? "7" : "8",
      tessedit_ocr_engine_mode: "1"
    });
    return r.data.text.trim();
  } catch (error) {
    console.error("OCR Error:", error);
    return "";
  } finally {
    // Cleanup is best-effort: failing to delete the temp file must not
    // replace a successful OCR result with an exception.
    try {
      fs.unlinkSync(tmp);
    } catch (cleanupError) {
      console.error("OCR temp file cleanup failed:", cleanupError);
    }
  }
}
|
|
| |
| |
| |
| module.exports = async data => { |
| try { |
| |
| const soalImg = buf(data.main); |
| const soalProcessed = await preprocessSoal(soalImg); |
| const soalText = await ocr(soalProcessed, "soal"); |
|
|
| const soalRaw = soalText |
| .split(/\s+/) |
| .filter(x => x) |
| .slice(0, 3); |
|
|
| const soalLeet = soalRaw.map(leetize); |
|
|
| |
| const botResults = []; |
| for (const b of data.bots) { |
| const d = buf(b.img); |
| const p = await preprocessBot(d); |
| const t = await ocr(p, "bot"); |
| |
| const clean = t.replace(/[^A-Za-z0-9@$&]/g, ""); |
| const leet = leetize(clean); |
| |
| botResults.push({ |
| id: b.id, |
| text: clean, |
| value: leet |
| }); |
| } |
|
|
| |
| const result = []; |
| const usedBots = new Set(); |
|
|
| |
| for (const s of soalLeet) { |
| let bestBot = null; |
| let bestScore = 0; |
|
|
| for (const bot of botResults) { |
| if (usedBots.has(bot.id) || !bot.value) continue; |
| |
| const score = similarity(s, bot.value); |
| if (score > bestScore && score >= 0.3) { |
| bestScore = score; |
| bestBot = bot.id; |
| } |
| } |
|
|
| if (bestBot) { |
| usedBots.add(bestBot); |
| result.push(bestBot); |
| } else { |
| result.push(null); |
| } |
| } |
|
|
| |
| for (let i = 0; i < result.length; i++) { |
| if (result[i] === null) { |
| |
| for (const bot of botResults) { |
| if (!usedBots.has(bot.id) && bot.value) { |
| result[i] = bot.id; |
| usedBots.add(bot.id); |
| break; |
| } |
| } |
| } |
| } |
|
|
| return { |
| soal: soalRaw, |
| soalLeet, |
| botResults, |
| result |
| }; |
| |
| } catch (error) { |
| console.error("Main function error:", error); |
| return { |
| soal: [], |
| soalLeet: [], |
| botResults: [], |
| result: [] |
| }; |
| } |
| }; |