// File size: 6,786 bytes (revision 4f05ffd)
const fs = require('fs');
const path = require('path');
const { extractTextFromImage, uploadImageToHosting } = require('./imageProcessor');
// Indonesian number words mapped to digit strings.
// The composite parts 'belas', 'puluh', 'ratus' and 'ribu' map to an empty string.
const NUMBER_WORDS = {
  nol: '0',
  satu: '1',
  dua: '2',
  tiga: '3',
  empat: '4',
  lima: '5',
  enam: '6',
  tujuh: '7',
  delapan: '8',
  sembilan: '9',
  sepuluh: '10',
  sebelas: '11',
  belas: '',
  puluh: '',
  ratus: '',
  ribu: '',
};
// Single-character leet substitutions: digit/symbol -> the letter it stands for.
const LEET_MAP = {
  '4': 'a',
  '@': 'a',
  '8': 'b',
  '3': 'e',
  '6': 'g',
  '1': 'i',
  '!': 'i',
  '0': 'o',
  '5': 's',
  '$': 's',
  '7': 't',
  '+': 't',
  '2': 'z',
};
/**
 * Coerces a value to a string, mapping "no value" inputs to the empty string.
 *
 * Numeric zero is falsy but is a legitimate value (for example the answer "0"),
 * so numbers are stringified explicitly instead of going through a truthiness
 * test. Every other falsy input (null, undefined, false, '', NaN) still yields ''.
 *
 * @param {*} s - Value to convert.
 * @returns {string} String form of `s`, or '' when `s` carries no value.
 */
function safeString(s) {
  if (typeof s === 'number' || typeof s === 'bigint') {
    // NaN keeps its previous behaviour (''); 0 and -0 become "0".
    return Number.isNaN(s) ? '' : String(s);
  }
  return s ? String(s) : '';
}
/**
 * Runs text extraction on an in-memory image by delegating to
 * `extractTextFromImage`.
 *
 * Any failure (missing input, non-Buffer input, or an error thrown by the
 * extractor) is logged and converted into a failure result instead of being
 * rethrown, so this function never rejects.
 *
 * @param {Buffer} imageBuffer - Raw image bytes.
 * @returns {Promise<*>} Whatever `extractTextFromImage` resolves to, or
 *   `{ status: false, response: 'Gagal memproses gambar' }` on failure.
 */
async function extractTextFromBuffer(imageBuffer) {
  try {
    if (!imageBuffer) throw new Error('No image buffer provided');
    if (!Buffer.isBuffer(imageBuffer)) throw new Error('extractTextFromBuffer expects a Buffer');
    console.log('🖼️ DEBUG extractTextFromBuffer: Received buffer, size:', imageBuffer.length, 'bytes');
    const result = await extractTextFromImage(imageBuffer);
    // The log prefix was a mis-decoded emoji whose bytes split the string
    // literal across two lines; it is restored as a single valid literal.
    console.log('✅ DEBUG extractTextFromBuffer: Hasil ekstraksi:', result);
    return result;
  } catch (error) {
    console.error('❌ DEBUG extractTextFromBuffer Error:', error.message);
    return { status: false, response: 'Gagal memproses gambar' };
  }
}
/**
 * Strips combining diacritical marks (U+0300–U+036F) from a value's string form.
 *
 * @param {*} str - Value to clean; converted with `safeString` first.
 * @returns {string} NFD-decomposed text with the combining marks removed.
 */
function removeAccents(str) {
  // Decompose first so accented letters become base letter + combining mark.
  const decomposed = safeString(str).normalize('NFD');
  const withoutMarks = decomposed.replace(/[\u0300-\u036f]/g, '');
  return withoutMarks;
}
/**
 * Replaces every character that has an entry in `LEET_MAP` with its mapped
 * letter; all other characters are copied through unchanged.
 *
 * @param {string} str - Text to translate.
 * @returns {string} Text with leet characters substituted.
 */
function applyLeetMap(str) {
  return [...str].reduce((translated, symbol) => {
    const replacement = LEET_MAP[symbol];
    return translated + (replacement === undefined ? symbol : replacement);
  }, '');
}
/**
 * Replaces whitespace-separated tokens that are keys of `NUMBER_WORDS`
 * (compared case-insensitively) with their mapped value, e.g. "tujuh" -> "7".
 * This is a per-token substitution only; composite numbers are not assembled.
 *
 * Only own keys of `NUMBER_WORDS` are consulted, so ordinary words that happen
 * to be inherited object properties (such as "constructor") pass through
 * untouched. Tokens that map to an empty string are dropped, so they do not
 * leave doubled or trailing spaces in the result.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The remaining tokens joined by single spaces.
 */
function wordsToNumbers(str) {
  const hasOwn = Object.prototype.hasOwnProperty;
  return str
    .split(/\s+/)
    .map((token) => {
      const key = token.toLowerCase();
      const mapped = hasOwn.call(NUMBER_WORDS, key) ? NUMBER_WORDS[key] : undefined;
      return mapped === undefined ? token : mapped;
    })
    .filter((token) => token !== '')
    .join(' ');
}
/**
 * Produces a canonical form of a text for comparison.
 *
 * Steps, in order: coerce to string, strip accents, lowercase, apply the leet
 * map, replace everything except a-z / 0-9 / whitespace with a space, collapse
 * and trim whitespace, then substitute number words.
 *
 * NOTE(review): the leet map runs before number words are converted and it
 * rewrites digits to letters, so the input "7" normalizes to "t" while "tujuh"
 * normalizes to "7". Confirm this is intended before relying on digits
 * surviving normalization.
 *
 * @param {*} text - Value to normalize.
 * @returns {string} The normalized text.
 */
function normalizeText(text) {
  const original = safeString(text);
  const lowered = removeAccents(original).toLowerCase();
  const deLeeted = applyLeetMap(lowered);
  // Keep letters, digits and spaces only.
  const cleaned = deLeeted.replace(/[^a-z0-9\s]/g, ' ').replace(/\s+/g, ' ').trim();
  // Map simple number words.
  const normalized = wordsToNumbers(cleaned);
  // The log prefix was a mis-decoded (mojibake) emoji; restored here.
  console.log(`🔤 DEBUG normalizeText: "${original}" -> "${normalized}"`);
  return normalized;
}
/**
 * Levenshtein edit distance between two strings, used for fuzzy matching.
 * Insertions, deletions and substitutions each cost 1.
 *
 * @param {string} [a=''] - First string.
 * @param {string} [b=''] - Second string.
 * @returns {number} Minimum number of single-character edits turning `a` into `b`.
 */
function levenshtein(a = '', b = '') {
  const rows = a.length;
  const cols = b.length;
  if (rows === 0) return cols;
  if (cols === 0) return rows;

  // Only the previous row of the distance table is needed to build the next one.
  let previous = Array.from({ length: cols + 1 }, (_, col) => col);
  for (let row = 1; row <= rows; row += 1) {
    const current = [row];
    for (let col = 1; col <= cols; col += 1) {
      const substitution = previous[col - 1] + (a[row - 1] === b[col - 1] ? 0 : 1);
      const deletion = previous[col] + 1;
      const insertion = current[col - 1] + 1;
      current.push(Math.min(deletion, insertion, substitution));
    }
    previous = current;
  }
  return previous[cols];
}
/**
 * Similarity score derived from the Levenshtein distance:
 * 1 - distance / length of the longer string.
 *
 * @param {*} a - First value; converted with `safeString`.
 * @param {*} b - Second value; converted with `safeString`.
 * @returns {number} 1 when the strings are identical, otherwise the score above.
 */
function similarity(a, b) {
  const left = safeString(a);
  const right = safeString(b);
  if (left === right) {
    return 1;
  }
  const distance = levenshtein(left, right);
  const longest = Math.max(left.length, right.length);
  if (longest === 0) {
    return 1;
  }
  return 1 - (distance / longest);
}
/**
 * Tries to evaluate a simple arithmetic expression such as "3+4", " 7 - 2 "
 * or "2 * (3+1)".
 *
 * Every character other than digits, + - * / ( ) . and whitespace is removed
 * before evaluation. Non-integer results are rounded to 12 significant digits
 * so binary floating-point noise does not leak into the returned string
 * ("0.1+0.2" yields "0.3", not "0.30000000000000004"); integer results are
 * returned exactly.
 *
 * @param {string} s - Candidate expression.
 * @returns {string|null} The result as a string, or null when the input has no
 *   digits, cannot be evaluated, or does not produce a finite number.
 */
function tryEvaluateMathExpression(s) {
  try {
    const cleaned = s.replace(/[^0-9+\-*/().\s]/g, '');
    if (!/[0-9]/.test(cleaned)) return null;
    // NOTE(security): dynamic evaluation. The whitelist above leaves only
    // digits, arithmetic operators, parentheses, dots and whitespace, so no
    // identifiers can reach the evaluator. Keep the whitelist this strict, or
    // replace this with a real expression parser.
    // eslint-disable-next-line no-new-func
    const val = Function(`"use strict"; return (${cleaned});`)();
    if (typeof val !== 'number' || !Number.isFinite(val)) return null;
    const stable = Number.isInteger(val) ? val : Number(val.toPrecision(12));
    return String(stable);
  } catch {
    // Malformed expressions and non-string inputs are simply "not math".
    return null;
  }
}
// Compare an OCR value with the expected soal (question) using several heuristics.

/**
 * Evaluates each text as an arithmetic expression and collects the distinct
 * results that could be computed.
 *
 * @param {...string} texts - Candidate expressions.
 * @returns {string[]} Distinct non-null results of `tryEvaluateMathExpression`.
 */
function collectMathResults(...texts) {
  const results = [];
  for (const text of texts) {
    const val = tryEvaluateMathExpression(text);
    if (val !== null && !results.includes(val)) results.push(val);
  }
  return results;
}

/**
 * Decides whether an OCR value matches the expected soal. The checks run in
 * this order and the first hit wins:
 *   1. exact match of the normalized texts,
 *   2. arithmetic match (either side evaluated as an expression),
 *   3. numeric equality of the normalized texts,
 *   4. fuzzy similarity >= `fuzzyThreshold`,
 *   5. one normalized text containing the other.
 *
 * Arithmetic is evaluated on the raw text as well as on the normalized text:
 * `normalizeText` applies the leet map, which rewrites digits and '+' into
 * letters, so an expression such as "3+4" is no longer arithmetic once
 * normalized. The normalized form is still tried because it is where number
 * words (e.g. "tujuh") have become digits.
 *
 * Texts that normalize to an empty string never match through the normalized
 * or fuzzy checks, in line with the existing rule that two empty inputs do not
 * match. Identical non-empty raw inputs still count as an exact match.
 *
 * @param {*} value - Text read by OCR.
 * @param {*} soal - Expected text.
 * @param {{fuzzyThreshold?: number}} [options] - `fuzzyThreshold` defaults to 0.75.
 * @returns {boolean} True when any heuristic considers the two a match.
 */
function isValueMatch(value, soal, options = {}) {
  const { fuzzyThreshold = 0.75 } = options;
  console.log('🔍 DEBUG isValueMatch: Value="%s", Soal="%s"', value, soal);
  if (!value && !soal) return false;

  const vText = safeString(value);
  const sText = safeString(soal);
  const vNorm = normalizeText(vText);
  const sNorm = normalizeText(sText);

  // exact match
  const vTrimmed = vText.trim();
  const sameRaw = vTrimmed !== '' && vTrimmed === sText.trim();
  if (sameRaw || (vNorm !== '' && vNorm === sNorm)) {
    console.log('✅ DEBUG exact match');
    return true;
  }

  // try math evaluation for both sides (raw text first, then normalized)
  const vMaths = collectMathResults(vText, vNorm);
  const sMaths = collectMathResults(sText, sNorm);
  const sharedMath = vMaths.find((candidate) => sMaths.includes(candidate));
  if (sharedMath !== undefined) {
    console.log('✅ DEBUG math match:', sharedMath);
    return true;
  }
  if (sNorm !== '' && vMaths.includes(sNorm)) {
    console.log('✅ DEBUG math->soal match:', sNorm);
    return true;
  }
  if (vNorm !== '' && sMaths.includes(vNorm)) {
    console.log('✅ DEBUG soal->math match:', vNorm);
    return true;
  }

  // Nothing meaningful is left to compare once either side normalizes to ''.
  if (vNorm === '' || sNorm === '') {
    console.log('❌ DEBUG: No match found');
    return false;
  }

  // numeric comparison if both start with a number
  const vNum = parseFloat(vNorm);
  const sNum = parseFloat(sNorm);
  if (!Number.isNaN(vNum) && !Number.isNaN(sNum) && Math.abs(vNum - sNum) < 1e-9) {
    console.log('✅ DEBUG numeric equal');
    return true;
  }

  // fuzzy text similarity
  const sim = similarity(vNorm, sNorm);
  console.log('📊 DEBUG Similarity:', sim);
  if (sim >= fuzzyThreshold) {
    console.log('✅ DEBUG fuzzy match (threshold=', fuzzyThreshold, ')');
    return true;
  }

  // partial match: one contains the other with decent length
  // NOTE(review): the length test uses the LONGER text, so a single character
  // contained in a longer text counts as a match — confirm this is intended.
  const contained = vNorm.includes(sNorm) || sNorm.includes(vNorm);
  if (contained && Math.max(vNorm.length, sNorm.length) >= 3) {
    console.log('✅ DEBUG partial contains match');
    return true;
  }

  console.log('❌ DEBUG: No match found');
  return false;
}
/**
 * Unwraps an answer: when `jawaban` is an object carrying a `response`
 * property, that response is returned; otherwise `jawaban` is returned as is.
 *
 * The presence of `response` is tested against null/undefined rather than by
 * truthiness, so an empty-string or zero response is still unwrapped instead
 * of the whole wrapper object being handed back to the caller.
 *
 * @param {*} soalArray - Not used by this function; kept for the call signature.
 * @param {*} jawaban - Answer string, or an object with a `response` property.
 * @param {*} botIndex - Only used in the debug log line.
 * @returns {*} `jawaban.response` when present, otherwise `jawaban`.
 */
function mapAnswer(soalArray, jawaban, botIndex) {
  // The log prefix was a mis-decoded (mojibake) emoji; restored here.
  console.log(`🤖 DEBUG mapAnswer: Bot ${botIndex}, Jawaban: "${jawaban}"`);
  const isWrapper = jawaban !== null && typeof jawaban === 'object';
  if (isWrapper && jawaban.response !== undefined && jawaban.response !== null) {
    return jawaban.response;
  }
  return jawaban;
}
// --- Integrated helper from userscript: countPairs ---
/**
 * Counts how many letters two texts share, ignoring case and everything that
 * is not a-z: for each letter the smaller of its two occurrence counts is
 * added to the total.
 *
 * @param {*} [s1=''] - First text; converted with `safeString`.
 * @param {*} [s2=''] - Second text; converted with `safeString`.
 * @returns {number} Sum over a-z of min(count in s1, count in s2).
 */
function countPairs(s1 = '', s2 = '') {
  // Builds a 26-slot occurrence table for the a-z letters of one text.
  const letterHistogram = (text) => {
    const letters = safeString(text).toLowerCase().replace(/[^a-z]/g, '');
    const table = new Array(26).fill(0);
    for (const letter of letters) {
      table[letter.charCodeAt(0) - 97] += 1;
    }
    return table;
  };

  const first = letterHistogram(s1);
  const second = letterHistogram(s2);
  return first.reduce((total, occurrences, slot) => total + Math.min(occurrences, second[slot]), 0);
}
// Export utilities
module.exports = {
extractTextFromBuffer,
mapAnswer,
normalizeText,
isValueMatch,
levenshtein,
similarity,
NUMBER_WORDS,
LEET_MAP,
countPairs
}; |