Update endpoints/antibot.js
Browse files- endpoints/antibot.js +34 -19
endpoints/antibot.js
CHANGED
|
@@ -1,6 +1,25 @@
|
|
| 1 |
const Tesseract = require("tesseract.js");
|
| 2 |
const sharp = require("sharp");
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
const DICTIONARIES = {
|
| 6 |
wordnumber: { ONE: '1', TWO: '2', THREE: '3', FOUR: '4', FIVE: '5', SIX: '6', SEVEN: '7', EIGHT: '8', NINE: '9', TEN: '10' },
|
|
@@ -13,15 +32,11 @@ const DICTIONARIES = {
|
|
| 13 |
xxx: { '--X': '--+', '-X-': '-+-', 'X--': '+--', 'XX-': '++-', '-XX': '-++', 'X-X': '+-+', '---': '---', 'XXX': '+++', 'X-X-': '+-+-', '-X-X': '-+-+' },
|
| 14 |
xox: { '--X': 'OO+', '-X-': 'O+O', 'X--': '+OO', 'XX-': '++O', '-XX': 'O++', 'X-X': '+O+', '---': 'OOO', 'XXX': '+++', 'X-X-': '+O+O', '-X-X': 'O+O+' },
|
| 15 |
oxo: { OOX: '--+', OXO: '-+-', XOO: '+--', XXO: '++-', OXX: '-++', XOX: '+-+', OOO: '---', XXX: '+++', XOXO: '+-+-', OXOX: '-+-+' },
|
| 16 |
-
zoo: { ZOO: '200', OZO: '020', OOZ: '002', SOO: '500',
|
| 17 |
ooz: { '200': 'ZOO', '020': 'OZO', '002': 'OOZ', '500': 'SOO', '050': 'OSO', '005': 'OOS', '101': 'LOL', '505': 'SOS', '202': 'ZOZ', '111': 'LLL' },
|
| 18 |
animals: { CAT: 'C@T', DOG: 'D0G', LION: '1!0N', TIGER: 'T!G3R', MONKEY: 'M0NK3Y', ELEPHANT: '31EPH@NT', COW: 'C0W', FOX: 'F0X', MOUSE: 'M0US3', ANT: '@NT' }
|
| 19 |
};
|
| 20 |
|
| 21 |
-
const LEET_GENERAL = {
|
| 22 |
-
'0': 'O', '1': 'I', '3': 'E', '4': 'A', '5': 'S', '7': 'T', '8': 'B', '@': 'A', '$': 'S', '!': 'I'
|
| 23 |
-
};
|
| 24 |
-
|
| 25 |
function applyVariations(word, type) {
|
| 26 |
let w = word.toUpperCase();
|
| 27 |
|
|
@@ -75,12 +90,12 @@ function findBestMatch(scannedText) {
|
|
| 75 |
|
| 76 |
for (const [key, val] of Object.entries(dict)) {
|
| 77 |
if (val === variedText) return { type, value: key, original: variedText };
|
| 78 |
-
|
| 79 |
const distKey = levenshtein(variedText, key);
|
| 80 |
const distVal = levenshtein(variedText, val);
|
| 81 |
-
|
| 82 |
-
if (distKey <= 1) return { type, value: val, original: key };
|
| 83 |
-
if (distVal <= 1) return { type, value: key, original: val };
|
| 84 |
}
|
| 85 |
}
|
| 86 |
|
|
@@ -113,7 +128,7 @@ async function preprocessImage(imageBuffer, type = 'main') {
|
|
| 113 |
async function ocr(imageBuffer, psmMode = '6') {
|
| 114 |
try {
|
| 115 |
const result = await Tesseract.recognize(imageBuffer, "eng", {
|
| 116 |
-
tessedit_char_whitelist: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789
|
| 117 |
tessedit_pageseg_mode: psmMode
|
| 118 |
});
|
| 119 |
return result.data.text.trim();
|
|
@@ -129,9 +144,9 @@ module.exports = async (data) => {
|
|
| 129 |
|
| 130 |
const mainBuffer = buf(data.main);
|
| 131 |
const processedMain = await preprocessImage(mainBuffer, 'main');
|
| 132 |
-
const rawMainText = await ocr(processedMain, '7');
|
| 133 |
-
|
| 134 |
-
const mainWords = rawMainText.split(/\s+/).filter(w => w.length > 0);
|
| 135 |
console.log("π Raw Main Text: ", rawMainText);
|
| 136 |
console.log("π Parsed Words: ", mainWords);
|
| 137 |
|
|
@@ -142,20 +157,20 @@ module.exports = async (data) => {
|
|
| 142 |
targetConcepts.push(match);
|
| 143 |
}
|
| 144 |
}
|
| 145 |
-
|
| 146 |
if (targetConcepts.length === 0) {
|
| 147 |
targetConcepts = mainWords.map(w => ({ type: 'raw', value: normalize(w) }));
|
| 148 |
}
|
| 149 |
|
| 150 |
-
console.log("π― Target Concepts: ", targetConcepts);
|
| 151 |
|
| 152 |
const botResults = [];
|
| 153 |
for (const bot of data.bots) {
|
| 154 |
const botBuffer = buf(bot.img);
|
| 155 |
const processedBot = await preprocessImage(botBuffer, 'bot');
|
| 156 |
-
const rawBotText = await ocr(processedBot, '6');
|
| 157 |
const cleanBotText = normalize(rawBotText);
|
| 158 |
-
|
| 159 |
const botAnalysis = findBestMatch(cleanBotText);
|
| 160 |
|
| 161 |
botResults.push({
|
|
@@ -178,7 +193,7 @@ module.exports = async (data) => {
|
|
| 178 |
|
| 179 |
if (target.value === bot.analysis.value || target.value === bot.clean) {
|
| 180 |
bestBotId = bot.id;
|
| 181 |
-
minDistance =
|
| 182 |
break;
|
| 183 |
}
|
| 184 |
|
|
|
|
| 1 |
const Tesseract = require("tesseract.js");
|
| 2 |
const sharp = require("sharp");
|
| 3 |
+
|
| 4 |
+
/**
 * Compute the Levenshtein edit distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn `s` into `t`. Characters are compared via `charAt`, i.e. per
 * UTF-16 code unit.
 *
 * @param {string} s - First string.
 * @param {string} t - Second string.
 * @returns {number} The edit distance (0 when the inputs are identical).
 */
function levenshtein(s, t) {
  if (s === t) return 0;

  const rowCount = s.length;
  const colCount = t.length;

  // When one side is empty, the distance is the length of the other side.
  if (rowCount === 0) return colCount;
  if (colCount === 0) return rowCount;

  // table[r][c] = distance between the first r chars of s and the first c chars of t.
  // Each row is seeded with its first column (r deletions).
  const table = Array.from({ length: rowCount + 1 }, (_, r) => [r]);

  // First row: c insertions to build the first c chars of t from nothing.
  for (let c = 0; c <= colCount; c++) {
    table[0][c] = c;
  }

  for (let r = 1; r <= rowCount; r++) {
    const sourceChar = s.charAt(r - 1);
    for (let c = 1; c <= colCount; c++) {
      const substitutionCost = sourceChar === t.charAt(c - 1) ? 0 : 1;
      const viaDeletion = table[r - 1][c] + 1;
      const viaInsertion = table[r][c - 1] + 1;
      const viaSubstitution = table[r - 1][c - 1] + substitutionCost;
      table[r][c] = Math.min(viaDeletion, viaInsertion, viaSubstitution);
    }
  }

  return table[rowCount][colCount];
}
|
| 23 |
|
| 24 |
const DICTIONARIES = {
|
| 25 |
wordnumber: { ONE: '1', TWO: '2', THREE: '3', FOUR: '4', FIVE: '5', SIX: '6', SEVEN: '7', EIGHT: '8', NINE: '9', TEN: '10' },
|
|
|
|
| 32 |
xxx: { '--X': '--+', '-X-': '-+-', 'X--': '+--', 'XX-': '++-', '-XX': '-++', 'X-X': '+-+', '---': '---', 'XXX': '+++', 'X-X-': '+-+-', '-X-X': '-+-+' },
|
| 33 |
xox: { '--X': 'OO+', '-X-': 'O+O', 'X--': '+OO', 'XX-': '++O', '-XX': 'O++', 'X-X': '+O+', '---': 'OOO', 'XXX': '+++', 'X-X-': '+O+O', '-X-X': 'O+O+' },
|
| 34 |
oxo: { OOX: '--+', OXO: '-+-', XOO: '+--', XXO: '++-', OXX: '-++', XOX: '+-+', OOO: '---', XXX: '+++', XOXO: '+-+-', OXOX: '-+-+' },
|
| 35 |
+
zoo: { ZOO: '200', OZO: '020', OOZ: '002', SOO: '500', '0SO': '050', OOS: '005', LOL: '101', SOS: '505', ZOZ: '202', LLL: '111' },
|
| 36 |
ooz: { '200': 'ZOO', '020': 'OZO', '002': 'OOZ', '500': 'SOO', '050': 'OSO', '005': 'OOS', '101': 'LOL', '505': 'SOS', '202': 'ZOZ', '111': 'LLL' },
|
| 37 |
animals: { CAT: 'C@T', DOG: 'D0G', LION: '1!0N', TIGER: 'T!G3R', MONKEY: 'M0NK3Y', ELEPHANT: '31EPH@NT', COW: 'C0W', FOX: 'F0X', MOUSE: 'M0US3', ANT: '@NT' }
|
| 38 |
};
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
function applyVariations(word, type) {
|
| 41 |
let w = word.toUpperCase();
|
| 42 |
|
|
|
|
| 90 |
|
| 91 |
for (const [key, val] of Object.entries(dict)) {
|
| 92 |
if (val === variedText) return { type, value: key, original: variedText };
|
| 93 |
+
|
| 94 |
const distKey = levenshtein(variedText, key);
|
| 95 |
const distVal = levenshtein(variedText, val);
|
| 96 |
+
|
| 97 |
+
if (distKey <= 1) return { type, value: val, original: key };
|
| 98 |
+
if (distVal <= 1) return { type, value: key, original: val };
|
| 99 |
}
|
| 100 |
}
|
| 101 |
|
|
|
|
| 128 |
async function ocr(imageBuffer, psmMode = '6') {
|
| 129 |
try {
|
| 130 |
const result = await Tesseract.recognize(imageBuffer, "eng", {
|
| 131 |
+
tessedit_char_whitelist: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-*=@!$,.:;",
|
| 132 |
tessedit_pageseg_mode: psmMode
|
| 133 |
});
|
| 134 |
return result.data.text.trim();
|
|
|
|
| 144 |
|
| 145 |
const mainBuffer = buf(data.main);
|
| 146 |
const processedMain = await preprocessImage(mainBuffer, 'main');
|
| 147 |
+
const rawMainText = await ocr(processedMain, '7');
|
| 148 |
+
|
| 149 |
+
const mainWords = rawMainText.replace(/[,.]/g, ' ').split(/\s+/).filter(w => w.length > 0);
|
| 150 |
console.log("π Raw Main Text: ", rawMainText);
|
| 151 |
console.log("π Parsed Words: ", mainWords);
|
| 152 |
|
|
|
|
| 157 |
targetConcepts.push(match);
|
| 158 |
}
|
| 159 |
}
|
| 160 |
+
|
| 161 |
if (targetConcepts.length === 0) {
|
| 162 |
targetConcepts = mainWords.map(w => ({ type: 'raw', value: normalize(w) }));
|
| 163 |
}
|
| 164 |
|
| 165 |
+
console.log("π― Target Concepts: ", targetConcepts.map(t => t.value));
|
| 166 |
|
| 167 |
const botResults = [];
|
| 168 |
for (const bot of data.bots) {
|
| 169 |
const botBuffer = buf(bot.img);
|
| 170 |
const processedBot = await preprocessImage(botBuffer, 'bot');
|
| 171 |
+
const rawBotText = await ocr(processedBot, '6');
|
| 172 |
const cleanBotText = normalize(rawBotText);
|
| 173 |
+
|
| 174 |
const botAnalysis = findBestMatch(cleanBotText);
|
| 175 |
|
| 176 |
botResults.push({
|
|
|
|
| 193 |
|
| 194 |
if (target.value === bot.analysis.value || target.value === bot.clean) {
|
| 195 |
bestBotId = bot.id;
|
| 196 |
+
minDistance = -1;
|
| 197 |
break;
|
| 198 |
}
|
| 199 |
|