antibot2forget

Running

App Files Files Community

fourmovie committed on Nov 21, 2025

Commit

0c8658d

1 Parent(s): 6e96b3b

update

Browse files

Files changed (4) hide show

Dockerfile +5 -0
endpoints/antibot.js +161 -176
endpoints/ocr.py +187 -0
package.json +1 -0

Dockerfile CHANGED Viewed

@@ -1,9 +1,11 @@
 FROM node:20-slim
 RUN apt update && apt install -y \
     wget gnupg ca-certificates xvfb \
     fonts-liberation libappindicator3-1 libasound2 libatk-bridge2.0-0 \
     libatk1.0-0 libxss1 libnss3 libxcomposite1 libxdamage1 libxrandr2 libgbm1 \
     && wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
     && apt install -y ./google-chrome-stable_current_amd64.deb \
     && rm google-chrome-stable_current_amd64.deb
@@ -13,6 +15,9 @@ WORKDIR /app
 RUN mkdir -p /app/endpoints && \
     mkdir -p /app/cache
 COPY package*.json ./
 RUN npm install

 FROM node:20-slim
+# Install system dependencies, including Python and OpenCV
 RUN apt update && apt install -y \
     wget gnupg ca-certificates xvfb \
     fonts-liberation libappindicator3-1 libasound2 libatk-bridge2.0-0 \
     libatk1.0-0 libxss1 libnss3 libxcomposite1 libxdamage1 libxrandr2 libgbm1 \
+    python3 python3-pip python3-opencv tesseract-ocr tesseract-ocr-eng \
     && wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
     && apt install -y ./google-chrome-stable_current_amd64.deb \
     && rm google-chrome-stable_current_amd64.deb
 RUN mkdir -p /app/endpoints && \
     mkdir -p /app/cache
# Install Python dependencies.
# The Debian-based image marks its system Python as externally managed
# (PEP 668), so a plain system-wide `pip3 install` is refused; the flag below
# opts in explicitly. --no-cache-dir keeps the wheel cache out of the layer.
# NOTE(review): python3-opencv is already installed through apt above, so the
# pip `opencv-python` (and `numpy`) entries may be redundant - confirm.
RUN pip3 install --no-cache-dir --break-system-packages pytesseract pillow numpy opencv-python
 COPY package*.json ./
 RUN npm install

endpoints/antibot.js CHANGED Viewed

@@ -1,193 +1,178 @@
-const Tesseract = require("tesseract.js");
-const fs = require("fs");
-const path = require("path");
-const sharp = require("sharp");
-const WORD_TO_NUM = {
-  zero:"0",oh:"0",one:"1",won:"1",two:"2",to:"2",too:"2",three:"3",tree:"3",
-  four:"4",for:"4",five:"5",six:"6",seven:"7",eight:"8",ate:"8",nine:"9",ten:"10"
-};
-const LEET_REPLACE = {
-  "4":"a","3":"e","1":"i","0":"o","5":"s","7":"t","@":"a","$":"s","|":"l"
-};
 const buf = b => Buffer.from(b.replace(/^data:image\/\w+;base64,/,""),"base64");
-function fixLeetToText(s){
-  return s.toLowerCase().split("").map(c=>LEET_REPLACE[c]??c).join("");
-}
-function wordToDigit(t){
-  if(!t)return"";
-  t=t.toLowerCase().trim();
-  if(/^\d+$/.test(t))return t;
-  const c=t.replace(/[^a-z0-9]/gi,"");
-  const f=fixLeetToText(c);
-  if(WORD_TO_NUM[f]!==undefined)return WORD_TO_NUM[f];
-  const d=t.match(/\d+/);
-  if(d)return d[0];
-  return"";
-}
-function normalizeExpression(raw){
-  if(!raw)return"";
-  let s=raw.toLowerCase().replace(/\s+/g,"");
-  s=s.replace(/[×xX·•]/g,"*");
-  const parts=s.split(/([+\-*/()])/).filter(p=>p!=="");
-  return parts.map(p=>{
-    if(/^[+\-*/()]+$/.test(p))return p;
-    let num=wordToDigit(p);
-    if(num)return num;
-    const tf=fixLeetToText(p).replace(/[^a-z0-9]/g,"");
-    num=WORD_TO_NUM[tf]??"";
-    if(num)return num;
-    const dig=p.match(/\d+/);
-    if(dig)return dig[0];
-    return p;
-  }).join("");
-}
-function safeEvalExpression(expr){
-  if(!expr)return null;
-  const n=normalizeExpression(expr);
-  if(!/^[0-9+\-*/().]+$/.test(n))return null;
-  try{
-    const v=Function(`"use strict";return(${n});`)();
-    if(typeof v==="number"&&isFinite(v)){
-      if(Math.abs(v-Math.round(v))<1e-9)return String(Math.round(v));
-      return String(v);
     }
-    return null;
-  }catch(e){return null;}
-}
-async function writeTempAndRecognize(b,opt={}){
-  const tmp=path.join(__dirname,"tmp_"+Date.now()+"_"+Math.random()+".png");
-  fs.writeFileSync(tmp,b);
-  try{
-    const r=await Tesseract.recognize(tmp,"eng",opt);
-    fs.unlinkSync(tmp);
-    return r.data.text||"";
-  }catch(e){
-    if(fs.existsSync(tmp))fs.unlinkSync(tmp);
-    return"";
-  }
-}
-async function ocrDigit(b){
-  const t=await writeTempAndRecognize(b,{
-    tessedit_char_whitelist:"0123456789Il!|OoZSTBGg",
-    tessedit_pageseg_mode:"7",
-    tessedit_ocr_engine_mode:"1"
-  });
-  const m={"I":"1","l":"1","|":"1","!":"1","O":"0","o":"0","Q":"0","Z":"2","z":"2",
-  "S":"5","s":"5","T":"7","t":"7","B":"8","b":"8","G":"6","g":"9"};
-  return t.split("").map(c=>m[c]??c).join("").replace(/[^0-9]/g,"");
-}
-async function ocrGeneric(b){
-  const t=await writeTempAndRecognize(b,{
-    tessedit_char_whitelist:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-*/()., ",
-    tessedit_pageseg_mode:"7",
-    tessedit_ocr_engine_mode:"1"
-  });
-  return t.trim();
-}
-async function preprocessForSoal(b){
-  return sharp(b).resize({width:1000}).grayscale().normalize().sharpen({sigma:1.2}).median(1).toBuffer();
-}
-async function preprocessForBot(b){
-  return sharp(b).resize({width:700}).grayscale().normalize().sharpen({sigma:1}).median(1).toBuffer();
-}
-module.exports = async function solveAntiBot(data){
-  try{
-    const soalBuf=buf(data.main);
-    const soalPrep=await preprocessForSoal(soalBuf);
-    const soalRawText=await ocrGeneric(soalPrep);
-    const tok=soalRawText.split(/[,;]+|\s+/).filter(Boolean);
-    const soalNumbers=tok.slice(0,10).map(t=>{
-      const e=safeEvalExpression(t);
-      if(e!==null&&e!=="")return e;
-      const w=wordToDigit(t);
-      if(w)return w;
-      const fx=fixLeetToText(t.replace(/[^a-z0-9+\-*/().]/gi,""));
-      if(WORD_TO_NUM[fx]!==undefined)return WORD_TO_NUM[fx];
-      const d=t.match(/\d+/);
-      if(d)return d[0];
-      return"";
-    }).filter(Boolean);
-    const botResults=[];
-    for(const b of data.bots){
-      const bbuf=buf(b.img);
-      const prep=await preprocessForBot(bbuf);
-      let v=await ocrDigit(prep);
-      if(!v){
-        const g=await ocrGeneric(prep);
-        if(g){
-          const ev=safeEvalExpression(g);
-          if(ev!==null)v=ev;
-          else v=wordToDigit(g)||fixLeetToText(g).replace(/[^0-9]/g,"");
         }
-      }
-      botResults.push({id:b.id,raw:v||""});
     }
-    botResults.forEach(br=>{
-      if(!br.raw)return;
-      if(!/^\d+$/.test(br.raw)){
-        br.raw=wordToDigit(br.raw)||safeEvalExpression(br.raw)||br.raw.match(/\d+/)?.[0]||"";
-      }
-    });
-    const result=[];
-    const used=new Set();
-    for(const need of soalNumbers){
-      let f=null;
-      for(const b of botResults){
-        if(!used.has(b.id)&&b.raw&&String(b.raw)===String(need)){
-          f=b.id;break;
-        }
-      }
-      if(!f){
-        for(const b of botResults){
-          if(!used.has(b.id)&&b.raw&&Number(b.raw)==Number(need)){
-            f=b.id;break;
-          }
-        }
-      }
-      if(!f){
-        for(const b of botResults){
-          if(!used.has(b.id)&&b.raw){
-            const bw=fixLeetToText(String(b.raw));
-            if(WORD_TO_NUM[bw]===String(need)){f=b.id;break;}
-          }
         }
-      }
-      if(f){result.push(f);used.add(f);}
-      else result.push(null);
     }
-    for(let i=0;i<result.length;i++){
-      if(result[i]===null){
-        for(const b of botResults){
-          if(!used.has(b.id)){
-            result[i]=b.id;
-            used.add(b.id);
-            break;
-          }
         }
-      }
     }
-    return {rawSoalText:soalRawText,soalNumbers,botResults,result};
-  }catch(e){
-    return {rawSoalText:"",soalNumbers:[],botResults:[],result:[]};
-  }
 };

+const { spawn } = require('child_process');
+const path = require('path');
 const buf = b => Buffer.from(b.replace(/^data:image\/\w+;base64,/,""),"base64");
/**
 * Solves an image "anti-bot" challenge by running OCR (through the Python
 * helper script `ocr.py`) on the question image and on every candidate bot
 * image, then pairing question parts with candidates by string similarity.
 */
class SimpleAntibot {
    constructor() {
        // The OCR helper script is expected next to this module.
        this.pythonScript = path.join(__dirname, 'ocr.py');
    }

    /**
     * Runs the Python OCR script on one image.
     *
     * @param {Buffer} imageBuffer - Raw image bytes.
     * @param {string} [type='soal'] - Image kind forwarded to the script
     *   ('soal' for the question image, 'bot' for a candidate image).
     * @returns {Promise<string>} The `text` field of the script's JSON output,
     *   or '' when that field is missing or empty.
     * @throws {Error} (as a rejection) when the process cannot be started,
     *   exits with a non-zero code, or prints output that is not valid JSON.
     */
    runPythonOCR(imageBuffer, type = 'soal') {
        return new Promise((resolve, reject) => {
            // NOTE(review): the image is passed as a command-line argument, so
            // a large image may exceed the OS argument-size limit and make the
            // spawn fail. Moving it to stdin needs a matching change in ocr.py.
            const base64Image = imageBuffer.toString('base64');
            const child = spawn('python3', [this.pythonScript, base64Image, type]);
            let stdout = '';
            let stderr = '';

            // Without an 'error' listener a failed spawn (for example a
            // missing python3 binary) is raised as an uncaught exception
            // instead of rejecting this promise.
            child.on('error', (err) => {
                reject(new Error(`Failed to run Python process: ${err.message}`, { cause: err }));
            });

            // The stdio streams can be absent when the spawn itself failed.
            child.stdout?.setEncoding('utf8');
            child.stderr?.setEncoding('utf8');
            child.stdout?.on('data', (chunk) => {
                stdout += chunk;
            });
            child.stderr?.on('data', (chunk) => {
                stderr += chunk;
            });

            child.on('close', (code) => {
                if (code !== 0) {
                    reject(new Error(`Python process failed: ${stderr}`));
                    return;
                }
                try {
                    const parsed = JSON.parse(stdout);
                    resolve(parsed.text || '');
                } catch (e) {
                    reject(new Error(`JSON parse error: ${e}`));
                }
            });
        });
    }

    /**
     * Computes the Levenshtein edit distance between two strings.
     *
     * @param {string} a - First string.
     * @param {string} b - Second string.
     * @returns {number} Number of single-character edits; when either input is
     *   empty or missing, the length of the other one.
     */
    levenshtein(a, b) {
        if (!a || !b) return Math.max(a?.length || 0, b?.length || 0);

        // Only the previous row of the distance table is needed at any time.
        let previousRow = Array.from({ length: a.length + 1 }, (_, j) => j);
        for (let i = 1; i <= b.length; i++) {
            const currentRow = [i];
            for (let j = 1; j <= a.length; j++) {
                currentRow[j] = b[i - 1] === a[j - 1]
                    ? previousRow[j - 1]
                    : 1 + Math.min(previousRow[j - 1], currentRow[j - 1], previousRow[j]);
            }
            previousRow = currentRow;
        }
        return previousRow[a.length];
    }

    /**
     * Normalised similarity between two strings.
     *
     * @param {string} a - First string.
     * @param {string} b - Second string.
     * @returns {number} 1 for identical strings, down to 0 for completely
     *   different ones; 0 when either input is empty or missing.
     */
    similarity(a, b) {
        if (!a || !b) return 0;
        const distance = this.levenshtein(a, b);
        const longest = Math.max(a.length, b.length);
        return longest === 0 ? 1 : 1 - distance / longest;
    }

    /**
     * Solves one challenge.
     *
     * @param {{main: string, bots: Array<{id: *, img: string}>}} data -
     *   Base64 question image (`main`) and the candidate images (`bots`).
     * @returns {Promise<{soal: string[], soalLeet: string[], botResults: Array<{id: *, value: string}>, result: Array}>}
     *   The question parts, a copy of them, the OCR value per bot, and the
     *   chosen bot id per question part (null when no bot is left). All four
     *   arrays are empty when the solve fails as a whole.
     */
    async solveAntiBot(data) {
        // Minimum similarity for a bot to count as a real match.
        const MIN_SCORE = 0.3;
        try {
            // Question image: OCR text split into three parts.
            const soalText = await this.runPythonOCR(buf(data.main), 'soal');
            const soalParts = this.splitSoalText(soalText);
            // The Python script already returns the processed text, so this
            // list is a plain copy of the parts.
            const soalLeet = [...soalParts];

            // Candidate images. A failure on one bot leaves its value empty
            // instead of aborting the whole solve; the auto-fill step below
            // can still assign that bot.
            const botResults = [];
            for (const bot of data.bots) {
                let value = '';
                try {
                    value = await this.runPythonOCR(buf(bot.img), 'bot');
                } catch (error) {
                    console.error(`Antibot OCR failed for bot ${bot.id}:`, error);
                }
                botResults.push({ id: bot.id, value });
            }

            // Greedy matching: each question part takes the most similar bot
            // that has not been used yet.
            const result = [];
            const usedBots = new Set();
            for (const soal of soalLeet) {
                // Track the bot object rather than its id so that falsy ids
                // such as 0 are still accepted as matches.
                let best = null;
                let bestScore = 0;
                for (const bot of botResults) {
                    if (usedBots.has(bot.id) || !bot.value) continue;
                    const score = this.similarity(soal, bot.value);
                    if (score > bestScore) {
                        bestScore = score;
                        best = bot;
                    }
                }
                if (best !== null && bestScore >= MIN_SCORE) {
                    result.push(best.id);
                    usedBots.add(best.id);
                } else {
                    result.push(null);
                }
            }

            // Auto-fill: unmatched slots take the remaining bots in order.
            for (let i = 0; i < result.length; i++) {
                if (result[i] !== null) continue;
                for (const bot of botResults) {
                    if (!usedBots.has(bot.id)) {
                        result[i] = bot.id;
                        usedBots.add(bot.id);
                        break;
                    }
                }
            }

            return {
                soal: soalParts,        // Question parts from OCR
                soalLeet: soalLeet,     // Copy of the question parts
                botResults: botResults, // OCR value per bot
                result: result          // Chosen bot id per question part
            };
        } catch (error) {
            console.error('Antibot error:', error);
            return {
                soal: [],
                soalLeet: [],
                botResults: [],
                result: []
            };
        }
    }

    /**
     * Splits the question text into exactly three parts.
     *
     * @param {string} text - OCR text of the question image.
     * @returns {string[]} Three strings: the first three separator-delimited
     *   tokens when at least three exist, otherwise the text cut into three
     *   chunks of equal length (trailing chunks may be empty).
     */
    splitSoalText(text) {
        if (!text) return ['', '', ''];

        // Try the common separators first.
        const tokens = text.split(/[,;|]|\s+/).filter((part) => part.trim());
        if (tokens.length >= 3) {
            return tokens.slice(0, 3).map((p) => p.trim());
        }

        // Fewer than three tokens: cut the raw text into three chunks.
        const chunkLength = Math.ceil(text.length / 3);
        return Array.from({ length: 3 }, (_, i) =>
            text.substring(i * chunkLength, (i + 1) * chunkLength).trim());
    }
}
// The class itself is the module's main export.
module.exports = SimpleAntibot;

// For compatibility: a function-style entry point that builds a fresh
// SimpleAntibot for every call and delegates to its solveAntiBot method.
module.exports.solveAntiBot = async function (data) {
    return await new SimpleAntibot().solveAntiBot(data);
};

endpoints/ocr.py ADDED Viewed

	@@ -0,0 +1,187 @@

+#!/usr/bin/env python3
+import cv2
+import numpy as np
+import pytesseract
+import re
+import sys
+import json
+import base64
+from PIL import Image
+import io
class SimpleOCR:
    """OCR helper that reads challenge images and normalises the text.

    The recognised text is reduced, in order of preference, to the value of a
    simple arithmetic expression, to the digit form of an English number word,
    or to its leet-speak spelling.

    All diagnostics are written to stderr: stdout is reserved for the single
    JSON document printed by ``main``.
    """

    def __init__(self):
        # English number words (and common sound-alikes) -> digits.
        self.word_to_num = {
            'zero': '0', 'oh': '0', 'one': '1', 'won': '1',
            'two': '2', 'to': '2', 'too': '2', 'three': '3', 'tree': '3',
            'four': '4', 'for': '4', 'five': '5', 'six': '6',
            'seven': '7', 'eight': '8', 'ate': '8', 'nine': '9', 'ten': '10'
        }
        # Letter/symbol -> leet digit.
        self.leet_map = {
            'a': '4', 'e': '3', 'g': '9', 'i': '1', 'l': '1',
            'o': '0', 's': '5', 't': '7', 'b': '8', 'z': '2',
            '@': '4', '$': '5', '&': '8'
        }

    def preprocess_image(self, image_bytes, image_type='soal'):
        """Prepare an image for OCR.

        Args:
            image_bytes: Encoded image bytes.
            image_type: 'soal' or 'bot'; selects the maximum width (1000 or
                600 pixels). Any other value disables resizing.

        Returns:
            A binarised grayscale array, or ``image_bytes`` unchanged when the
            image cannot be decoded or any processing step fails.
        """
        try:
            nparr = np.frombuffer(image_bytes, np.uint8)
            img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            if img is None:
                return image_bytes
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            denoised = cv2.medianBlur(gray, 3)
            # CLAHE for contrast enhancement.
            clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
            enhanced = clahe.apply(denoised)
            # Otsu threshold to get a black-and-white image.
            _, thresh = cv2.threshold(enhanced, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            # Downscale only when the image is wider than the limit.
            height, width = thresh.shape
            max_width = {'soal': 1000, 'bot': 600}.get(image_type)
            if max_width and width > max_width:
                scale = max_width / width
                # Keep at least one row so very wide images stay resizable.
                thresh = cv2.resize(thresh, (max_width, max(1, int(height * scale))))
            return thresh
        except Exception as e:
            print(f"Preprocessing error: {e}", file=sys.stderr)
            return image_bytes

    def leetize(self, text):
        """Convert text to leet speak; characters without a mapping are kept."""
        if not text:
            return ""
        return ''.join(self.leet_map.get(c.lower(), c) for c in text)

    def words_to_number(self, text):
        """Convert a single English number word to its digits.

        Returns:
            The digit string, or "" when the text is not a known number word
            (checked both as-is and with non-alphanumerics removed).
        """
        if not text:
            return ""
        text_lower = text.lower().strip()
        # Exact match.
        if text_lower in self.word_to_num:
            return self.word_to_num[text_lower]
        # Strip everything but letters/digits and try again.
        clean_text = re.sub(r'[^a-z0-9]', '', text_lower)
        return self.word_to_num.get(clean_text, "")

    def safe_eval_math(self, text):
        """Evaluate the first simple arithmetic expression found in ``text``.

        Number words and the operator words/symbols 'plus', 'minus', 'times',
        'x', '×' and '÷' are translated first. Operands may be separated from
        operators by spaces or tabs, so "2 + 3" is read as one expression.

        Returns:
            The result as a string (without a decimal part when it is a whole
            number), or None when there is no expression or it cannot be
            evaluated.
        """
        if not text:
            return None
        try:
            # Replace number words with digits.
            for word, num in self.word_to_num.items():
                text = re.sub(r'\b' + word + r'\b', num, text, flags=re.IGNORECASE)
            # Replace operator symbols and words.
            text = text.replace('×', '*').replace('÷', '/').replace('x', '*')
            text = text.replace('plus', '+').replace('minus', '-').replace('times', '*')
            # Operands joined by single + - * / operators, each optionally
            # followed by a sign. Doubled operators ('**', '//') are left out
            # on purpose: an exponent chain from OCR text could stall eval.
            math_match = re.search(
                r'[+\-]?[\d.()]+(?:[ \t]*[+\-*/][+\-]?[ \t]*[\d.()]+)*', text)
            if math_match:
                # SECURITY NOTE(review): eval() on OCR text from an untrusted
                # image. The pattern above only lets digits, dots, brackets
                # and + - * / through, and builtins are disabled.
                result = eval(math_match.group(0), {"__builtins__": {}}, {})
                return str(int(result)) if result == int(result) else str(result)
        except Exception:
            # Best effort: anything that does not evaluate is "not math".
            pass
        return None

    def ocr_image(self, image_bytes, image_type='soal'):
        """Run Tesseract on an image.

        Returns:
            The recognised text with surrounding whitespace removed, or ""
            when OCR fails.
        """
        try:
            processed_img = self.preprocess_image(image_bytes, image_type)
            # Block of text for the question image, single word for a bot.
            config = '--oem 1 --psm 6' if image_type == 'soal' else '--oem 1 --psm 8'
            if isinstance(processed_img, np.ndarray):
                pil_img = Image.fromarray(processed_img)
            else:
                # Preprocessing fell back to the raw bytes.
                pil_img = Image.open(io.BytesIO(processed_img))
            text = pytesseract.image_to_string(pil_img, config=config)
            return text.strip()
        except Exception as e:
            print(f"OCR error: {e}", file=sys.stderr)
            return ""

    def _interpret_text(self, text):
        """Reduce OCR text to a math result, a number word value, or leet text."""
        # Try arithmetic first.
        math_result = self.safe_eval_math(text)
        if math_result:
            return math_result
        # Then a plain number word.
        word_result = self.words_to_number(text)
        if word_result:
            return word_result
        # Neither applies: fall back to the leet spelling.
        return self.leetize(text)

    def process_soal(self, image_bytes):
        """OCR the question image and return its normalised text."""
        return self._interpret_text(self.ocr_image(image_bytes, 'soal'))

    def process_bot(self, image_bytes):
        """OCR a bot image and return its normalised text."""
        return self._interpret_text(self.ocr_image(image_bytes, 'bot'))
def main():
    """Command-line entry point: ``ocr.py <base64_image> [soal|bot]``.

    Prints exactly one JSON object to stdout: ``{"text": ...}`` on success, or
    ``{"error": ..., "text": ""}`` when processing raises. When the image
    argument is missing, the usage text goes to stderr and the exit status
    is 1.
    """
    if len(sys.argv) < 2:
        # Usage is a failure message, so it belongs on stderr next to the
        # non-zero exit status; stdout carries only the JSON result.
        print("Usage: python ocr.py <base64_image> [soal|bot]", file=sys.stderr)
        sys.exit(1)
    base64_image = sys.argv[1]
    image_type = sys.argv[2] if len(sys.argv) > 2 else 'soal'
    try:
        ocr = SimpleOCR()
        image_bytes = base64.b64decode(base64_image)
        # Any type other than 'soal' is handled as a bot image.
        if image_type == 'soal':
            result_text = ocr.process_soal(image_bytes)
        else:
            result_text = ocr.process_bot(image_bytes)
        output = {'text': result_text}
    except Exception as e:
        # Best effort: report the problem inside the JSON and exit normally.
        output = {'error': str(e), 'text': ''}
    print(json.dumps(output))


if __name__ == '__main__':
    main()

package.json CHANGED Viewed

@@ -12,6 +12,7 @@
     "sharp": "^0.32.0",
     "puppeteer-real-browser": "^1.4.0",
     "axios": "^1.9.0",
     "tesseract.js": "^5.0.3",
     "jimp": "^0.22.10"
   },

     "sharp": "^0.32.0",
     "puppeteer-real-browser": "^1.4.0",
     "axios": "^1.9.0",
+    "child_process": "*",
     "tesseract.js": "^5.0.3",
     "jimp": "^0.22.10"
   },