Spaces:
Sleeping
Sleeping
Upload 9 files
Browse files- Dockerfile +30 -0
- app/infer.py +57 -0
- app/main.py +33 -0
- app/model.py +40 -0
- app/utils.py +27 -0
- requirements.txt +9 -0
- static/app.js +57 -0
- static/style.css +57 -0
- templates/index.html +29 -0
Dockerfile
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim

WORKDIR /app

# Unbuffered stdout/stderr so uvicorn logs show up immediately in
# `docker logs`; skip .pyc files inside the image.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# ================================
# SYSTEM DEPENDENCIES (CRITICAL)
# ================================
# Shared libraries required at runtime by opencv-python (`import cv2`
# fails on slim images without them). --no-install-recommends keeps the
# layer small; the apt cache is removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    && rm -rf /var/lib/apt/lists/*

# ================================
# PYTHON DEPENDENCIES
# ================================
# Copy requirements first so this expensive layer is cached across
# code-only changes.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

# ================================
# COPY PROJECT
# ================================
COPY . .

EXPOSE 8000

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
app/infer.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import torch
|
| 3 |
+
import numpy as np
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
from app.model import CRNN
|
| 7 |
+
from app.utils import ctc_decode
|
| 8 |
+
|
| 9 |
+
# Select the GPU when one is visible; all tensors below are pinned to it.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Resolve the weights file relative to this module so the app works no
# matter what the process's current working directory is.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
WEIGHTS_PATH = os.path.join(BASE_DIR, "weights", "ocr_model.pth")

# Fail fast at import time instead of on the first request.
if not os.path.exists(WEIGHTS_PATH):
    raise FileNotFoundError(f"Model file not found: {WEIGHTS_PATH}")

# Build the network, restore the trained parameters, and freeze it for
# inference (eval mode disables dropout/batch-norm updates).
model = CRNN()
state = torch.load(WEIGHTS_PATH, map_location=DEVICE)
model.load_state_dict(state)
model.to(DEVICE)
model.eval()
|
| 30 |
+
|
| 31 |
+
# =========================
# IMAGE PREPROCESSING
# =========================
def preprocess_image(image_bytes):
    """Turn raw upload bytes into a (1, 1, 60, 160) float tensor on DEVICE.

    Raises:
        ValueError: if the bytes cannot be decoded as an image.
    """
    raw = np.frombuffer(image_bytes, np.uint8)
    decoded = cv2.imdecode(raw, cv2.IMREAD_GRAYSCALE)
    if decoded is None:
        raise ValueError("Invalid image uploaded")

    # Fixed network input: 160 px wide, 60 px tall, scaled to [0, 1].
    scaled = cv2.resize(decoded, (160, 60)).astype("float32") / 255.0

    # Prepend the batch and channel dimensions the CNN expects.
    return torch.tensor(scaled).unsqueeze(0).unsqueeze(0).to(DEVICE)
| 46 |
+
|
| 47 |
+
# =========================
# OCR PREDICTION
# =========================
def predict(image_bytes):
    """Run the OCR model on an uploaded captcha and return the decoded text."""
    batch = preprocess_image(image_bytes)

    # Inference only: no autograd bookkeeping.
    with torch.no_grad():
        logits = model(batch)

    # ctc_decode returns one string per batch row; batch size is 1 here.
    return ctc_decode(logits)[0]
|
app/main.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, File, HTTPException, Request, UploadFile
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates

from app.infer import predict
|
| 7 |
+
|
| 8 |
+
# FastAPI application object; uvicorn serves "app.main:app" (see Dockerfile CMD).
app = FastAPI(title="OCR Captcha Recognizer")

# -------------------------
# Static & Templates
# -------------------------
# /static serves app.js and style.css; templates/ holds the Jinja2 pages.
# Both paths are relative to the process working directory (/app in Docker).
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")
|
| 15 |
+
|
| 16 |
+
# -------------------------
# ROOT → HTML (VERY IMPORTANT)
# -------------------------
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Render the single-page upload UI (templates/index.html)."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
| 25 |
+
|
| 26 |
+
# -------------------------
# OCR API
# -------------------------
@app.post("/predict")
async def predict_captcha(file: UploadFile = File(...)):
    """Decode an uploaded captcha image.

    Returns:
        {"text": <decoded string>} on success.

    Raises:
        HTTPException: 400 when the upload is not a decodable image
        (app.infer.predict raises ValueError in that case); previously
        this escaped as an unhandled 500.
    """
    image_bytes = await file.read()
    try:
        text = predict(image_bytes)
    except ValueError as err:
        # Surface bad uploads as a client error instead of a server error.
        raise HTTPException(status_code=400, detail=str(err)) from err
    return {"text": text}
|
app/model.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from app.utils import CHARS
|
| 4 |
+
|
| 5 |
+
NUM_CLASSES = len(CHARS)  # output classes per timestep (digits + letters + CTC blank)
|
| 6 |
+
|
| 7 |
+
class CRNN(nn.Module):
    """Convolutional-recurrent network for captcha OCR.

    The CNN collapses a grayscale image (60x160 as produced by
    app.infer.preprocess_image) into a feature map that is read
    column-by-column by a bidirectional LSTM, whose outputs are projected
    to per-timestep character logits suitable for CTC decoding.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: three downsampling stages plus one extra conv.
        # For a 60x160 input: height 60 -> 30 -> 15 -> 7, width 160 -> 80 -> 40.
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(128, 256, 3, padding=1), nn.BatchNorm2d(256), nn.ReLU(),
            nn.MaxPool2d((2, 1)),  # halve height only, keep the time axis

            nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(),
        )

        # One LSTM timestep per feature-map column: 256 channels x 7 rows.
        self.rnn = nn.LSTM(
            input_size=256 * 7,
            hidden_size=256,
            num_layers=2,
            bidirectional=True,
            batch_first=True,
        )

        # Bidirectional output is 2 * 256 = 512 wide.
        self.fc = nn.Linear(512, NUM_CLASSES)

    def forward(self, x):
        """Return per-timestep logits of shape (batch, width, NUM_CLASSES)."""
        features = self.cnn(x)
        batch, channels, height, width = features.shape
        # Reorder to (batch, width, channels * height): one vector per column.
        sequence = features.permute(0, 3, 1, 2).reshape(batch, width, channels * height)
        outputs, _ = self.rnn(sequence)
        return self.fc(outputs)
|
app/utils.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import string
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
# Character inventory: digits, lowercase letters, and a dedicated CTC
# blank symbol.
DIGITS = string.digits
LETTERS = string.ascii_lowercase
BLANK_CHAR = "-"

# The blank occupies the final class index, len(CHARS) - 1.
CHARS = DIGITS + LETTERS + BLANK_CHAR

# Lookup tables between characters and class indices.
char2idx = {ch: pos for pos, ch in enumerate(CHARS)}
idx2char = dict(enumerate(CHARS))

def ctc_decode(logits):
    """Greedy CTC decode: collapse repeated predictions and drop blanks.

    Args:
        logits: tensor of shape (batch, time, num_classes).

    Returns:
        list[str]: one decoded string per batch item.
    """
    blank = char2idx[BLANK_CHAR]
    decoded = []
    for sequence in logits.argmax(dim=2):
        chars = []
        previous = None
        for raw in sequence.tolist():
            # Emit a character only when it differs from the previous
            # timestep (CTC repeat-collapsing) and is not the blank.
            if raw != previous and raw != blank:
                chars.append(idx2char[raw])
            previous = raw
        decoded.append("".join(chars))
    return decoded
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
python-multipart
|
| 4 |
+
torch
|
| 5 |
+
torchvision
|
| 6 |
+
opencv-python
|
| 7 |
+
numpy
|
| 8 |
+
pillow
|
| 9 |
+
|
static/app.js
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
 * Read the selected captcha image, POST it to /predict, and render the
 * decoded text. Wired to the Predict button in templates/index.html.
 */
async function predict() {
    const input = document.getElementById("imageInput");
    const resultText = document.getElementById("resultText");
    const preview = document.getElementById("previewImage");

    // Guard clause: nothing selected yet.
    const files = input.files;
    if (!files || files.length === 0) {
        alert("Please select a captcha image first");
        return;
    }

    const file = files[0];

    // Show a local preview of the chosen file.
    preview.src = URL.createObjectURL(file);
    preview.style.display = "block";

    // Multipart body; field name "file" matches the FastAPI parameter.
    const formData = new FormData();
    formData.append("file", file);

    // Loading state while the request is in flight.
    resultText.innerText = "⏳ Reading captcha...";

    try {
        const response = await fetch("/predict", {
            method: "POST",
            body: formData
        });

        if (!response.ok) {
            throw new Error("Server error");
        }

        const data = await response.json();
        resultText.innerText = data.text || "❌ No text detected";
    } catch (error) {
        console.error("Prediction error:", error);
        resultText.innerText = "❌ Prediction failed";
    }
}
|
static/style.css
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Full-viewport dark backdrop that centers the single card. */
body {
    font-family: Arial, sans-serif;
    background: #0f172a;
    color: white;
    display: flex;
    justify-content: center;
    align-items: center;
    height: 100vh;
}

/* Main card holding the uploader, preview, and result. */
.container {
    background: #020617;
    padding: 30px;
    border-radius: 12px;
    width: 400px;
    text-align: center;
    box-shadow: 0 0 20px rgba(0,0,0,0.5);
}

h1 {
    margin-bottom: 10px;
}

input[type="file"] {
    margin: 15px 0;
}

/* Green call-to-action button. */
button {
    background: #22c55e;
    border: none;
    padding: 10px 20px;
    color: black;
    font-weight: bold;
    cursor: pointer;
    border-radius: 6px;
}

button:hover {
    background: #16a34a;
}

/* Uploaded captcha preview; white background keeps dark glyphs legible
   against the dark card. */
.preview img {
    margin-top: 15px;
    max-width: 100%;
    border-radius: 6px;
    background: white;
}

.result {
    margin-top: 20px;
}

/* Large cyan text for the decoded captcha string. */
#resultText {
    font-size: 28px;
    font-weight: bold;
    color: #38bdf8;
}
|
templates/index.html
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8" />
    <title>OCR Captcha Recognition</title>
    <link rel="stylesheet" href="/static/style.css">
</head>
<body>

    <div class="container">
        <h1>🔤 OCR Captcha Recognition</h1>
        <p>Upload a captcha image and let AI read it</p>

        <!-- File picker + trigger; predict() is defined in /static/app.js -->
        <input type="file" id="imageInput" accept="image/*">
        <button onclick="predict()">🔍 Predict</button>

        <!-- Local preview of the chosen image (populated by predict()) -->
        <div class="preview">
            <img id="previewImage" />
        </div>

        <!-- Decoded text, or a status/error message while predicting -->
        <div class="result">
            <h3>Prediction:</h3>
            <span id="resultText">---</span>
        </div>
    </div>

    <script src="/static/app.js"></script>
</body>
</html>
|