Spaces:
Sleeping
Sleeping
Upload 9 files
Browse files- Dockerfile +30 -0
- app/infer.py +57 -0
- app/main.py +33 -0
- app/model.py +40 -0
- app/utils.py +27 -0
- requirements.txt +9 -0
- static/app.js +57 -0
- static/style.css +57 -0
- templates/index.html +29 -0
Dockerfile
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim

WORKDIR /app

# Unbuffered stdout/stderr so uvicorn logs show up immediately in
# `docker logs`; skip .pyc files inside the image.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# ================================
# SYSTEM DEPENDENCIES (CRITICAL)
# ================================
# Shared libraries required at runtime by opencv-python (`import cv2`
# fails on slim images without them). --no-install-recommends keeps the
# layer small; the apt cache is removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    && rm -rf /var/lib/apt/lists/*

# ================================
# PYTHON DEPENDENCIES
# ================================
# Copy requirements first so this expensive layer is cached across
# code-only changes.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

# ================================
# COPY PROJECT
# ================================
COPY . .

EXPOSE 8000

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
app/infer.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import torch
|
| 3 |
+
import numpy as np
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
from app.model import CRNN
|
| 7 |
+
from app.utils import ctc_decode
|
| 8 |
+
|
| 9 |
+
# Select the GPU when one is visible; all tensors below are pinned to it.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Resolve the weights file relative to this module so the app works no
# matter what the process's current working directory is.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
WEIGHTS_PATH = os.path.join(BASE_DIR, "weights", "ocr_model.pth")

# Fail fast at import time instead of on the first request.
if not os.path.exists(WEIGHTS_PATH):
    raise FileNotFoundError(f"Model file not found: {WEIGHTS_PATH}")

# Build the network, restore the trained parameters, and freeze it for
# inference (eval mode disables dropout/batch-norm updates).
model = CRNN()
state = torch.load(WEIGHTS_PATH, map_location=DEVICE)
model.load_state_dict(state)
model.to(DEVICE)
model.eval()
|
| 30 |
+
|
| 31 |
+
# =========================
# IMAGE PREPROCESSING
# =========================
def preprocess_image(image_bytes):
    """Turn raw upload bytes into a (1, 1, 60, 160) float tensor on DEVICE.

    Raises:
        ValueError: if the bytes cannot be decoded as an image.
    """
    raw = np.frombuffer(image_bytes, np.uint8)
    decoded = cv2.imdecode(raw, cv2.IMREAD_GRAYSCALE)
    if decoded is None:
        raise ValueError("Invalid image uploaded")

    # Fixed network input: 160 px wide, 60 px tall, scaled to [0, 1].
    scaled = cv2.resize(decoded, (160, 60)).astype("float32") / 255.0

    # Prepend the batch and channel dimensions the CNN expects.
    return torch.tensor(scaled).unsqueeze(0).unsqueeze(0).to(DEVICE)
| 46 |
+
|
| 47 |
+
# =========================
# OCR PREDICTION
# =========================
def predict(image_bytes):
    """Run the OCR model on an uploaded captcha and return the decoded text."""
    batch = preprocess_image(image_bytes)

    # Inference only: no autograd bookkeeping.
    with torch.no_grad():
        logits = model(batch)

    # ctc_decode returns one string per batch row; batch size is 1 here.
    return ctc_decode(logits)[0]
|
app/main.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, File, HTTPException, Request, UploadFile
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates

from app.infer import predict
|
| 7 |
+
|
| 8 |
+
# FastAPI application object; uvicorn serves "app.main:app" (see Dockerfile CMD).
app = FastAPI(title="OCR Captcha Recognizer")

# -------------------------
# Static & Templates
# -------------------------
# /static serves app.js and style.css; templates/ holds the Jinja2 pages.
# Both paths are relative to the process working directory (/app in Docker).
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")
|
| 15 |
+
|
| 16 |
+
# -------------------------
# ROOT → HTML (VERY IMPORTANT)
# -------------------------
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Render the single-page upload UI (templates/index.html)."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
| 25 |
+
|
| 26 |
+
# -------------------------
# OCR API
# -------------------------
@app.post("/predict")
async def predict_captcha(file: UploadFile = File(...)):
    """Decode an uploaded captcha image.

    Returns:
        {"text": <decoded string>} on success.

    Raises:
        HTTPException: 400 when the upload is not a decodable image
        (app.infer.predict raises ValueError in that case); previously
        this escaped as an unhandled 500.
    """
    image_bytes = await file.read()
    try:
        text = predict(image_bytes)
    except ValueError as err:
        # Surface bad uploads as a client error instead of a server error.
        raise HTTPException(status_code=400, detail=str(err)) from err
    return {"text": text}
|
app/model.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from app.utils import CHARS
|
| 4 |
+
|
| 5 |
+
NUM_CLASSES = len(CHARS)  # output classes per timestep (digits + letters + CTC blank)
|
| 6 |
+
|
| 7 |
+
class CRNN(nn.Module):
    """Convolutional-recurrent network for captcha OCR.

    The CNN collapses a grayscale image (60x160 as produced by
    app.infer.preprocess_image) into a feature map that is read
    column-by-column by a bidirectional LSTM, whose outputs are projected
    to per-timestep character logits suitable for CTC decoding.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: three downsampling stages plus one extra conv.
        # For a 60x160 input: height 60 -> 30 -> 15 -> 7, width 160 -> 80 -> 40.
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(128, 256, 3, padding=1), nn.BatchNorm2d(256), nn.ReLU(),
            nn.MaxPool2d((2, 1)),  # halve height only, keep the time axis

            nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(),
        )

        # One LSTM timestep per feature-map column: 256 channels x 7 rows.
        self.rnn = nn.LSTM(
            input_size=256 * 7,
            hidden_size=256,
            num_layers=2,
            bidirectional=True,
            batch_first=True,
        )

        # Bidirectional output is 2 * 256 = 512 wide.
        self.fc = nn.Linear(512, NUM_CLASSES)

    def forward(self, x):
        """Return per-timestep logits of shape (batch, width, NUM_CLASSES)."""
        features = self.cnn(x)
        batch, channels, height, width = features.shape
        # Reorder to (batch, width, channels * height): one vector per column.
        sequence = features.permute(0, 3, 1, 2).reshape(batch, width, channels * height)
        outputs, _ = self.rnn(sequence)
        return self.fc(outputs)
|
app/utils.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import string
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
# Character inventory: digits, lowercase letters, and a dedicated CTC
# blank symbol.
DIGITS = string.digits
LETTERS = string.ascii_lowercase
BLANK_CHAR = "-"

# The blank occupies the final class index, len(CHARS) - 1.
CHARS = DIGITS + LETTERS + BLANK_CHAR

# Lookup tables between characters and class indices.
char2idx = {ch: pos for pos, ch in enumerate(CHARS)}
idx2char = dict(enumerate(CHARS))

def ctc_decode(logits):
    """Greedy CTC decode: collapse repeated predictions and drop blanks.

    Args:
        logits: tensor of shape (batch, time, num_classes).

    Returns:
        list[str]: one decoded string per batch item.
    """
    blank = char2idx[BLANK_CHAR]
    decoded = []
    for sequence in logits.argmax(dim=2):
        chars = []
        previous = None
        for raw in sequence.tolist():
            # Emit a character only when it differs from the previous
            # timestep (CTC repeat-collapsing) and is not the blank.
            if raw != previous and raw != blank:
                chars.append(idx2char[raw])
            previous = raw
        decoded.append("".join(chars))
    return decoded
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
python-multipart
|
| 4 |
+
torch
|
| 5 |
+
torchvision
|
| 6 |
+
opencv-python
|
| 7 |
+
numpy
|
| 8 |
+
pillow
|
| 9 |
+
|
static/app.js
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
 * Read the selected captcha image, POST it to /predict, and render the
 * decoded text. Wired to the Predict button in templates/index.html.
 */
async function predict() {
    const input = document.getElementById("imageInput");
    const resultText = document.getElementById("resultText");
    const preview = document.getElementById("previewImage");

    // Guard clause: nothing selected yet.
    const files = input.files;
    if (!files || files.length === 0) {
        alert("Please select a captcha image first");
        return;
    }

    const file = files[0];

    // Show a local preview of the chosen file.
    preview.src = URL.createObjectURL(file);
    preview.style.display = "block";

    // Multipart body; field name "file" matches the FastAPI parameter.
    const formData = new FormData();
    formData.append("file", file);

    // Loading state while the request is in flight.
    resultText.innerText = "⏳ Reading captcha...";

    try {
        const response = await fetch("/predict", {
            method: "POST",
            body: formData
        });

        if (!response.ok) {
            throw new Error("Server error");
        }

        const data = await response.json();
        resultText.innerText = data.text || "❌ No text detected";
    } catch (error) {
        console.error("Prediction error:", error);
        resultText.innerText = "❌ Prediction failed";
    }
}
|
static/style.css
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Full-viewport dark backdrop that centers the single card. */
body {
    font-family: Arial, sans-serif;
    background: #0f172a;
    color: white;
    display: flex;
    justify-content: center;
    align-items: center;
    height: 100vh;
}

/* Main card holding the uploader, preview, and result. */
.container {
    background: #020617;
    padding: 30px;
    border-radius: 12px;
    width: 400px;
    text-align: center;
    box-shadow: 0 0 20px rgba(0,0,0,0.5);
}

h1 {
    margin-bottom: 10px;
}

input[type="file"] {
    margin: 15px 0;
}

/* Green call-to-action button. */
button {
    background: #22c55e;
    border: none;
    padding: 10px 20px;
    color: black;
    font-weight: bold;
    cursor: pointer;
    border-radius: 6px;
}

button:hover {
    background: #16a34a;
}

/* Uploaded captcha preview; white background keeps dark glyphs legible
   against the dark card. */
.preview img {
    margin-top: 15px;
    max-width: 100%;
    border-radius: 6px;
    background: white;
}

.result {
    margin-top: 20px;
}

/* Large cyan text for the decoded captcha string. */
#resultText {
    font-size: 28px;
    font-weight: bold;
    color: #38bdf8;
}
|
templates/index.html
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8" />
    <title>OCR Captcha Recognition</title>
    <link rel="stylesheet" href="/static/style.css">
</head>
<body>

    <div class="container">
        <h1>🔤 OCR Captcha Recognition</h1>
        <p>Upload a captcha image and let AI read it</p>

        <!-- File picker + trigger; predict() is defined in /static/app.js -->
        <input type="file" id="imageInput" accept="image/*">
        <button onclick="predict()">🔍 Predict</button>

        <!-- Local preview of the chosen image (populated by predict()) -->
        <div class="preview">
            <img id="previewImage" />
        </div>

        <!-- Decoded text, or a status/error message while predicting -->
        <div class="result">
            <h3>Prediction:</h3>
            <span id="resultText">---</span>
        </div>
    </div>

    <script src="/static/app.js"></script>
</body>
</html>
|