MedhaCodes committed on
Commit
dfc2fe0
·
verified ·
1 Parent(s): fd6243f

Upload 9 files

Browse files
Files changed (9) hide show
  1. Dockerfile +30 -0
  2. app/infer.py +57 -0
  3. app/main.py +33 -0
  4. app/model.py +40 -0
  5. app/utils.py +27 -0
  6. requirements.txt +9 -0
  7. static/app.js +57 -0
  8. static/style.css +57 -0
  9. templates/index.html +29 -0
Dockerfile ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python base keeps the final image small.
FROM python:3.10-slim

WORKDIR /app

# ================================
# SYSTEM DEPENDENCIES (CRITICAL)
# ================================
# OpenCV (cv2) needs these shared libraries even on a headless server.
# --no-install-recommends avoids pulling in unneeded recommended packages,
# and removing the apt lists keeps this layer lean.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    && rm -rf /var/lib/apt/lists/*

# ================================
# PYTHON DEPENDENCIES
# ================================
# Copy requirements first so this layer stays cached unless deps change.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

# ================================
# COPY PROJECT
# ================================
COPY . .

EXPOSE 8000

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
app/infer.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Inference module: loads the trained CRNN once at import time and keeps a
# single shared model instance for the FastAPI layer.
import cv2
import torch
import numpy as np
import os

from app.model import CRNN
from app.utils import ctc_decode

# =========================
# DEVICE
# =========================
# Prefer GPU when available; weights and input tensors are both moved here.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# =========================
# ABSOLUTE PATH FIX
# =========================
# Resolve the weights file relative to this module so loading works no
# matter what the process working directory is (e.g. inside Docker).
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
WEIGHTS_PATH = os.path.join(BASE_DIR, "weights", "ocr_model.pth")

# Fail fast at import time with a clear message if the weights are missing.
if not os.path.exists(WEIGHTS_PATH):
    raise FileNotFoundError(f"Model file not found: {WEIGHTS_PATH}")

# =========================
# LOAD MODEL
# =========================
# map_location lets CUDA-trained weights load on a CPU-only host;
# eval() freezes batch-norm statistics for inference.
model = CRNN()
model.load_state_dict(torch.load(WEIGHTS_PATH, map_location=DEVICE))
model.to(DEVICE)
model.eval()
30
+
31
# =========================
# IMAGE PREPROCESSING
# =========================
def preprocess_image(image_bytes):
    """Decode raw image bytes into a normalized (1, 1, 60, 160) tensor on DEVICE.

    Raises:
        ValueError: if the bytes cannot be decoded as an image.
    """
    raw = np.frombuffer(image_bytes, np.uint8)
    gray = cv2.imdecode(raw, cv2.IMREAD_GRAYSCALE)

    if gray is None:
        raise ValueError("Invalid image uploaded")

    # Fixed geometry the CRNN expects (width=160, height=60), scaled to [0, 1].
    scaled = cv2.resize(gray, (160, 60)).astype("float32") / 255.0

    # (H, W) -> (1, 1, H, W): add batch and channel dimensions.
    batch = torch.tensor(scaled)[None, None, :, :]
    return batch.to(DEVICE)
46
+
47
# =========================
# OCR PREDICTION
# =========================
def predict(image_bytes):
    """Run the shared CRNN over raw image bytes and return the decoded text."""
    batch = preprocess_image(image_bytes)

    # Inference only - no gradient bookkeeping needed.
    with torch.no_grad():
        logits = model(batch)

    # ctc_decode yields one string per batch item; the batch size here is 1.
    return ctc_decode(logits)[0]
app/main.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, File, HTTPException, Request, UploadFile
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates

from app.infer import predict
7
+
8
app = FastAPI(title="OCR Captcha Recognizer")

# -------------------------
# Static & Templates
# -------------------------
# Serve JS/CSS under /static and render Jinja2 templates from ./templates.
# Both paths are relative to the process working directory (/app in Docker).
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")
15
+
16
# -------------------------
# ROOT → HTML (VERY IMPORTANT)
# -------------------------
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Render the captcha upload page."""
    # Jinja2Templates requires the request in the template context.
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
25
+
26
# -------------------------
# OCR API
# -------------------------
@app.post("/predict")
async def predict_captcha(file: UploadFile = File(...)):
    """Accept an uploaded captcha image and return the recognized text.

    Returns:
        dict: {"text": <decoded captcha string>}

    Raises:
        HTTPException: 400 when the upload cannot be decoded as an image
            (previously this surfaced as an unhandled 500 server error).
    """
    image_bytes = await file.read()
    try:
        text = predict(image_bytes)
    except ValueError as err:
        # preprocess_image raises ValueError("Invalid image uploaded") for
        # undecodable bytes - that is a client error, not a server bug.
        raise HTTPException(status_code=400, detail=str(err)) from err
    return {"text": text}
app/model.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from app.utils import CHARS
4
+
5
NUM_CLASSES = len(CHARS)

class CRNN(nn.Module):
    """Convolutional-recurrent network for fixed-size captcha images.

    Consumes a (batch, 1, H, W) grayscale batch and emits per-timestep
    class logits of shape (batch, timesteps, NUM_CLASSES) for CTC decoding.
    """

    def __init__(self):
        super().__init__()

        # NOTE: the order of layers inside the Sequential is load-bearing -
        # the saved state_dict is keyed by these positional indices.
        conv_stack = [
            nn.Conv2d(1, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(128, 256, 3, padding=1), nn.BatchNorm2d(256), nn.ReLU(),
            nn.MaxPool2d((2, 1)),

            nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(),
        ]
        self.cnn = nn.Sequential(*conv_stack)

        # input_size = 256 channels * feature height 7 - assumes a 60-pixel
        # input height (60 -> 30 -> 15 -> 7 through the pools); TODO confirm
        # against preprocess_image's resize target.
        self.rnn = nn.LSTM(
            input_size=256 * 7,
            hidden_size=256,
            num_layers=2,
            bidirectional=True,
            batch_first=True,
        )

        # Bidirectional LSTM doubles the feature size: 2 * 256 = 512.
        self.fc = nn.Linear(512, NUM_CLASSES)

    def forward(self, x):
        """Map an image batch to per-timestep class logits."""
        features = self.cnn(x)
        batch, channels, height, width = features.shape
        # Treat each width position as one timestep of channels*height features.
        sequence = features.permute(0, 3, 1, 2).reshape(batch, width, channels * height)
        outputs, _ = self.rnn(sequence)
        return self.fc(outputs)
app/utils.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import string
import torch

# Character inventory: digits, then lowercase letters, then the CTC blank.
DIGITS = string.digits
LETTERS = string.ascii_lowercase
BLANK_CHAR = "-"

# NOTE: the blank symbol sits at the LAST index (36); the model's output
# layer must be trained with this same ordering.
CHARS = DIGITS + LETTERS + BLANK_CHAR

# Lookup tables between characters and class indices, in both directions.
char2idx = {ch: pos for pos, ch in enumerate(CHARS)}
idx2char = dict(enumerate(CHARS))

def ctc_decode(logits):
    """Greedy CTC decode: argmax per step, collapse repeats, drop blanks.

    Args:
        logits: tensor of shape (batch, time, num_classes).

    Returns:
        list[str]: one decoded string per batch item.
    """
    blank = char2idx[BLANK_CHAR]
    best_paths = logits.argmax(dim=2).tolist()

    decoded = []
    for path in best_paths:
        pieces = []
        last = None
        for cls in path:
            # Emit only on transitions, and never for the blank class.
            if cls != last and cls != blank:
                pieces.append(idx2char[cls])
            last = cls
        decoded.append("".join(pieces))

    return decoded
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ torch
5
+ torchvision
6
+ opencv-python
7
+ numpy
8
+ pillow
9
+
static/app.js ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Read the selected captcha image, POST it to /predict, and display the result.
async function predict() {
    const fileInput = document.getElementById("imageInput");
    const output = document.getElementById("resultText");
    const previewImg = document.getElementById("previewImage");

    // ---------------------------
    // Validation
    // ---------------------------
    // Guard clause: bail out if nothing has been selected yet.
    if (!fileInput.files || fileInput.files.length === 0) {
        alert("Please select a captcha image first");
        return;
    }

    const selected = fileInput.files[0];

    // ---------------------------
    // Image Preview
    // ---------------------------
    // Show a local preview via an object URL (no upload needed for this).
    previewImg.src = URL.createObjectURL(selected);
    previewImg.style.display = "block";

    // ---------------------------
    // Prepare Form Data
    // ---------------------------
    // The "file" field name must match the FastAPI endpoint's parameter.
    const body = new FormData();
    body.append("file", selected);

    // ---------------------------
    // UI Loading State
    // ---------------------------
    output.innerText = "⏳ Reading captcha...";

    try {
        // ---------------------------
        // API Call
        // ---------------------------
        const res = await fetch("/predict", { method: "POST", body });

        if (!res.ok) {
            throw new Error("Server error");
        }

        const payload = await res.json();

        // ---------------------------
        // Show Result
        // ---------------------------
        output.innerText = payload.text || "❌ No text detected";

    } catch (err) {
        console.error("Prediction error:", err);
        output.innerText = "❌ Prediction failed";
    }
}
static/style.css ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Dark, centered single-card layout for the captcha demo page. */
body {
    font-family: Arial, sans-serif;
    background: #0f172a;
    color: white;
    display: flex;
    justify-content: center;
    align-items: center;
    height: 100vh;
}

/* The single content card holding the whole UI. */
.container {
    background: #020617;
    padding: 30px;
    border-radius: 12px;
    width: 400px;
    text-align: center;
    box-shadow: 0 0 20px rgba(0,0,0,0.5);
}

h1 {
    margin-bottom: 10px;
}

input[type="file"] {
    margin: 15px 0;
}

/* Primary action button (green, dark text for contrast). */
button {
    background: #22c55e;
    border: none;
    padding: 10px 20px;
    color: black;
    font-weight: bold;
    cursor: pointer;
    border-radius: 6px;
}

button:hover {
    background: #16a34a;
}

/* Uploaded-image preview; white backdrop keeps dark captchas legible. */
.preview img {
    margin-top: 15px;
    max-width: 100%;
    border-radius: 6px;
    background: white;
}

.result {
    margin-top: 20px;
}

/* The decoded captcha text filled in by app.js. */
#resultText {
    font-size: 28px;
    font-weight: bold;
    color: #38bdf8;
}
templates/index.html ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8" />
    <title>OCR Captcha Recognition</title>
    <!-- Served by FastAPI's StaticFiles mount at /static -->
    <link rel="stylesheet" href="/static/style.css">
</head>
<body>

    <div class="container">
        <h1>🔤 OCR Captcha Recognition</h1>
        <p>Upload a captcha image and let AI read it</p>

        <!-- predict() in app.js reads #imageInput and POSTs to /predict -->
        <input type="file" id="imageInput" accept="image/*">
        <button onclick="predict()">🔍 Predict</button>

        <!-- Filled with a local object URL once a file is chosen -->
        <div class="preview">
            <img id="previewImage" />
        </div>

        <!-- #resultText is populated with the model's decoded text -->
        <div class="result">
            <h3>Prediction:</h3>
            <span id="resultText">---</span>
        </div>
    </div>

    <script src="/static/app.js"></script>
</body>
</html>