File size: 3,078 Bytes
af4d6e0
 
 
9f0ff72
af4d6e0
9f0ff72
fd06368
 
af4d6e0
0159203
af4d6e0
 
 
 
 
 
 
 
9f0ff72
fd06368
9f0ff72
 
af4d6e0
 
 
fd06368
af4d6e0
9f0ff72
af4d6e0
fd06368
 
9f0ff72
 
af4d6e0
fd06368
 
 
 
 
 
 
 
 
 
 
b5d008d
fd06368
 
 
 
 
 
 
 
 
665fc46
fd06368
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5d008d
3f12567
fd06368
 
 
 
 
3f12567
fd06368
 
83995ab
9f0ff72
 
 
83995ab
af4d6e0
 
fd06368
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image
import io
import torch
from transformers import AutoImageProcessor, AutoBackbone
import pytesseract  # OCR

app = FastAPI()

# Allow cross-origin requests from any host — convenient for development;
# tighten allow_origins before deploying to production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the TextNet detection backbone and its image processor once at module
# import, so every request reuses the same weights. eval() switches the model
# to inference mode (disables dropout / batch-norm updates).
processor = AutoImageProcessor.from_pretrained("czczup/textnet-base")
model = AutoBackbone.from_pretrained("czczup/textnet-base")
model.eval()

@app.post("/detect")
async def detect_text(file: UploadFile = File(...)):
    """Detect text regions in an uploaded image and OCR each region.

    Pipeline: run the TextNet backbone, average the channels of its deepest
    feature map into a 2-D heatmap, threshold the heatmap to find "hot"
    points, group those points into horizontal bands, then crop each band
    from the original image and run Tesseract OCR on it.

    Returns JSON with a single, consistent shape:
        {"image_width": int, "image_height": int, "boxes": [
            {"x": int, "y": int, "w": int, "h": int, "text": str}, ...]}
    Coordinates are in original-image pixels. An image with no detected
    text yields the same shape with an empty "boxes" list.
    On any error, returns {"success": False, "error": str} with HTTP 500.
    """
    try:
        # Read and decode the uploaded image (normalize to RGB).
        image_bytes = await file.read()
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

        # Run the TextNet backbone; inference only, so skip gradient tracking.
        inputs = processor(image, return_tensors="pt")
        with torch.no_grad():
            outputs = model(**inputs)

        # Deepest feature map of the (single) batch item, collapsed across
        # channels into a 2-D activation heatmap.
        fm = outputs.feature_maps[-1][0]
        heatmap = fm.mean(dim=0).numpy()
        H, W = heatmap.shape
        # Relative threshold: keep locations within 80% of the peak response.
        threshold = heatmap.max() * 0.2

        # Heatmap coordinates whose activation exceeds the threshold.
        points = [(x, y) for y in range(H) for x in range(W) if heatmap[y, x] > threshold]

        # Group hot points into coarse horizontal bands, 10 heatmap rows tall,
        # as a cheap approximation of text lines.
        lines = {}
        for x, y in points:
            lines.setdefault(y // 10, []).append((x, y))

        # Scale factors mapping heatmap coordinates back to image pixels.
        scale_x = image.width / W
        scale_y = image.height / H
        boxes = []

        for line in lines.values():
            xs = [p[0] for p in line]
            ys = [p[1] for p in line]
            min_x, max_x = min(xs), max(xs)
            min_y, max_y = min(ys), max(ys)

            # Discard bands too small (in heatmap units) to hold legible text.
            if (max_x - min_x) < 5 or (max_y - min_y) < 2:
                continue

            # Crop the corresponding region of the original image for OCR.
            crop = image.crop((
                int(min_x * scale_x),
                int(min_y * scale_y),
                int(max_x * scale_x),
                int(max_y * scale_y),
            ))

            text = pytesseract.image_to_string(crop, lang='eng').strip()

            # Skip regions where OCR found nothing meaningful.
            if len(text) < 2:
                continue

            boxes.append({
                "x": int(min_x * scale_x),
                "y": int(min_y * scale_y),
                "w": int((max_x - min_x) * scale_x),
                "h": int((max_y - min_y) * scale_y),
                "text": text,
            })

        # Always return the same response shape; "boxes" is simply empty
        # when nothing was detected (no special-cased bare-list response,
        # no placeholder box injected ahead of real detections).
        return JSONResponse({
            "image_width": image.width,
            "image_height": image.height,
            "boxes": boxes,
        })

    except Exception as e:
        # Top-level boundary: surface the failure to the client as HTTP 500.
        return JSONResponse({"success": False, "error": str(e)}, status_code=500)