aniket9909 committed on
Commit
f816f01
·
verified ·
1 Parent(s): f13083b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +675 -0
app.py ADDED
@@ -0,0 +1,675 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile, HTTPException, BackgroundTasks
2
+ from fastapi.responses import JSONResponse
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ import cv2
5
+ import numpy as np
6
+ from pyzbar import pyzbar
7
+ from PIL import Image
8
+ import fitz # PyMuPDF
9
+ import json
10
+ import base64
11
+ import logging
12
+ import re
13
+ import zlib
14
+ import asyncio
15
+ import warnings
16
+ import os
17
+ import hashlib
18
+ import time
19
+ from typing import Optional, Dict, Any, List, Tuple
20
+ from datetime import datetime
21
+ from concurrent.futures import ThreadPoolExecutor
22
+ import uuid
23
+
24
# ============ Suppress Warnings ============
# Silence all Python-level warnings for the whole process and set the
# ZBAR_VERBOSE environment variable to '0' (presumably read by the zbar
# library to reduce its console output -- TODO confirm).
warnings.filterwarnings('ignore')
os.environ['ZBAR_VERBOSE'] = '0'

# Windows only: load kernel32 through ctypes.
# NOTE(review): the handle is only loaded here; nothing in the visible source
# uses it to redirect or suppress stderr.
import sys
if sys.platform == 'win32':
    import ctypes
    try:
        kernel32 = ctypes.WinDLL('kernel32')
    except OSError:
        # Best effort: a missing/unloadable DLL must not stop the app from
        # starting. Only OSError is caught so that KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except.
        pass
36
+
37
# Root logging configuration: INFO level, with timestamp, logger name and
# level in every record.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - [%(levelname)s] - %(message)s'
)
logger = logging.getLogger(__name__)

# The ASGI application object; every route below is registered on it.
app = FastAPI(title="E-Invoice QR Extractor API", version="2.3.0")

# ============ Configuration ============
# When False, pick_einvoice_first() returns None unless an E-Invoice QR is
# found; when True it falls back to the first decoded QR of any kind.
ALLOW_PAYMENT_FALLBACK = False
# Default resolution (dots per inch) for the thumbnail pre-scan of PDF pages.
THUMBNAIL_DPI = 220
# Fraction of a detected QR box's width/height added on every side before
# the region is re-rendered.
CROP_MARGIN = 0.15
MAX_CONCURRENT_REQUESTS = 5  # Limit concurrent processing
MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB
PROCESSING_TIMEOUT = 300  # 5 minutes (seconds, passed to asyncio.wait_for)

# ============ Concurrency Control ============
# The semaphore bounds how many uploads are processed at once; the thread
# pool runs the blocking image/PDF work off the event loop. Both are sized
# from MAX_CONCURRENT_REQUESTS.
processing_semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
executor = ThreadPoolExecutor(max_workers=MAX_CONCURRENT_REQUESTS)

# ============ In-Memory Tracking (simple) ============
# request_id -> tracking record. Lives only in this process's memory.
request_tracking: Dict[str, Dict[str, Any]] = {}

# ============ CORS ============
# NOTE(review): wildcard origins combined with allow_credentials=True is a
# very permissive policy -- confirm this is intended for the deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
68
+
69
+ # ============ Helper Functions ============
70
+
71
def get_file_hash(content: bytes) -> str:
    """Return the hexadecimal MD5 digest of ``content``.

    The digest serves as a fingerprint for file deduplication.
    """
    digest = hashlib.md5()
    digest.update(content)
    return digest.hexdigest()
74
+
75
def track_request(request_id: str, data: Dict[str, Any]):
    """Store a new tracking record for ``request_id``.

    The record is a copy of ``data`` with two fields set on top of it:
    ``created_at`` (current local time, ISO-8601) and ``status``
    (``"processing"``). An existing record under the same id is replaced.
    """
    record = dict(data)
    record["created_at"] = datetime.now().isoformat()
    record["status"] = "processing"
    request_tracking[request_id] = record
82
+
83
def update_request(request_id: str, status: str, result: Optional[Dict] = None):
    """Set the status of a tracked request and optionally attach a result.

    Unknown request ids are ignored. ``updated_at`` is refreshed on every
    update; ``result`` is stored only when it is truthy.
    """
    if request_id not in request_tracking:
        return

    record = request_tracking[request_id]
    record["status"] = status
    record["updated_at"] = datetime.now().isoformat()
    if result:
        record["result"] = result
90
+
91
def cleanup_old_tracking():
    """Drop tracking records whose ``created_at`` is more than an hour old."""
    threshold = time.time() - 3600
    # Collect the ids first: the dict must not change size while iterating.
    expired = [
        key
        for key, record in request_tracking.items()
        if datetime.fromisoformat(record["created_at"]).timestamp() < threshold
    ]
    for key in expired:
        del request_tracking[key]
101
+
102
+ # ============ Decoding Helpers ============
103
+
104
def _json_object_from_bytes(blob: bytes) -> Optional[Dict[str, Any]]:
    """Parse ``blob`` as a JSON object: plain UTF-8 first, then zlib-inflated."""
    for unpack in (bytes, zlib.decompress):
        try:
            parsed = json.loads(unpack(blob).decode("utf-8"))
        except Exception:
            # Deliberately broad: this is best-effort format sniffing and any
            # failure just means "not this encoding".
            continue
        if isinstance(parsed, dict):
            return parsed
    return None


def decode_gst_qr(raw_data: str) -> Dict[str, Any]:
    """Decode a GST QR payload into a dictionary.

    The payload is tried, in order, as:

    1. a JSON object;
    2. a JWT-like ``header.payload[.signature]`` string whose second segment
       is base64url-encoded JSON (optionally zlib-compressed);
    3. plain Base64 of JSON (optionally zlib-compressed).

    Only JSON *objects* are accepted. A payload that parses to some other
    JSON value (for example a bare number or a list) is treated as
    undecodable, so the declared ``Dict`` return type always holds.

    Args:
        raw_data: Text read from the QR code; ``None`` or empty is allowed.

    Returns:
        The decoded object, or ``{"raw": <text>}`` when no decoder matched.
        No signature verification is performed on JWT-like payloads.
    """
    s = raw_data or ""

    # 1) JSON
    try:
        parsed = json.loads(s)
    except Exception:
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    # 2) JWT-like base64url payload
    try:
        if "." in s:
            # A "." guarantees at least two segments; the second is the payload.
            payload = s.split(".")[1]
            payload += "=" * (-len(payload) % 4)  # restore stripped padding
            obj = _json_object_from_bytes(base64.urlsafe_b64decode(payload))
            if obj is not None:
                return obj
    except Exception:
        pass

    # 3) Plain Base64
    try:
        padded = s + "=" * (-len(s) % 4)
        obj = _json_object_from_bytes(base64.b64decode(padded))
        if obj is not None:
            return obj
    except Exception:
        pass

    return {"raw": s}
147
+
148
+ # ============ Classification ============
149
GSTIN_REGEX = re.compile(r"\b\d{2}[A-Z]{5}\d{4}[A-Z]\dZ[A-Z0-9]\b")


def classify_qr(raw_text: str, decoded_obj: Optional[Dict[str, Any]] = None) -> str:
    """Classify a QR payload as ``'einvoice'``, ``'payment'`` or ``'unknown'``.

    Payment markers are checked first and win over everything else. A payload
    counts as an E-Invoice when its decoded object has a known E-Invoice
    field, when the text contains a GSTIN-shaped token, or when the text is
    at least 200 characters long.

    Args:
        raw_text: Text read from the QR code (``None`` is treated as empty).
        decoded_obj: Already-decoded payload; when falsy the text is decoded
            here with ``decode_gst_qr``.
    """
    text = (raw_text or "").strip()
    lowered = text.lower()

    # ---- Payment patterns ----
    payment_markers = ["paytm", "phonepe", "gpay", "googlepay", "bharatqr", "bhim upi", "upi://collect"]
    looks_like_payment = (
        lowered.startswith("upi://")
        or "upi://pay" in lowered
        or text.startswith("000201")
        or any(marker in lowered for marker in payment_markers)
    )
    if looks_like_payment:
        return "payment"

    # ---- E-Invoice patterns ----
    parsed = decoded_obj or decode_gst_qr(text)
    if isinstance(parsed, dict):
        einvoice_fields = {"irn", "sellergstin", "buyergstin", "docno", "docdt", "totinvval", "mainhsncode", "signedqrcode"}
        present = {key.lower() for key in parsed}
        if not present.isdisjoint(einvoice_fields):
            return "einvoice"

    if GSTIN_REGEX.search(text):
        return "einvoice"

    # Long payloads are assumed to be signed E-Invoice QRs. The payment
    # prefixes need no re-check here: they already returned above.
    if len(text) >= 200:
        return "einvoice"

    return "unknown"
179
+
180
def pick_einvoice_first(payloads: List[str]) -> Optional[Dict[str, Any]]:
    """Return the first payload classified as an E-Invoice, decoded.

    Payloads are decoded and classified in order. The first one classified
    ``"einvoice"`` is returned with ``_classification`` set. If none
    qualifies, the first payload of any kind is returned (with
    ``_classification`` and ``_note``) only when ``ALLOW_PAYMENT_FALLBACK``
    is enabled; otherwise the result is ``None``.

    Args:
        payloads: Raw QR texts, in detection order.
    """
    first_any = None
    for raw in payloads:
        dec = decode_gst_qr(raw)
        if not isinstance(dec, dict):
            # A payload that is valid JSON but not an object (e.g. a long
            # digit string) cannot carry the metadata keys set below; wrap it
            # so item assignment cannot raise TypeError.
            dec = {"raw": raw}
        cls = classify_qr(raw, dec)
        if first_any is None:
            first_any = (dec, cls)
        if cls == "einvoice":
            dec["_classification"] = "einvoice"
            return dec

    if ALLOW_PAYMENT_FALLBACK and first_any is not None:
        dec, cls = first_any
        dec["_classification"] = cls
        dec["_note"] = "No E-Invoice QR; returning first available QR"
        return dec
    return None
197
+
198
+ # ============ Image Processing ============
199
+
200
def enhance_image_for_qr(image: np.ndarray) -> np.ndarray:
    """Pre-process a BGR image to make QR codes easier to detect.

    Pipeline: grayscale -> CLAHE contrast equalisation -> non-local-means
    denoising -> 3x3 sharpening -> adaptive Gaussian threshold.

    Returns:
        The thresholded single-channel image.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    contrasted = equalizer.apply(grayscale)

    smoothed = cv2.fastNlMeansDenoising(contrasted, None, 10, 7, 21)

    # Sharpening kernel: centre weight 9, all eight neighbours -1.
    sharpen_kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
    crisp = cv2.filter2D(smoothed, -1, sharpen_kernel)

    return cv2.adaptiveThreshold(
        crisp, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
210
+
211
+ # ============ QR Decoders ============
212
+
213
def detect_all_qr_opencv(image: np.ndarray) -> List[str]:
    """Decode QR codes in ``image`` with OpenCV's ``QRCodeDetector``.

    Runs the multi-code decoder first, then the single-code decoder, and
    returns every non-empty decoded string. The same payload may appear
    twice; callers are expected to de-duplicate. Decoder failures are logged
    at debug level and never propagate.
    """
    found: List[str] = []
    detector = cv2.QRCodeDetector()

    try:
        # The Python binding returns FOUR values:
        # (retval, decoded_info, points, straight_qrcode).
        # Unpacking into three raised ValueError on every call, so the
        # multi-code path silently never contributed results.
        _ok, decoded_info, _points, _straight = detector.detectAndDecodeMulti(image)
        if isinstance(decoded_info, (list, tuple)):
            found.extend(text for text in decoded_info if text)
    except Exception as exc:
        logger.debug(f"OpenCV multi-QR decode failed: {exc}")

    try:
        # Single-code variant returns (decoded_text, points, straight_qrcode).
        text, _points, _straight = detector.detectAndDecode(image)
        if text:
            found.append(text)
    except Exception as exc:
        logger.debug(f"OpenCV single-QR decode failed: {exc}")

    return found
230
+
231
def detect_all_qr_pyzbar(image: np.ndarray) -> List[str]:
    """Decode QR codes in ``image`` with pyzbar.

    Only symbols of type ``QRCODE`` are kept. Their data is decoded as UTF-8
    with undecodable bytes dropped, and empty strings are skipped. Warnings
    raised during decoding are suppressed and any failure yields the results
    gathered so far.
    """
    texts: List[str] = []
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            symbols = pyzbar.decode(image)
            for symbol in symbols:
                if symbol.type != 'QRCODE':
                    continue
                try:
                    text = symbol.data.decode('utf-8', errors='ignore')
                except Exception:
                    continue
                if text:
                    texts.append(text)
    except Exception:
        pass
    return texts
249
+
250
def dedupe_keep_order(items: List[str]) -> List[str]:
    """Return ``items`` without duplicates, keeping first-occurrence order."""
    # dict preserves insertion order, so its keys are the first occurrences.
    return list(dict.fromkeys(items))
258
+
259
+ # ============ Image QR Extraction ============
260
+
261
def extract_qr_from_image(image_bytes: bytes) -> Optional[Dict[str, Any]]:
    """Find every QR code in an encoded image and return the preferred one.

    Detection escalates through three passes, each run only if the previous
    one found nothing:

    1. OpenCV and pyzbar on the decoded image;
    2. the same decoders on an enhanced (thresholded) copy;
    3. the same decoders on two crops: the top-right corner, then the
       right-hand strip (stopping at the first crop that yields anything).

    Returns:
        The result of ``pick_einvoice_first`` over the de-duplicated
        payloads, or ``None`` if the bytes are not a decodable image or no QR
        was found.
    """
    buffer = np.frombuffer(image_bytes, np.uint8)
    bgr = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
    if bgr is None:
        return None

    # Pass 1: original image.
    found: List[str] = []
    found.extend(detect_all_qr_opencv(bgr))
    found.extend(detect_all_qr_pyzbar(bgr))

    # Pass 2: enhanced image (OpenCV gets a 3-channel copy, pyzbar the
    # single-channel one).
    if not found:
        binary = enhance_image_for_qr(bgr)
        binary_bgr = cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)
        found.extend(detect_all_qr_opencv(binary_bgr))
        found.extend(detect_all_qr_pyzbar(binary))

    found = dedupe_keep_order(found)

    # Pass 3: crops of the regions listed in the docstring.
    if not found:
        height, width = bgr.shape[:2]
        top_right = bgr[0:int(height*0.45), int(width*0.55):width]
        right_strip = bgr[:, int(width*0.66):width]
        for region in (top_right, right_strip):
            if region.size == 0:
                continue
            hits = detect_all_qr_opencv(region) + detect_all_qr_pyzbar(region)
            found.extend(hits)
            if hits:
                break
        found = dedupe_keep_order(found)

    return pick_einvoice_first(found) if found else None
299
+
300
+ # ============ PDF Processing ============
301
+
302
def detect_boxes_on_thumbnail(page: fitz.Page, thumb_dpi: int = THUMBNAIL_DPI) -> List[fitz.Rect]:
    """Locate QR codes on a low-resolution render of ``page``.

    The page is rendered at ``thumb_dpi`` and passed to OpenCV's QR detector
    (detection only, no decoding). Each detected quadrilateral is turned into
    its axis-aligned bounding box, enlarged by ``CROP_MARGIN`` on every side,
    scaled back to page coordinates and clipped to the page rectangle.

    Returns:
        The non-empty rectangles found; an empty list when nothing is
        detected or detection raises (the error is logged at debug level).
    """
    scale = thumb_dpi / 72.0
    pixmap = page.get_pixmap(matrix=fitz.Matrix(scale, scale), alpha=False)
    frame = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(
        pixmap.height, pixmap.width, pixmap.n
    )
    if pixmap.n == 4:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR)

    found: List[fitz.Rect] = []
    detector = cv2.QRCodeDetector()
    try:
        detected, corners = detector.detect(frame)
        if detected and corners is not None:
            for quad in np.array(corners):
                xs, ys = quad[:, 0], quad[:, 1]
                left, top = float(xs.min()), float(ys.min())
                right, bottom = float(xs.max()), float(ys.max())
                # Margins are proportional to the un-padded box size.
                pad_x = CROP_MARGIN * (right - left)
                pad_y = CROP_MARGIN * (bottom - top)
                left -= pad_x
                top -= pad_y
                right += pad_x
                bottom += pad_y
                # Pixel -> page coordinates, then clip to the page.
                clipped = fitz.Rect(left/scale, top/scale, right/scale, bottom/scale) & page.rect
                if not clipped.is_empty:
                    found.append(clipped)
    except Exception as e:
        logger.debug(f"Thumbnail detect failed: {e}")
    return found
330
+
331
def thumbnail_scan_all_pages(pdf_bytes: bytes, pages: List[int] = None, thumb_dpi: int = THUMBNAIL_DPI) -> Dict[int, List[fitz.Rect]]:
    """Run the thumbnail QR detector over pages of a PDF.

    Args:
        pdf_bytes: The PDF document content.
        pages: 1-based page numbers to scan; out-of-range numbers are
            dropped. When empty or ``None`` every page is scanned.
        thumb_dpi: Render resolution for the thumbnails.

    Returns:
        A mapping from 1-based page number to the boxes detected on that
        page. Pages without detections are omitted.
    """
    document = fitz.open(stream=pdf_bytes, filetype="pdf")
    page_count = document.page_count

    zero_based = (
        [number - 1 for number in pages if 1 <= number <= page_count]
        if pages
        else list(range(page_count))
    )

    detections: Dict[int, List[fitz.Rect]] = {}
    try:
        for index in zero_based:
            boxes = detect_boxes_on_thumbnail(document[index], thumb_dpi)
            if boxes:
                detections[index + 1] = boxes
    finally:
        document.close()
    return detections
351
+
352
def extract_qr_from_pdf(pdf_bytes: bytes, dpi: int, pages: Optional[List[int]] = None, scan_all_first: bool = True) -> Optional[Dict[str, Any]]:
    """Extract the preferred QR payload from a PDF.

    With ``scan_all_first`` the target pages are first scanned as thumbnails.
    If any page shows a QR-like box, only those pages are examined;
    otherwise (or with ``scan_all_first`` off) every target page is. For
    each examined page the detected boxes are rendered at ``dpi`` and
    decoded one by one, then the whole page is rendered and decoded. The
    first successful decode wins.

    Args:
        pdf_bytes: The PDF document content.
        dpi: Render resolution for decoding.
        pages: 1-based page numbers to consider; out-of-range numbers are
            dropped. Empty or ``None`` means all pages.
        scan_all_first: Whether to run the thumbnail pre-scan.

    Returns:
        The decoded payload with ``_page_number``, ``_dpi_used`` and
        ``_clip`` (``[x0, y0, x1, y1]`` or ``None`` for a whole-page hit)
        added, or ``None`` when nothing was found or scanning failed
        (failures are logged, not raised).
    """
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    # One finally closes the document on every exit path.
    try:
        total = doc.page_count

        if pages:
            target_pages = [p for p in pages if 1 <= p <= total]
        else:
            target_pages = list(range(1, total + 1))

        logger.debug(f"[PDF] target_pages={target_pages}, dpi={dpi}")

        zoom = dpi / 72.0
        mat = fitz.Matrix(zoom, zoom)

        try:
            page_boxes_map: Dict[int, List[fitz.Rect]] = {}
            if scan_all_first:
                page_boxes_map = thumbnail_scan_all_pages(pdf_bytes, pages=target_pages, thumb_dpi=THUMBNAIL_DPI)
            candidate_pages = list(page_boxes_map.keys()) if page_boxes_map else target_pages

            for p1 in candidate_pages:
                page = doc[p1 - 1]

                # Detected boxes first, then the whole page (clip=None).
                for clip in [*page_boxes_map.get(p1, []), None]:
                    pix = page.get_pixmap(matrix=mat, clip=clip, alpha=False)
                    res = extract_qr_from_image(pix.tobytes("png"))
                    if res:
                        res["_page_number"] = p1
                        res["_dpi_used"] = dpi
                        res["_clip"] = None if clip is None else [clip.x0, clip.y0, clip.x1, clip.y1]
                        return res

            return None

        except Exception as e:
            logger.error(f"[PDF] Processing error: {e}")
            return None
    finally:
        doc.close()
408
+
409
+ # ============ Processing with Timeout ============
410
+
411
async def process_file_with_timeout(content: bytes, filename: str, dpi: int, pages: str, scan_all_first: bool) -> Dict[str, Any]:
    """Run QR extraction for one upload in the thread pool, with a time limit.

    The content is treated as a PDF when the file extension is ``pdf`` or the
    bytes start with ``%PDF``; otherwise as an image. Extraction runs on
    ``executor`` and is awaited for at most ``PROCESSING_TIMEOUT`` seconds.

    Args:
        content: Raw file bytes.
        filename: Original file name (may be ``None``).
        dpi: Render resolution for PDF pages.
        pages: Comma-separated 1-based page numbers; non-numeric tokens are
            ignored and an empty value means all pages.
        scan_all_first: Passed through to ``extract_qr_from_pdf``.

    Returns:
        A dict with ``success``, ``qr_data``, ``message`` and ``filename``.
        On timeout or any exception it also has an ``error`` key
        (``"timeout"`` or the exception text). This coroutine does not raise.

    NOTE(review): on timeout only the await is abandoned; nothing here stops
    the worker thread, which presumably keeps running to completion.
    """
    # get_running_loop() is the supported way to obtain the loop inside a
    # coroutine; get_event_loop() is deprecated for this use.
    loop = asyncio.get_running_loop()

    try:
        ext = (filename or "").lower().split(".")[-1]
        is_pdf = (ext == "pdf") or content.startswith(b"%PDF")

        if is_pdf:
            # isdecimal() (not isdigit()) so that characters such as "²",
            # which int() rejects, are skipped like any other junk token.
            page_list: List[int] = [
                int(tok)
                for tok in (piece.strip() for piece in (pages or "").split(","))
                if tok.isdecimal()
            ]

            result = await asyncio.wait_for(
                loop.run_in_executor(executor, extract_qr_from_pdf, content, dpi, page_list or None, scan_all_first),
                timeout=PROCESSING_TIMEOUT
            )
        else:
            result = await asyncio.wait_for(
                loop.run_in_executor(executor, extract_qr_from_image, content),
                timeout=PROCESSING_TIMEOUT
            )

        if result:
            return {
                "success": True,
                "qr_data": result,
                "message": "QR code extracted successfully (E-Invoice preferred)",
                "filename": filename
            }
        return {
            "success": False,
            "qr_data": None,
            "message": "No QR code found in the document",
            "filename": filename
        }

    except asyncio.TimeoutError:
        logger.error(f"Timeout processing {filename}")
        return {
            "success": False,
            "qr_data": None,
            "message": "Processing timeout exceeded",
            "filename": filename,
            "error": "timeout"
        }
    except Exception as e:
        logger.error(f"Error processing {filename}: {e}")
        return {
            "success": False,
            "qr_data": None,
            "message": f"Error processing file: {str(e)}",
            "filename": filename,
            "error": str(e)
        }
470
+
471
+ # ============ API Endpoints ============
472
+
473
@app.get("/")
def root():
    """Describe the service: name, version, feature limits and endpoints."""
    features = {
        "single_file": True,
        "batch_upload": True,
        "concurrency_limit": MAX_CONCURRENT_REQUESTS,
        "max_file_size": f"{MAX_FILE_SIZE / (1024*1024)}MB",
        "timeout": f"{PROCESSING_TIMEOUT}s"
    }
    endpoints = {
        "single": "POST /extract-qr",
        "batch": "POST /batch-extract",
        "tracking": "GET /tracking/{request_id}",
        "stats": "GET /stats",
        "health": "GET /health"
    }
    return {
        "service": "E-Invoice QR Extractor API",
        "version": "2.3.0",
        "features": features,
        "endpoints": endpoints
    }
493
+
494
@app.post("/extract-qr")
async def extract_qr(
    file: UploadFile = File(...),
    dpi: int = 1200,
    pages: str = "",
    scan_all_first: bool = True
):
    """
    Single file QR extraction with concurrency control.

    The upload is read, size-checked, registered in the tracking table and
    processed while holding ``processing_semaphore``.

    Responses (JSON body: ``success``, ``qr_data``, ``message``,
    ``request_id``):

    * 200 - a QR code was extracted;
    * 404 - processing finished but no QR code was found;
    * 504 - processing exceeded the time limit;
    * 500 - processing failed with an error.

    Raises:
        HTTPException: 413 when the file exceeds ``MAX_FILE_SIZE``; 500 on
            an unexpected error in this handler.
    """
    request_id = str(uuid.uuid4())

    async with processing_semaphore:
        try:
            content = await file.read()
            file_size = len(content)

            if file_size > MAX_FILE_SIZE:
                raise HTTPException(status_code=413, detail="File too large")

            file_hash = get_file_hash(content)

            logger.info(f"[{request_id}] Processing: {file.filename}, size: {file_size} bytes")

            # Track request
            track_request(request_id, {
                "filename": file.filename,
                "file_size": file_size,
                "file_hash": file_hash,
                "dpi": dpi,
                "pages": pages
            })

            # Process with timeout
            result = await process_file_with_timeout(content, file.filename, dpi, pages, scan_all_first)

            # A result carrying an "error" key is a timeout or a processing
            # failure, not "no QR in the document": track it as failed and
            # answer with a server-side status instead of 404.
            if result["success"]:
                outcome, http_status = "completed", 200
            elif result.get("error"):
                outcome = "failed"
                http_status = 504 if result["error"] == "timeout" else 500
            else:
                outcome, http_status = "no_qr_found", 404

            # Update tracking
            update_request(request_id, outcome, result)

            logger.info(f"[{request_id}] Completed: {result['success']}")

            return JSONResponse(status_code=http_status, content={
                "success": result["success"],
                "qr_data": result["qr_data"],
                "message": result["message"],
                "request_id": request_id
            })

        except HTTPException:
            # Only the size check raises this, and it does so before the
            # request is tracked, so there is no record to update.
            raise
        except Exception as e:
            logger.error(f"[{request_id}] Error: {e}")
            update_request(request_id, "failed", {"error": str(e)})
            raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")
557
+
558
@app.post("/batch-extract")
async def batch_extract(
    files: List[UploadFile] = File(...),
    dpi: int = 1200,
    pages: str = ""
):
    """
    Batch upload: extract QR codes from several files in one request.

    Files are processed concurrently, each one holding a slot of
    ``processing_semaphore`` while it is read and processed. A failure of one
    file is reported in its own result entry and does not affect the others.

    Returns a summary with ``batch_id``, ``total_files``, ``successful``,
    ``failed`` and the per-file ``results`` in upload order.
    """
    batch_id = str(uuid.uuid4())
    file_count = len(files)
    logger.info(f"[BATCH {batch_id}] Received {file_count} files")

    async def _handle_one(upload: UploadFile, position: int):
        """Process one upload; always returns a result dict, never raises."""
        async with processing_semaphore:
            try:
                payload = await upload.read()
                size = len(payload)

                if size > MAX_FILE_SIZE:
                    return {
                        "filename": upload.filename,
                        "index": position,
                        "success": False,
                        "error": "File too large",
                        "file_size": size
                    }

                logger.info(f"[BATCH {batch_id}] [{position+1}/{file_count}] Processing: {upload.filename}")

                outcome = await process_file_with_timeout(payload, upload.filename, dpi, pages, True)
                outcome["index"] = position
                outcome["file_size"] = size
                return outcome

            except Exception as e:
                logger.error(f"[BATCH {batch_id}] Error processing {upload.filename}: {e}")
                return {
                    "filename": upload.filename,
                    "index": position,
                    "success": False,
                    "error": str(e)
                }

    # Start every file at once; the semaphore limits how many actually run.
    results = await asyncio.gather(
        *[_handle_one(upload, position) for position, upload in enumerate(files)]
    )

    # Summary
    successful = len([entry for entry in results if entry.get("success")])
    failed = len(results) - successful

    logger.info(f"[BATCH {batch_id}] Completed: {successful} successful, {failed} failed")

    return {
        "batch_id": batch_id,
        "total_files": file_count,
        "successful": successful,
        "failed": failed,
        "results": results
    }
623
+
624
@app.get("/tracking/{request_id}")
def get_tracking(request_id: str):
    """Return the stored tracking record for ``request_id``.

    Raises:
        HTTPException: 404 when no record exists for that id.
    """
    try:
        return request_tracking[request_id]
    except KeyError:
        raise HTTPException(status_code=404, detail="Request not found") from None
631
+
632
@app.get("/stats")
def get_stats():
    """Report current load, per-status request counts and configuration.

    Tracking records older than one hour are purged first, so the counts
    cover only what remains in the tracking table.
    """
    cleanup_old_tracking()

    total_requests = len(request_tracking)

    # Count all four reported statuses in one pass over the records.
    tally = {"completed": 0, "failed": 0, "processing": 0, "no_qr_found": 0}
    for record in request_tracking.values():
        state = record["status"]
        if state in tally:
            tally[state] += 1

    current_load = {
        "processing": tally["processing"],
        "available_slots": MAX_CONCURRENT_REQUESTS - tally["processing"],
        "max_concurrent": MAX_CONCURRENT_REQUESTS
    }
    statistics = {
        "total_requests": total_requests,
        "completed": tally["completed"],
        "failed": tally["failed"],
        "no_qr_found": tally["no_qr_found"]
    }
    configuration = {
        "max_file_size": f"{MAX_FILE_SIZE / (1024*1024)}MB",
        "processing_timeout": f"{PROCESSING_TIMEOUT}s",
        "thumbnail_dpi": THUMBNAIL_DPI,
        "max_concurrent": MAX_CONCURRENT_REQUESTS
    }
    return {
        "current_load": current_load,
        "statistics": statistics,
        "configuration": configuration
    }
661
+
662
@app.get("/health")
def health_check():
    """Liveness endpoint: always reports healthy, plus the current load."""
    in_flight = len(
        [record for record in request_tracking.values() if record["status"] == "processing"]
    )
    free_slots = MAX_CONCURRENT_REQUESTS - in_flight

    return {
        "status": "healthy",
        "processing": in_flight,
        "available_slots": free_slots
    }
672
+
673
# Script entry point: serve the app with uvicorn on all interfaces, port 7860.
# A single worker keeps the in-memory request_tracking table in one process.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when run as a script.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info", workers=1)