szoya committed on
Commit
f73e9d3
·
verified ·
1 Parent(s): b290e53

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +13 -49
main.py CHANGED
@@ -1,77 +1,41 @@
1
  from fastapi import FastAPI, UploadFile, File
2
  from paddleocr import PaddleOCR
3
- from PIL import Image, ImageOps
4
  import numpy as np
5
  import io
6
- import cv2
7
 
8
  app = FastAPI()
9
 
10
  # ---------------------------------------------------------
11
- # 🧠 LOAD MODEL - SERVER GRADE CONFIGURATION
12
  # ---------------------------------------------------------
13
- # We enable 'use_angle_cls' because we have 16GB RAM now.
14
- # We keep lang='en' which downloads the standard models.
15
  ocr = PaddleOCR(
16
- use_angle_cls=True, # ENABLED: Fixes rotation (Crucial for accuracy)
17
  lang='en',
18
- use_gpu=False, # HF Free tier is CPU only
19
- show_log=False,
20
-
21
- # --- ACCURACY TUNING PARAMETERS ---
22
- det_db_score_mode='slow', # SLOWER BUT MORE PRECISE: detailed polygon check
23
- det_db_box_thresh=0.5, # LOWER THRESHOLD: Detects fainter text
24
- det_db_unclip_ratio=1.6, # LARGER BOXES: Prevents cutting off edges of letters
25
- cls_thresh=0.9, # STRICTER ROTATION: Only rotate if 90% sure
26
- use_mp=True, # MULTI-PROCESSING: Use all CPU cores
27
- total_process_num=2 # 2 vCPUs available on HF free tier
28
  )
29
 
30
  @app.get("/")
31
  def home():
32
- return {"status": "High-Precision OCR Ready"}
33
-
34
- def preprocess_image(image: Image.Image) -> np.ndarray:
35
- """
36
- Upscales and cleans image for maximum OCR readability.
37
- """
38
- # 1. Convert to RGB to ensure standard format
39
- img = image.convert("RGB")
40
-
41
- # 2. Upscale small images (OCR hates small text)
42
- # If width < 2000px, double the size
43
- w, h = img.size
44
- if w < 2000:
45
- new_w = int(w * 2)
46
- new_h = int(h * 2)
47
- img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
48
-
49
- # 3. Add a white border (padding)
50
- # OCR fails if text touches the very edge of the image
51
- img = ImageOps.expand(img, border=50, fill='white')
52
-
53
- return np.array(img)
54
 
55
  @app.post("/ocr")
56
  async def get_ocr(file: UploadFile = File(...)):
57
  try:
58
- # Read image
59
  content = await file.read()
60
- pil_image = Image.open(io.BytesIO(content))
61
-
62
- # --- PRE-PROCESSING STEP ---
63
- # Make the image bigger and cleaner
64
- img_array = preprocess_image(pil_image)
65
 
66
- # Run OCR with Classifier Enabled
 
67
  result = ocr.ocr(img_array, cls=True)
68
 
69
- # Extract text
70
  full_text = ""
71
- # Paddle returns a list of lines. If result is None, image was empty.
72
  if result and result[0]:
73
- # result[0] is the list of [box, (text, score)]
74
- # We just want the text
75
  text_lines = [line[1][0] for line in result[0]]
76
  full_text = "\n".join(text_lines)
77
 
 
1
  from fastapi import FastAPI, UploadFile, File
2
  from paddleocr import PaddleOCR
3
+ from PIL import Image
4
  import numpy as np
5
  import io
 
6
 
7
  app = FastAPI()
8
 
9
  # ---------------------------------------------------------
10
+ # 🧠 LOAD MODEL - STANDARD CONFIGURATION
11
  # ---------------------------------------------------------
12
+ # We use the defaults here because they are generally more robust
13
+ # for standard invoices than the aggressive "High Precision" settings.
14
  ocr = PaddleOCR(
15
+ use_angle_cls=True, # Keep this True to handle rotated pages
16
  lang='en',
17
+ use_gpu=False
 
 
 
 
 
 
 
 
 
18
  )
19
 
20
  @app.get("/")
21
  def home():
22
+ return {"status": "Standard OCR Ready"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  @app.post("/ocr")
25
  async def get_ocr(file: UploadFile = File(...)):
26
  try:
27
+ # 1. Read image
28
  content = await file.read()
29
+ image = Image.open(io.BytesIO(content)).convert("RGB")
30
+ img_array = np.array(image)
 
 
 
31
 
32
+ # 2. Run OCR (Standard Mode)
33
+ # cls=True ensures we check for rotation
34
  result = ocr.ocr(img_array, cls=True)
35
 
36
+ # 3. Extract text
37
  full_text = ""
 
38
  if result and result[0]:
 
 
39
  text_lines = [line[1][0] for line in result[0]]
40
  full_text = "\n".join(text_lines)
41