shayansjm committed on
Commit
6274ffa
·
verified ·
1 Parent(s): 8915ba7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -34
app.py CHANGED
@@ -3,64 +3,67 @@ from paddleocr import PaddleOCR
3
  from PIL import Image, ImageOps
4
  import numpy as np
5
 
6
- # We leave ocr as None. It will only load when someone clicks "Extract"
7
- ocr_model = None
8
 
9
  def process_bank_form(image):
10
- global ocr_model
11
 
12
  if image is None:
13
  return "Please upload an image."
14
 
15
  try:
16
- # 1. LAZY LOAD: This prevents the 'name ocr is not defined' error
17
- if ocr_model is None:
18
- print("First run: Loading PaddleOCR model into memory...")
19
- ocr_model = PaddleOCR(
 
20
  lang='en',
21
- ocr_version='PP-OCRv4',
22
- use_angle_cls=True,
23
- use_gpu=False,
24
- enable_mkldnn=True # Speeds up CPU processing
25
  )
26
 
27
- # 2. Image Pre-processing
 
28
  img = image.convert("RGB")
29
- img = ImageOps.exif_transpose(img) # Fixes phone photo rotation
30
  img_array = np.array(img)
31
 
32
- # 3. Run Inference
33
- # In 3.0+, we use the instance we just loaded
34
- result = ocr_model.ocr(img_array)
35
 
36
- # 4. Parsing with Safety Checks
37
  if not result or not isinstance(result, list) or result[0] is None:
38
- return "No text detected. Try a closer photo."
39
 
40
  extracted_text = []
41
  for line in result[0]:
42
- if isinstance(line, list) and len(line) >= 2:
43
- text_info = line[1]
44
- if text_info and len(text_info) >= 1:
45
- raw_string = str(text_info[0]).strip()
46
- conf = float(text_info[1])
47
- if raw_string and conf > 0.35:
48
- extracted_text.append(raw_string)
 
49
 
50
- return "\n".join(extracted_text) if extracted_text else "Could not read text."
 
 
 
51
 
52
  except Exception as e:
53
- return f"Error: {str(e)}\n\nTry: Cropping the image to just the text area."
54
 
55
- # UI with simplified components
56
- with gr.Blocks() as demo:
57
- gr.Markdown("### 🏦 English Bank Form OCR (Stable Edition)")
58
  with gr.Row():
59
- input_img = gr.Image(type="pil")
60
- output_text = gr.Textbox(label="Extracted Text", lines=15)
61
  btn = gr.Button("Extract Text", variant="primary")
 
62
  btn.click(fn=process_bank_form, inputs=input_img, outputs=output_text)
63
 
64
  if __name__ == "__main__":
65
- demo.launch()
66
-
 
3
  from PIL import Image, ImageOps
4
  import numpy as np
5
 
6
# Lazily created PaddleOCR engine; built on the first request and reused afterwards.
ocr_instance = None

# Recognised lines scoring at or below this confidence are dropped.
_MIN_CONFIDENCE = 0.35


def _collect_confident_text(page, min_confidence=_MIN_CONFIDENCE):
    """Return the stripped text lines of one OCR page that beat *min_confidence*.

    Two result layouts are understood:

    * PaddleOCR 3.x: a dict-like page result carrying parallel ``rec_texts``
      and ``rec_scores`` sequences.
    * PaddleOCR 2.x: a list of ``[box, (text, confidence)]`` entries.

    Entries that do not match the expected layout are skipped instead of
    raising, so one malformed line cannot discard the whole page.
    """
    if isinstance(page, dict):
        texts = page.get("rec_texts")
        scores = page.get("rec_scores")
        if texts is None or scores is None:
            return []
        pairs = zip(texts, scores)
    else:
        pairs = (
            (entry[1][0], entry[1][1])
            for entry in page
            if isinstance(entry, (list, tuple))
            and len(entry) >= 2
            and isinstance(entry[1], (list, tuple))
            # Both text and confidence are read below, so two items are required.
            and len(entry[1]) >= 2
        )

    kept = []
    for text, score in pairs:
        cleaned = str(text).strip()
        if cleaned and float(score) > min_confidence:
            kept.append(cleaned)
    return kept


def process_bank_form(image):
    """Run OCR on an uploaded form photo and return the recognised text.

    Args:
        image: The uploaded picture as a PIL image, or ``None`` when nothing
            was uploaded.

    Returns:
        The recognised lines joined with newlines, or a human-readable
        message when no image was given, nothing was detected, nothing was
        confident enough to keep, or an error occurred. Exceptions are never
        propagated; they are reported in the returned string.
    """
    global ocr_instance

    if image is None:
        return "Please upload an image."

    try:
        # Build the engine on first use only; later calls reuse it.
        if ocr_instance is None:
            print("Initializing PaddleOCR 3.0 on CPU...")
            # use_gpu / show_log / rec_algorithm are intentionally not passed
            # (removed by the original author for PaddleOCR 3.0).
            ocr_instance = PaddleOCR(
                lang='en',
                ocr_version='PP-OCRv4',
                use_angle_cls=True,
            )

        # Rotate first: the orientation tag is read from the source image's
        # metadata, which a converted copy is not guaranteed to keep.
        upright = ImageOps.exif_transpose(image)
        img_array = np.array(upright.convert("RGB"))

        result = ocr_instance.ocr(img_array)

        if not result or not isinstance(result, list) or result[0] is None:
            return "No text detected. Try a closer photo or darker ink."

        # A single image was submitted, so only the first page is relevant.
        extracted_text = _collect_confident_text(result[0])

        if not extracted_text:
            return "The AI saw the form but couldn't read the words. Please crop the photo."

        return "\n".join(extracted_text)

    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"System Error: {str(e)}\n\nTip: Go to Settings and click 'Factory Reboot' to clear the memory."
57
 
58
# Build the Gradio app: one image input, one text output, and a button that
# sends the uploaded image through process_bank_form.
# NOTE(review): nesting below is reconstructed from a whitespace-stripped diff;
# confirm that the button sits under the row rather than inside it.
with gr.Blocks(theme=gr.themes.Base()) as demo:
    gr.Markdown("## 🏦 English Bank Form OCR")
    with gr.Row():
        # type="pil" matches process_bank_form, which calls PIL image methods.
        input_img = gr.Image(type="pil", label="Bank Form Photo")
        output_text = gr.Textbox(label="Result", lines=15)
    btn = gr.Button("Extract Text", variant="primary")

    # Clicking the button runs OCR on the image and shows the returned string.
    btn.click(fn=process_bank_form, inputs=input_img, outputs=output_text)

if __name__ == "__main__":
    # max_threads=1 is passed through to Gradio's launch().
    # NOTE(review): presumably chosen to limit concurrent OCR runs; confirm.
    demo.launch(max_threads=1)