PrabhatGupta786 committed on
Commit
cbafc2a
·
verified ·
1 Parent(s): 1b92121

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -36
app.py CHANGED
@@ -3,75 +3,86 @@ import cv2
3
  import numpy as np
4
  from PIL import Image
5
  from transformers import pipeline
 
 
6
 
7
# Build the OCR pipeline once, at import time, from the small handwritten
# TrOCR checkpoint; the status line is printed before loading starts.
print("Loading OCR Pipeline...")
pipe = pipeline(
    task="image-to-text",
    model="microsoft/trocr-small-handwritten",
)
 
 
 
 
 
 
10
 
11
  def get_lines(img_array):
12
- # 1. Convert to grayscale
13
  gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
14
 
15
- # 2. IMPROVED: Adaptive Thresholding
16
- # Instead of a fixed '180', this adjusts to the lighting of the photo
17
  binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
18
  cv2.THRESH_BINARY_INV, 21, 10)
19
 
20
- # 3. IMPROVED: Stronger Dilation
21
- # We use a wider kernel to ensure separate words on the same line get joined
22
  kernel = np.ones((5, 100), np.uint8)
23
  dilation = cv2.dilate(binary, kernel, iterations=1)
24
 
25
  contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
26
-
27
- # Sort from top to bottom
28
  contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[1])
29
 
30
  line_images = []
31
  for ctr in contours:
32
  x, y, w, h = cv2.boundingRect(ctr)
33
- # Only keep boxes that look like text (width > height and not tiny)
34
- if w > 30 and h > 10:
35
- roi = img_array[max(0, y-10):y+h+10, max(0, x-10):x+w+10]
36
  if roi.size > 0:
37
  line_images.append(Image.fromarray(roi).convert("RGB"))
38
-
 
 
 
39
  return line_images
40
 
41
def process_handwriting(input_img):
    """Transcribe a handwriting image to text, one detected line at a time.

    Args:
        input_img: Image as a NumPy array, or ``None`` when nothing was
            supplied.

    Returns:
        The recognised lines joined with newlines, or a human-readable
        message when there is no input, when nothing could be read, or when
        the whole-image fallback fails.
    """
    if input_img is None:
        return "Error: No image received."

    lines = get_lines(input_img)

    # Fallback: when segmentation finds nothing, run the model on the whole
    # image once and report the result (or the failure) to the user.
    if not lines:
        print("No lines detected, trying full image...")
        try:
            full_img_pil = Image.fromarray(input_img).convert("RGB")
            prediction = pipe(full_img_pil)
            return f"Note: No lines detected. Full image result: {prediction[0]['generated_text']}"
        except Exception as e:
            return f"Error: Could not process image. Details: {str(e)}"

    results = []
    for index, line in enumerate(lines, start=1):
        try:
            text = pipe(line)[0]['generated_text'].strip()
        except Exception as e:
            # Best effort: one unreadable line must not abort the others,
            # but record why it was dropped instead of discarding the error.
            print(f"Skipping line {index}: {e}")
            continue
        if text:
            results.append(text)

    return "\n".join(results) if results else "The model couldn't read any text. Try a clearer image."
68
 
69
  demo = gr.Interface(
70
- fn=process_handwriting,
71
- inputs=gr.Image(type="numpy"),
72
- outputs="text",
73
- title="Handwritten to Typed Text",
74
- description="If output is empty, try cropping the image closer to the text."
75
  )
76
 
77
  if __name__ == "__main__":
 
3
  import numpy as np
4
  from PIL import Image
5
  from transformers import pipeline
6
+ import gc # Essential for cleaning up RAM
7
+ import torch
8
 
9
# 1. Use the 'small' model to stay under the 16GB RAM limit
MODEL_NAME = "microsoft/trocr-small-handwritten"

# Bind the name before loading so that a failed load leaves `pipe` as None
# rather than undefined; later code can then test `pipe is None`.
pipe = None

print(f"Loading {MODEL_NAME}...")
try:
    # device=-1 keeps the pipeline on the CPU.
    pipe = pipeline("image-to-text", model=MODEL_NAME, device=-1)
except Exception as e:
    # Best effort: report the failure and keep the module importable.
    print(f"Load Error: {e}")
18
 
19
  def get_lines(img_array):
 
20
  gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
21
 
22
+ # Adaptive Thresholding helps in various lighting conditions
 
23
  binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
24
  cv2.THRESH_BINARY_INV, 21, 10)
25
 
26
+ # Dilation connects letters into a single line block
 
27
  kernel = np.ones((5, 100), np.uint8)
28
  dilation = cv2.dilate(binary, kernel, iterations=1)
29
 
30
  contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
 
 
31
  contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[1])
32
 
33
  line_images = []
34
  for ctr in contours:
35
  x, y, w, h = cv2.boundingRect(ctr)
36
+ if w > 30 and h > 15:
37
+ # Crop with small padding
38
+ roi = img_array[max(0, y-5):y+h+5, max(0, x-5):x+w+5]
39
  if roi.size > 0:
40
  line_images.append(Image.fromarray(roi).convert("RGB"))
41
+
42
+ # Cleanup OpenCV objects
43
+ del gray, binary, dilation
44
+ gc.collect()
45
  return line_images
46
 
47
def process_image(input_img):
    """Run OCR on a handwriting image, line by line.

    Args:
        input_img: Image as a NumPy array, or ``None`` when nothing was
            supplied.

    Returns:
        The recognised lines joined with newlines (an empty string when no
        line produced text). When no lines are segmented, the whole image is
        tried once and the result is returned prefixed with
        ``[Single Line Mode]:``, or ``"No text detected."`` if that fails.
        ``"Please upload an image."`` is returned for ``None`` input.
    """
    if input_img is None:
        return "Please upload an image."

    lines = get_lines(input_img)

    # If segmentation fails, try the whole image as a backup.
    if not lines:
        try:
            full_img = Image.fromarray(input_img).convert("RGB")
            # Shrink in place so the image fits within 800x800 pixels.
            full_img.thumbnail((800, 800))
            res = pipe(full_img)
            return f"[Single Line Mode]: {res[0]['generated_text']}"
        except Exception as e:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit still propagate, and record the cause.
            print(f"Full-image OCR failed: {e}")
            return "No text detected."

    final_results = []
    for index, line in enumerate(lines, start=1):
        try:
            text = pipe(line)[0]['generated_text'].strip()
            if text:
                final_results.append(text)
        except Exception as e:
            # Best effort: skip the failing line but log why it was dropped.
            print(f"Skipping line {index}: {e}")
        finally:
            # Run a garbage collection after every line, success or failure.
            gc.collect()

    return "\n".join(final_results)
79
 
80
  demo = gr.Interface(
81
+ fn=process_image,
82
+ inputs=gr.Image(type="numpy", label="Upload Handwriting"),
83
+ outputs=gr.Textbox(label="Result"),
84
+ title="Stable Handwritten OCR (v3)",
85
+ description="Optimized for HF Free Tier. Uses TrOCR-Small and aggressive RAM management."
86
  )
87
 
88
  if __name__ == "__main__":