badman99dev committed on
Commit
49af979
·
1 Parent(s): 626fc16

🚀 Added advanced Hindi OCR with preprocessing

Browse files
Files changed (3) hide show
  1. app.py +30 -0
  2. packages.txt +6 -0
  3. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pytesseract
3
+ from PIL import Image, ImageEnhance, ImageFilter
4
+ import os
5
+
6
def ocr_image(img):
    """Run Hindi+English Tesseract OCR on an uploaded image.

    The image is converted to grayscale, sharpened and contrast-boosted
    (factor 2.0) before it is handed to Tesseract.

    Args:
        img: A PIL image (the UI input is configured with ``type="pil"``),
            or ``None`` when nothing was uploaded.

    Returns:
        The recognised text with surrounding whitespace stripped, or a
        message starting with "❌ Error:" when no image was supplied or the
        OCR call raised.
    """
    # Without this guard a missing upload reaches img.convert() and raises
    # AttributeError outside the try block, bypassing the error message path.
    if img is None:
        return "❌ Error: no image provided"

    # Image pre-processing
    img = img.convert("L")  # Grayscale
    img = img.filter(ImageFilter.SHARPEN)  # Sharpen
    img = ImageEnhance.Contrast(img).enhance(2.0)  # Increase contrast

    # OCR with better PSM mode
    # PSM 6: Assume a single uniform block of text
    custom_config = r"--oem 3 --psm 6"

    try:
        text = pytesseract.image_to_string(
            img, lang="hin+eng", config=custom_config
        )
    except Exception as e:
        # UI boundary: report the failure as text in the output box instead
        # of letting the exception propagate.
        return f"❌ Error: {e}"
    return text.strip()
21
+
22
# Gradio UI: one image input (delivered to ocr_image as a PIL image because
# of type="pil") and one text box showing the OCR result.
demo = gr.Interface(
    fn=ocr_image,
    inputs=gr.Image(type="pil", label="📷 Upload Hindi Image (High Quality)"),
    outputs=gr.Textbox(label="📝 OCR Result"),
    title="🧠 Hindi OCR - High Accuracy",
    description="Upload sharp, clear Hindi text image. This OCR boosts contrast & sharpness before scanning. Powered by Tesseract!",
)

# Launched at module level, exactly as in the original script.
demo.launch()
packages.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ tesseract-ocr
2
+ tesseract-ocr-hin
3
+ libglib2.0-0
4
+ libsm6
5
+ libxrender1
6
+ libxext6
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pytesseract
2
+ Pillow