sameernotes committed on
Commit
478f262
·
verified ·
1 Parent(s): ec2a4de

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ import tensorflow as tf
5
+ import pickle
6
+ import requests
7
+ import io
8
+ import tempfile
9
+ import sakshi_ocr
10
+
11
# Remote locations of the trained model and its label encoder.
MODEL_URL = (
    "https://huggingface.co/sameernotes/hindi-ocr/resolve/main/"
    "hindi_ocr_model.keras"
)
ENCODER_URL = (
    "https://huggingface.co/sameernotes/hindi-ocr/resolve/main/"
    "label_encoder.pkl"
)
14
+
15
# Load model from Hugging Face
def load_model():
    """Download the Keras model from ``MODEL_URL`` and load it.

    The downloaded bytes are written to a ``.keras`` file inside a temporary
    directory, the file is closed, and only then handed to
    ``tf.keras.models.load_model``. The temporary directory is removed once
    loading has finished.

    Returns:
        The object returned by ``tf.keras.models.load_model``.

    Raises:
        ValueError: If the server does not answer with HTTP status 200.
    """
    # Local import keeps this block self-contained; ``os`` is only needed
    # to build the temporary file path.
    import os

    # NOTE: this function is intentionally NOT wrapped in ``@tf.function``.
    # It performs plain-Python network and file I/O and returns a Keras
    # model object, neither of which belongs inside a traced graph function.

    # A timeout prevents start-up from hanging forever on a stalled
    # connection.
    response = requests.get(MODEL_URL, timeout=60)
    if response.status_code != 200:
        raise ValueError("Failed to load model from Hugging Face.")

    with tempfile.TemporaryDirectory() as tmp_dir:
        model_path = os.path.join(tmp_dir, "hindi_ocr_model.keras")
        with open(model_path, "wb") as model_file:
            model_file.write(response.content)
        # The file is closed (and therefore fully flushed to disk) before
        # it is read back by the loader.
        return tf.keras.models.load_model(model_path)
26
+
27
# Load label encoder from Hugging Face
def load_label_encoder():
    """Download and unpickle the label encoder stored at ``ENCODER_URL``.

    Returns:
        The object obtained by unpickling the response body.

    Raises:
        ValueError: If the server does not answer with HTTP status 200.
    """
    # A timeout prevents start-up from hanging forever on a stalled
    # connection.
    response = requests.get(ENCODER_URL, timeout=60)
    if response.status_code != 200:
        raise ValueError("Failed to load label encoder.")

    # SECURITY: ``pickle.loads`` can execute arbitrary code embedded in the
    # payload. This is only acceptable while ENCODER_URL points at a
    # repository whose contents are fully trusted.
    return pickle.loads(response.content)
34
+
35
# Fetch both artifacts once, at import time, so every request reuses them.
model = load_model()
label_encoder = load_label_encoder()
38
+
39
# Word detection function
def detect_words(image, min_size=10):
    """Count connected regions in *image* that are large enough to be words.

    The image is binarised with an inverted Otsu threshold, dilated twice
    with a 3x3 kernel, and its external contours are extracted. A contour
    is counted when both the width and the height of its bounding rectangle
    exceed ``min_size``.

    Args:
        image: Image array passed to ``cv2.threshold``. Assumed to be
            single-channel 8-bit (the caller in this file passes a
            grayscale image) -- TODO confirm for other callers.
        min_size: Exclusive lower bound, in pixels, applied to both the
            width and the height of a contour's bounding rectangle.

    Returns:
        The number of contours that pass the size filter.
    """
    _, binary = cv2.threshold(
        image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )
    kernel = np.ones((3, 3), np.uint8)
    dilated = cv2.dilate(binary, kernel, iterations=2)
    contours, _ = cv2.findContours(
        dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    word_count = 0
    for contour in contours:
        # Compute the bounding rectangle once per contour.
        _, _, width, height = cv2.boundingRect(contour)
        if width > min_size and height > min_size:
            word_count += 1
    return word_count
47
+
48
# Process image and predict text
def process_image(image):
    """Run word counting and model prediction on *image*.

    The image is converted from RGB to grayscale, passed to
    ``detect_words``, then resized with ``cv2.resize(..., (128, 32))``,
    scaled by 1/255 and given a leading and a trailing axis before being
    fed to ``model.predict``. The arg-max of the prediction is mapped back
    to a label through ``label_encoder.inverse_transform``.

    Returns:
        A two-line string holding the word count and the predicted label.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    n_words = detect_words(grayscale)

    # Normalise pixel values and add the two extra axes around the image.
    scaled = cv2.resize(grayscale, (128, 32)) / 255.0
    batch = scaled[np.newaxis, ..., np.newaxis]

    scores = model.predict(batch)
    best_idx = np.argmax(scores)
    decoded = label_encoder.inverse_transform([best_idx])
    text = decoded[0]

    return f"Detected Words: {n_words}\nPredicted Text: {text}"
58
+
59
# Sakshi OCR function
def run_sakshi_ocr(image):
    """Write *image* to a temporary PNG and run ``sakshi_ocr.generate`` on it.

    Args:
        image: Image array. Three-channel input is treated as RGB, matching
            how the rest of this file interprets the Gradio input.

    Returns:
        The text that ``sakshi_ocr.generate`` wrote into the in-memory
        buffer it was given.
    """
    # Local import keeps this block self-contained; ``os`` is only needed
    # to build the temporary file path.
    import os

    # ``cv2.imwrite`` interprets three-channel data as BGR, so swap the
    # channels first; otherwise red and blue are exchanged in the file.
    if getattr(image, "ndim", 0) == 3 and image.shape[2] == 3:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # A temporary directory guarantees the PNG is removed afterwards and
    # avoids writing to a path whose file handle is still open.
    # NOTE(review): assumes generate() has finished reading the image by
    # the time it returns -- confirm against sakshi_ocr.
    with tempfile.TemporaryDirectory() as tmp_dir:
        png_path = os.path.join(tmp_dir, "input.png")
        cv2.imwrite(png_path, image)
        output = io.StringIO()
        sakshi_ocr.generate(png_path, output)
        return output.getvalue()
66
+
67
# Gradio Interface
def ocr_pipeline(image):
    """Run both OCR paths on *image* and combine their text output.

    Args:
        image: The image handed over by the Gradio interface, or ``None``
            when no image was supplied.

    Returns:
        A message asking for an image when *image* is ``None``; otherwise
        the result of ``process_image`` followed by the output of
        ``run_sakshi_ocr`` under a "Sakshi OCR Output:" heading.
    """
    # Without this guard a missing image would fail inside OpenCV with an
    # opaque error instead of a readable message.
    if image is None:
        return "No image provided. Please upload an image."

    text_prediction = process_image(image)
    sakshi_output = run_sakshi_ocr(image)
    return f"{text_prediction}\n\nSakshi OCR Output:\n{sakshi_output}"
72
+
73
# Build the web UI: one image input (delivered as a NumPy array) mapped to
# a plain-text output, then start serving it.
demo = gr.Interface(
    fn=ocr_pipeline,
    inputs=gr.Image(type="numpy"),
    outputs="text",
)

demo.launch()