Spaces:

sameernotes
/

hindi-o

Sleeping

App Files Files Community

sameernotes committed on Mar 19, 2025

Commit

478f262

verified ·

1 Parent(s): ec2a4de

Create app.py

Browse files

Files changed (1) hide show

app.py +75 -0

app.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import gradio as gr
+import cv2
+import numpy as np
+import tensorflow as tf
+import pickle
+import requests
+import io
+import tempfile
+import sakshi_ocr
+# Download URLs for the Keras OCR model file and the pickled label encoder
+MODEL_URL = "https://huggingface.co/sameernotes/hindi-ocr/resolve/main/hindi_ocr_model.keras"  # fetched by load_model()
+ENCODER_URL = "https://huggingface.co/sameernotes/hindi-ocr/resolve/main/label_encoder.pkl"  # fetched by load_label_encoder()
+# Load model from Hugging Face
+@tf.function
+def load_model():
+    response = requests.get(MODEL_URL)
+    if response.status_code == 200:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".keras") as temp_model:
+            temp_model.write(response.content)
+            model = tf.keras.models.load_model(temp_model.name)
+        return model
+    else:
+        raise ValueError("Failed to load model from Hugging Face.")
+# Load label encoder from Hugging Face
+def load_label_encoder():
+    response = requests.get(ENCODER_URL)
+    if response.status_code == 200:
+        return pickle.loads(response.content)
+    else:
+        raise ValueError("Failed to load label encoder.")
+# Initialize model and encoder once at import time; both calls download over the network
+model = load_model()  # used by process_image() for predictions
+label_encoder = load_label_encoder()  # used by process_image() to turn a class index into a label
+# Word detection function
+def detect_words(image):
+    _, binary = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+    kernel = np.ones((3,3), np.uint8)
+    dilated = cv2.dilate(binary, kernel, iterations=2)
+    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    word_count = sum(1 for c in contours if cv2.boundingRect(c)[2] > 10 and cv2.boundingRect(c)[3] > 10)
+    return word_count
+# Process image and predict text
+def process_image(image):
+    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
+    word_count = detect_words(gray)
+    img_resized = cv2.resize(gray, (128, 32)) / 255.0
+    img_input = img_resized[np.newaxis, ..., np.newaxis]
+    pred = model.predict(img_input)
+    pred_label_idx = np.argmax(pred)
+    pred_label = label_encoder.inverse_transform([pred_label_idx])[0]
+    return f"Detected Words: {word_count}\nPredicted Text: {pred_label}"
+# Sakshi OCR function
+def run_sakshi_ocr(image):
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
+        cv2.imwrite(tmp_file.name, image)
+        output = io.StringIO()
+        sakshi_ocr.generate(tmp_file.name, output)
+    return output.getvalue()
+# Gradio Interface
+def ocr_pipeline(image):
+    text_prediction = process_image(image)
+    sakshi_output = run_sakshi_ocr(image)
+    return f"{text_prediction}\n\nSakshi OCR Output:\n{sakshi_output}"
+demo = gr.Interface(fn=ocr_pipeline, inputs=gr.Image(type="numpy"), outputs="text")  # numpy image in, plain text out
+demo.launch()  # runs at import time; there is no __main__ guard