Spaces:

sameernotes
/

hindi-o

Sleeping

App Files Files Community

sameernotes committed on Mar 19, 2025

Commit

e84c69b

verified ·

1 Parent(s): ccfdc8b

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -44

app.py CHANGED Viewed

@@ -3,36 +3,56 @@ import cv2
 import numpy as np
 import tensorflow as tf
 import pickle
-import requests
 import io
 import tempfile
-import sakshi_ocr
-# Model & Encoder URLs
 MODEL_URL = "https://huggingface.co/sameernotes/hindi-ocr/resolve/main/hindi_ocr_model.keras"
 ENCODER_URL = "https://huggingface.co/sameernotes/hindi-ocr/resolve/main/label_encoder.pkl"
-# Load model from Hugging Face
-@tf.function
 def load_model():
-    response = requests.get(MODEL_URL)
-    if response.status_code == 200:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".keras") as temp_model:
-            temp_model.write(response.content)
-            model = tf.keras.models.load_model(temp_model.name)
-        return model
-    else:
-        raise ValueError("Failed to load model from Hugging Face.")
-# Load label encoder from Hugging Face
 def load_label_encoder():
-    response = requests.get(ENCODER_URL)
-    if response.status_code == 200:
-        return pickle.loads(response.content)
-    else:
-        raise ValueError("Failed to load label encoder.")
-# Initialize model and encoder
 model = load_model()
 label_encoder = load_label_encoder()
@@ -42,34 +62,88 @@ def detect_words(image):
     kernel = np.ones((3,3), np.uint8)
     dilated = cv2.dilate(binary, kernel, iterations=2)
     contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    word_count = sum(1 for c in contours if cv2.boundingRect(c)[2] > 10 and cv2.boundingRect(c)[3] > 10)
-    return word_count
-# Process image and predict text
 def process_image(image):
-    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
-    word_count = detect_words(gray)
-    img_resized = cv2.resize(gray, (128, 32)) / 255.0
-    img_input = img_resized[np.newaxis, ..., np.newaxis]
-    pred = model.predict(img_input)
-    pred_label_idx = np.argmax(pred)
-    pred_label = label_encoder.inverse_transform([pred_label_idx])[0]
-    return f"Detected Words: {word_count}\nPredicted Text: {pred_label}"
-# Sakshi OCR function
-def run_sakshi_ocr(image):
     with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
-        cv2.imwrite(tmp_file.name, image)
-        output = io.StringIO()
-        sakshi_ocr.generate(tmp_file.name, output)
-    return output.getvalue()
 # Gradio Interface
-def ocr_pipeline(image):
-    text_prediction = process_image(image)
-    sakshi_output = run_sakshi_ocr(image)
-    return f"{text_prediction}\n\nSakshi OCR Output:\n{sakshi_output}"
-demo = gr.Interface(fn=ocr_pipeline, inputs=gr.Image(type="numpy"), outputs="text")
-demo.launch()

 import numpy as np
 import tensorflow as tf
 import pickle
+import matplotlib.pyplot as plt
+import matplotlib.font_manager as fm
+import sakshi_ocr
+import os
 import io
+import sys
 import tempfile
+import requests
+# URLs for the model and encoder hosted on Hugging Face
 MODEL_URL = "https://huggingface.co/sameernotes/hindi-ocr/resolve/main/hindi_ocr_model.keras"
 ENCODER_URL = "https://huggingface.co/sameernotes/hindi-ocr/resolve/main/label_encoder.pkl"
+FONT_URL = "https://noto-website-2.storage.googleapis.com/pkgs/NotoSansDevanagari-Regular.ttf"  # Optional font
+# Download model and encoder
+def download_file(url, dest, timeout=60):
+    """Download ``url`` and store the response body at ``dest``.
+
+    The body is streamed into ``dest + ".part"`` and only renamed to ``dest``
+    once the transfer has finished, so an HTTP error or an interrupted
+    download never leaves a file at ``dest`` that a later
+    ``os.path.exists(dest)`` check would mistake for a complete one.
+
+    Args:
+        url: Address of the file to fetch.
+        dest: Local path the downloaded file is written to.
+        timeout: Seconds to wait for the server before giving up.
+
+    Raises:
+        requests.RequestException: On connection problems, timeouts, or a
+            non-2xx HTTP status.
+    """
+    partial = f"{dest}.part"
+    with requests.get(url, stream=True, timeout=timeout) as response:
+        # Without this check a 404/500 error page would be saved as the file.
+        response.raise_for_status()
+        with open(partial, 'wb') as f:
+            for chunk in response.iter_content(chunk_size=1 << 20):
+                f.write(chunk)
+    # Atomic rename: dest appears only when its content is complete.
+    os.replace(partial, dest)
+# Paths for local storage in Hugging Face Spaces (relative to the working directory)
+MODEL_PATH = "hindi_ocr_model.keras"
+ENCODER_PATH = "label_encoder.pkl"
+FONT_PATH = "NotoSansDevanagari-Regular.ttf"
+# Download the model and encoder if not already present; errors propagate
+if not os.path.exists(MODEL_PATH):
+    download_file(MODEL_URL, MODEL_PATH)
+if not os.path.exists(ENCODER_PATH):
+    download_file(ENCODER_URL, ENCODER_PATH)
+# The Devanagari font is optional (it only styles the plot title), so neither a
+# failed download nor an unreadable font file may stop the app from starting.
+try:
+    if not os.path.exists(FONT_PATH):
+        download_file(FONT_URL, FONT_PATH)
+    fm.fontManager.addfont(FONT_PATH)
+    plt.rcParams['font.family'] = 'Noto Sans Devanagari'
+except Exception as font_error:  # optional resource: report and carry on
+    print(f"Warning: Devanagari font unavailable ({font_error})", file=sys.stderr)
+# Load the model and encoder
 def load_model():
+    """Return the Keras model stored at MODEL_PATH, or None if that file is absent."""
+    if os.path.exists(MODEL_PATH):
+        return tf.keras.models.load_model(MODEL_PATH)
+    return None
 def load_label_encoder():
+    """Return the object unpickled from ENCODER_PATH, or None if that file is absent.
+
+    NOTE(review): the pickle is fetched from ENCODER_URL at startup; unpickling
+    can execute arbitrary code, so this is only safe while that source is trusted.
+    """
+    if os.path.exists(ENCODER_PATH):
+        with open(ENCODER_PATH, 'rb') as encoder_file:
+            return pickle.load(encoder_file)
+    return None
 model = load_model()  # loaded once at import time; None when MODEL_PATH is missing
 label_encoder = load_label_encoder()  # None when ENCODER_PATH is missing
     kernel = np.ones((3,3), np.uint8)
     dilated = cv2.dilate(binary, kernel, iterations=2)
     contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    word_img = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
+    word_count = 0
+    for contour in contours:
+        x, y, w, h = cv2.boundingRect(contour)
+        if w > 10 and h > 10:
+            cv2.rectangle(word_img, (x, y), (x+w, y+h), (0, 255, 0), 2)
+            word_count += 1
+    return word_img, word_count
+# Sakshi OCR output capture
+def run_sakshi_ocr(image_path):
+    """Run ``sakshi_ocr.generate`` on ``image_path`` and return what it printed.
+
+    The call's return value is ignored; the text it writes to standard output
+    while running is captured and returned instead.
+
+    Args:
+        image_path: Path of the image file handed to ``sakshi_ocr.generate``.
+
+    Returns:
+        Everything written to ``sys.stdout`` during the call, as one string.
+    """
+    # Imported locally so the function does not rely on a new file-level import.
+    from contextlib import redirect_stdout
+
+    buffer = io.StringIO()
+    # redirect_stdout restores the previous sys.stdout even if generate() raises.
+    with redirect_stdout(buffer):
+        sakshi_ocr.generate(image_path)
+    return buffer.getvalue()
+# Main OCR processing function
 def process_image(image):
+    """Run word detection and both OCR engines on an uploaded image.
+
+    Args:
+        image: RGB image (PIL image or array-like) from the Gradio input, or
+            None when nothing was uploaded.
+
+    Returns:
+        A 4-tuple: Sakshi OCR text, word-detection image, word count, and the
+        prediction plot as an RGB array (None when no prediction was made).
+    """
+    if image is None:
+        # The last slot feeds an image component, so it must be None, not text.
+        return "Error: No image provided", None, 0, None
+    # Convert PIL image to OpenCV format (grayscale)
+    img = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
+    # Word detection
+    word_detected_img, word_count = detect_words(img)
+    # First OCR model prediction
+    pred_image = None
+    if model is None or label_encoder is None:
+        print("Model or encoder not loaded", file=sys.stderr)
+    else:
+        try:
+            img_resized = cv2.resize(img, (128, 32))
+            img_norm = img_resized / 255.0
+            img_input = img_norm[np.newaxis, ..., np.newaxis]  # Shape: (1, 32, 128, 1)
+            pred = model.predict(img_input)
+            pred_label_idx = np.argmax(pred)
+            pred_label = label_encoder.inverse_transform([pred_label_idx])[0]
+            # Create plot with prediction
+            fig, ax = plt.subplots()
+            try:
+                ax.imshow(img, cmap='gray')
+                ax.set_title(f"Predicted: {pred_label}", fontsize=12)
+                ax.axis('off')
+                # Render in memory: a fixed file name on disk would be shared
+                # (and deleted) by concurrent requests.
+                png = io.BytesIO()
+                fig.savefig(png, format="png")
+            finally:
+                # Close this specific figure even when plotting fails.
+                plt.close(fig)
+            encoded = np.frombuffer(png.getvalue(), dtype=np.uint8)
+            # OpenCV decodes to BGR; the Gradio image output expects RGB.
+            pred_image = cv2.cvtColor(cv2.imdecode(encoded, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
+        except Exception as e:
+            # Keep the other outputs usable, but do not hide the failure.
+            pred_image = None
+            print(f"Error: {str(e)}", file=sys.stderr)
+    # Sakshi OCR processing
     with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
+        tmp_path = tmp_file.name
+    # The handle is closed before other code writes to or deletes the path,
+    # and the file is removed even if the OCR call raises.
+    try:
+        cv2.imwrite(tmp_path, img)
+        sakshi_output = run_sakshi_ocr(tmp_path)
+    finally:
+        os.remove(tmp_path)
+    return sakshi_output, word_detected_img, word_count, pred_image
 # Gradio Interface
+interface = gr.Interface(
+    fn=process_image,
+    inputs=gr.Image(type="pil", label="Upload an Image"),
+    outputs=[  # one component per value returned by process_image, in the same order
+        gr.Textbox(label="Sakshi OCR Output"),  # sakshi_output
+        gr.Image(label="Word Detection", type="numpy"),  # word_detected_img
+        gr.Number(label="Word Count"),  # word_count
+        gr.Image(label="Hindi OCR Prediction", type="numpy")  # pred_image
+    ],
+    title="Hindi OCR App by Sakshi",
+    description="Upload an image to perform Hindi OCR and word detection."
+)
+# Launch the app (runs at import time; there is no __main__ guard)
+interface.launch()