# hindi-o / app.py
# sameernotes's picture
# Update app.py
# fbe00ca verified
import gradio as gr
import cv2
import numpy as np
import tensorflow as tf
import pickle
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import sakshi_ocr
import os
import io
import sys
import tempfile
import requests
# Remote locations of the model, label encoder and font hosted on Hugging Face.
# All three files live under the same repository path, so they share one base.
_HF_REPO_BASE = "https://huggingface.co/sameernotes/hindi-ocr/resolve/main"
MODEL_URL = f"{_HF_REPO_BASE}/hindi_ocr_model.keras"
ENCODER_URL = f"{_HF_REPO_BASE}/label_encoder.pkl"
FONT_URL = f"{_HF_REPO_BASE}/NotoSansDevanagari-Regular.ttf"  # Optional font
# Download helper used for the model, encoder and font
def download_file(url, dest, timeout=60):
    """Download ``url`` and store the response body at ``dest``.

    The body is streamed into a temporary sibling file (``dest`` + ".part")
    and moved into place only after the transfer completed. A failed or
    interrupted download therefore never leaves a truncated ``dest`` behind,
    which matters because this module treats the mere existence of the file
    as "already downloaded".

    Args:
        url: HTTP(S) address of the file to fetch.
        dest: Local path that receives the downloaded bytes.
        timeout: Seconds to wait for the server to connect / send data before
            giving up (passed straight to ``requests.get``).

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-success HTTP status (previously an error page would have been
            written to ``dest`` as if it were the requested file).
    """
    partial_path = f"{dest}.part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                # Stream in 1 MiB chunks so a large model is never held in memory at once.
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        # Atomic on the same filesystem: dest is either absent or complete.
        os.replace(partial_path, dest)
    finally:
        # Only still present when the download or the move failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)
# Local file names under which the artifacts are cached in Hugging Face Spaces
MODEL_PATH = "hindi_ocr_model.keras"
ENCODER_PATH = "label_encoder.pkl"
FONT_PATH = "NotoSansDevanagari-Regular.ttf"

# Fetch each artifact (model, encoder, font) that is not cached locally yet
for _remote_url, _local_path in (
    (MODEL_URL, MODEL_PATH),
    (ENCODER_URL, ENCODER_PATH),
    (FONT_URL, FONT_PATH),
):
    if os.path.exists(_local_path):
        continue
    download_file(_remote_url, _local_path)

# Register the Devanagari font with matplotlib when the font file is present
if os.path.exists(FONT_PATH):
    fm.fontManager.addfont(FONT_PATH)
    plt.rcParams['font.family'] = 'Noto Sans Devanagari'
# Loaders for the model and the label encoder
def load_model():
    """Return the Keras model stored at ``MODEL_PATH``.

    Returns:
        The object produced by ``tf.keras.models.load_model``, or ``None``
        when no file exists at ``MODEL_PATH``.
    """
    if os.path.exists(MODEL_PATH):
        return tf.keras.models.load_model(MODEL_PATH)
    return None
def load_label_encoder():
    """Return the object unpickled from ``ENCODER_PATH``.

    Returns:
        Whatever object the pickle file contains, or ``None`` when no file
        exists at ``ENCODER_PATH``.
    """
    if os.path.exists(ENCODER_PATH):
        # NOTE(review): unpickling can execute arbitrary code. This file is
        # fetched over the network elsewhere in this module, so it is only as
        # trustworthy as that remote source.
        with open(ENCODER_PATH, 'rb') as encoder_file:
            encoder = pickle.load(encoder_file)
        return encoder
    return None
# Load both artifacts once at import time. Either value is None when its file
# is missing on disk, so users of these globals must check for None.
model = load_model()
label_encoder = load_label_encoder()
# Word detection function
def detect_words(image):
    """Outline candidate word regions in a grayscale image.

    The image is binarised with an inverted Otsu threshold, dilated twice with
    a 3x3 kernel, and the external contours of the result are taken as
    candidate regions. Regions whose bounding box is not larger than 10 pixels
    in both width and height are ignored.

    Args:
        image: Single-channel image (it is passed to ``COLOR_GRAY2BGR``);
            presumably 8-bit, as Otsu thresholding requires - TODO confirm.

    Returns:
        A ``(annotated, count)`` tuple: a BGR copy of ``image`` with a green
        rectangle around every kept region, and the number of kept regions.
    """
    _, binarized = cv2.threshold(
        image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )
    merged = cv2.dilate(binarized, np.ones((3, 3), np.uint8), iterations=2)
    outlines, _ = cv2.findContours(
        merged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    annotated = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

    # Keep only boxes that exceed the minimum size in both dimensions.
    boxes = [cv2.boundingRect(outline) for outline in outlines]
    kept = [box for box in boxes if box[2] > 10 and box[3] > 10]

    for left, top, width, height in kept:
        cv2.rectangle(
            annotated, (left, top), (left + width, top + height), (0, 255, 0), 2
        )
    return annotated, len(kept)
# Sakshi OCR output capture
def run_sakshi_ocr(image_path):
    """Run ``sakshi_ocr.generate`` on ``image_path`` and return what it printed.

    Everything written to standard output while ``generate`` runs is captured
    and returned as a single string.

    Args:
        image_path: Path of the image file handed to ``sakshi_ocr.generate``.

    Returns:
        The captured standard-output text.

    Raises:
        Whatever ``sakshi_ocr.generate`` raises; standard output is restored
        before the exception propagates.
    """
    # Function-scope import so the block does not depend on the file's import list.
    from contextlib import redirect_stdout

    captured = io.StringIO()
    # NOTE: this swaps the process-wide sys.stdout, so text printed by other
    # threads during the call ends up in the capture as well.
    with redirect_stdout(captured):
        sakshi_ocr.generate(image_path)
    return captured.getvalue()
# Main OCR processing function (entry point: process_image) and its helpers
def _to_grayscale(image):
    """Return ``image`` (PIL image or array; gray, RGB or RGBA) as a 2-D gray array."""
    if hasattr(image, "convert"):
        # PIL image: normalise palette / alpha / gray modes to plain 8-bit RGB
        # so the colour conversion below always gets three channels.
        image = image.convert("RGB")
    pixels = np.array(image)
    if pixels.ndim == 2:
        # Already single-channel; COLOR_RGB2GRAY would reject it.
        return pixels
    return cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)


def _render_prediction(gray, label):
    """Plot ``gray`` titled with the predicted ``label`` and return it as an RGB array."""
    png = io.BytesIO()
    fig, ax = plt.subplots()
    try:
        ax.imshow(gray, cmap='gray')
        ax.set_title(f"Predicted: {label}", fontsize=12)
        ax.axis('off')
        # Render in memory: a fixed file name in the working directory would
        # be overwritten/deleted by concurrent requests.
        fig.savefig(png, format="png")
    finally:
        # Always release the figure, even when rendering failed.
        plt.close(fig)
    encoded = np.frombuffer(png.getvalue(), dtype=np.uint8)
    bgr = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    # OpenCV decodes to BGR; the Gradio image output expects RGB.
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


def process_image(image):
    """Run word detection, the Keras OCR model and Sakshi OCR on one image.

    Args:
        image: The uploaded picture (a PIL image as configured in the Gradio
            interface; plain arrays are accepted too), or ``None`` when
            nothing was uploaded.

    Returns:
        A 4-tuple matching the interface outputs:
        ``(sakshi_output, word_detected_img, word_count, pred_image)``.
        ``pred_image`` is ``None`` when the model or encoder is not loaded or
        the prediction failed. When ``image`` is ``None`` the tuple is
        ``("Error: No image provided", None, 0, None)``.

    Raises:
        Whatever ``detect_words``, ``cv2.imwrite`` or ``run_sakshi_ocr``
        raise; only the model-prediction step is best-effort.
    """
    if image is None:
        # The last slot feeds an image output, so it must be None rather than
        # a message string (a string there is interpreted as a file path).
        return "Error: No image provided", None, 0, None

    img = _to_grayscale(image)

    # Word detection
    word_detected_img, word_count = detect_words(img)

    # First OCR model prediction (best-effort: a failure only drops the plot)
    pred_image = None
    try:
        if model is not None and label_encoder is not None:
            img_resized = cv2.resize(img, (128, 32))
            img_norm = img_resized / 255.0
            img_input = img_norm[np.newaxis, ..., np.newaxis]  # Shape: (1, 32, 128, 1)
            pred = model.predict(img_input)
            pred_label_idx = np.argmax(pred)
            pred_label = label_encoder.inverse_transform([pred_label_idx])[0]
            pred_image = _render_prediction(img, pred_label)
        else:
            print("Model or encoder not loaded", file=sys.stderr)
    except Exception as e:
        pred_image = None
        # Report the failure instead of discarding the message; stderr is used
        # because stdout is captured while Sakshi OCR runs.
        print(f"Error: {str(e)}", file=sys.stderr)

    # Sakshi OCR processing
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
    # Close our handle before anyone else opens the path (required on Windows).
    tmp_file.close()
    try:
        cv2.imwrite(tmp_file.name, img)
        sakshi_output = run_sakshi_ocr(tmp_file.name)
    finally:
        # Remove the temporary image even when OCR raised.
        os.remove(tmp_file.name)

    return sakshi_output, word_detected_img, word_count, pred_image
# Gradio Interface: one image input, four outputs in the order returned by process_image
ocr_input = gr.Image(type="pil", label="Upload an Image")
ocr_outputs = [
    gr.Textbox(label="Sakshi OCR Output"),
    gr.Image(label="Word Detection", type="numpy"),
    gr.Number(label="Word Count"),
    gr.Image(label="Hindi OCR Prediction", type="numpy"),
]
interface = gr.Interface(
    fn=process_image,
    inputs=ocr_input,
    outputs=ocr_outputs,
    title="Hindi OCR App by Sakshi",
    description="Upload an image to perform Hindi OCR and word detection.",
)

# Launch the app
interface.launch()