import os import io import sys import cv2 import base64 import pickle import numpy as np import tensorflow as tf import matplotlib.pyplot as plt import matplotlib.font_manager as fm import tempfile import sakshi_ocr from fastapi import FastAPI, File, UploadFile, HTTPException from fastapi.responses import HTMLResponse, JSONResponse # Define paths to your assets (update these if necessary) MODEL_PATH = 'hindi_ocr_model.keras' ENCODER_PATH = 'label_encoder.pkl' FONT_PATH = 'NotoSansDevanagari-Regular.ttf' # Load custom font if available if os.path.exists(FONT_PATH): fm.fontManager.addfont(FONT_PATH) plt.rcParams['font.family'] = 'Noto Sans Devanagari' else: print("Custom font not found. Using default font.") # Load the OCR model def load_model(): if not os.path.exists(MODEL_PATH): raise FileNotFoundError(f"Model file not found at {MODEL_PATH}") return tf.keras.models.load_model(MODEL_PATH) # Load the label encoder def load_label_encoder(): if not os.path.exists(ENCODER_PATH): raise FileNotFoundError(f"Label encoder file not found at {ENCODER_PATH}") with open(ENCODER_PATH, 'rb') as f: return pickle.load(f) # Global loading so they persist across requests model = load_model() label_encoder = load_label_encoder() # Function for word detection def detect_words(image): # Assume input is a grayscale image _, binary = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) kernel = np.ones((3, 3), np.uint8) dilated = cv2.dilate(binary, kernel, iterations=2) contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) word_img = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) word_count = 0 for contour in contours: x, y, w, h = cv2.boundingRect(contour) if w > 10 and h > 10: cv2.rectangle(word_img, (x, y), (x+w, y+h), (0, 255, 0), 2) word_count += 1 return word_img, word_count # Function to run Sakshi OCR and capture its output def run_sakshi_ocr(image_path): buffer = io.StringIO() old_stdout = sys.stdout sys.stdout = buffer try: sakshi_ocr.generate(image_path) finally: sys.stdout = old_stdout return buffer.getvalue() # Utility function: convert image (numpy array) to a base64 encoded string def image_to_base64(image, ext=".png"): success, encoded_image = cv2.imencode(ext, image) if not success: return None return base64.b64encode(encoded_image).decode('utf-8') # Initialize FastAPI app app = FastAPI(title="Hindi OCR App by sakshi") @app.get("/", response_class=HTMLResponse) async def root(): html_content = """