"""Gradio demo that runs an Urdu OCR vision model on an uploaded image."""

import os
import tempfile
from typing import Optional

import gradio as gr
import torch
from PIL import Image
# Keep unsloth ahead of transformers, matching the original import order.
from unsloth import FastVisionModel
from transformers import AutoModel

# Hub repository id ("namespace/name") of the checkpoint to load. This must be
# a repo id or a local path, not a full https://huggingface.co/... web URL.
MODEL_ID = "nomypython/urdu-ocr-deepseek"

print("Loading model...")
model, tokenizer = FastVisionModel.from_pretrained(
    MODEL_ID,
    load_in_4bit=True,
    auto_model=AutoModel,
    trust_remote_code=True,
    use_gradient_checkpointing="unsloth",
)
FastVisionModel.for_inference(model)
print("✓ Model loaded!")


def extract_urdu_text(image: Optional["Image.Image"]) -> str:
    """Run OCR on an uploaded image and return the text shown in the UI.

    Args:
        image: The PIL image delivered by the ``gr.Image`` component, or
            ``None`` when nothing is uploaded.

    Returns:
        The value returned by ``model.infer`` when it is truthy; otherwise a
        warning string. Any exception raised while saving or inferring is
        caught and reported as an error string instead of propagating.
    """
    if image is None:
        return "⚠️ Please upload an image!"

    try:
        # A per-request temporary directory gives every call its own file
        # (no clashes between concurrent requests) and is removed even when
        # saving or inference raises.
        with tempfile.TemporaryDirectory() as work_dir:
            image_path = os.path.join(work_dir, "input.png")
            image.save(image_path)

            # NOTE(review): `infer` is supplied by the checkpoint's remote
            # code, which is not visible here. Confirm that it actually
            # returns the recognised text with these arguments, and whether
            # the prompt is expected to contain an image placeholder token
            # before the leading newline.
            result = model.infer(
                tokenizer,
                prompt="\nExtract Urdu text from this image:",
                image_file=image_path,
                output_path="./temp_results",
                image_size=640,
                base_size=640,
                crop_mode=False,
                save_results=False,
            )
    except Exception as e:
        # UI boundary: show the failure to the user rather than crashing
        # the event handler.
        return f"❌ Error: {str(e)}"

    return result if result else "⚠️ No text detected"


with gr.Blocks(title="Urdu OCR") as demo:
    gr.Markdown("# 🔤 Urdu OCR - اردو او سی آر")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image")
            extract_btn = gr.Button("🚀 Extract Text", variant="primary")

        with gr.Column():
            text_output = gr.Textbox(
                label="Extracted Text",
                lines=10,
                rtl=True,
            )

    # OCR runs both on an explicit button press and whenever the image
    # component's value changes.
    extract_btn.click(extract_urdu_text, image_input, text_output)
    image_input.change(extract_urdu_text, image_input, text_output)

demo.launch()