import gradio as gr
import cv2
import numpy as np
from PIL import Image
import pytesseract
from transformers import pipeline

# -------------------------------
# Configuration
# -------------------------------
# OCR output with this many (or fewer) non-whitespace-trimmed characters is
# not summarized.
MIN_TEXT_CHARS = 50
# Only the first MAX_INPUT_CHARS characters of the OCR text are summarized.
MAX_INPUT_CHARS = 1000
# Length bounds passed to the summarization pipeline.
SUMMARY_MAX_LENGTH = 130
SUMMARY_MIN_LENGTH = 30
TOO_SHORT_MESSAGE = "Text too short for summarization."


# -------------------------------
# Load Summarization Model
# -------------------------------
def load_model():
    """Build the summarization pipeline backed by facebook/bart-large-cnn."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


# Loaded once at import time so every request reuses the same pipeline.
summarizer = load_model()


# -------------------------------
# Image Enhancement
# -------------------------------
def enhance_image(image) -> np.ndarray:
    """Return a binarized version of *image* prepared for OCR.

    Args:
        image: A PIL image, or anything ``np.array`` turns into an RGB
            (height, width, 3) array.

    Returns:
        A single-channel array produced by adaptive thresholding, whose
        pixels are either 0 or 255.
    """
    # Uploads may be RGBA, grayscale or palette images; the RGB->BGR
    # conversion below only accepts three channels, so normalize first.
    if isinstance(image, Image.Image):
        image = image.convert("RGB")

    img = np.array(image)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # Boost contrast (alpha) and brightness (beta), then smooth so that the
    # threshold step reacts to strokes rather than pixel noise.
    enhanced = cv2.convertScaleAbs(img, alpha=1.5, beta=30)
    blurred = cv2.GaussianBlur(enhanced, (5, 5), 0)
    gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)

    # Gaussian-weighted adaptive threshold: 11x11 neighbourhood, constant 2.
    thresh = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
    return thresh


# -------------------------------
# OCR
# -------------------------------
def extract_text(image) -> str:
    """Run Tesseract OCR on *image* and return the recognized text."""
    return pytesseract.image_to_string(image)


# -------------------------------
# Main Pipeline
# -------------------------------
def process_document(input_image):
    """Enhance an uploaded document image, OCR it, and summarize the text.

    Args:
        input_image: The uploaded image, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(enhanced_image, extracted_text, summary_text)`` tuple. When
        *input_image* is ``None`` the tuple is ``(None, "", "")``. When the
        OCR text is too short, ``summary_text`` is a fixed notice instead of
        a model-generated summary.
    """
    if input_image is None:
        return None, "", ""

    enhanced = enhance_image(input_image)
    text = extract_text(enhanced)

    # Judge length on stripped text so that OCR output made up of newlines
    # or other whitespace is not sent to the summarizer.
    cleaned = text.strip()
    if len(cleaned) <= MIN_TEXT_CHARS:
        return enhanced, text, TOO_SHORT_MESSAGE

    result = summarizer(
        cleaned[:MAX_INPUT_CHARS],
        max_length=SUMMARY_MAX_LENGTH,
        min_length=SUMMARY_MIN_LENGTH,
        do_sample=False,
        # A character cap does not bound the token count for unusual OCR
        # output; let the tokenizer clip to the model's input limit.
        truncation=True,
    )
    return enhanced, text, result[0]["summary_text"]


# -------------------------------
# Gradio UI
# -------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# SmartDoc AI - Intelligent Document Processor")

    with gr.Row():
        input_img = gr.Image(type="pil", label="Upload Document Image")

    with gr.Row():
        enhanced_output = gr.Image(label="Enhanced Image")
        extracted_text = gr.Textbox(label="Extracted Text", lines=10)

    summary = gr.Textbox(label="Document Summary", lines=6)

    btn = gr.Button("Process Document")

    btn.click(
        fn=process_document,
        inputs=input_img,
        outputs=[enhanced_output, extracted_text, summary],
    )

# Only start the web server when executed as a script, so importing this
# module does not block on a running server.
if __name__ == "__main__":
    demo.launch()