"""SmartDoc AI: Streamlit app that enhances a document image, OCRs it, and summarizes the text."""

import cv2
import numpy as np
import pytesseract
import streamlit as st
from PIL import Image
from transformers import pipeline

# OCR output must be longer than this (after stripping whitespace) to be summarized.
MIN_SUMMARY_INPUT_CHARS = 50
# Only this many leading characters of the OCR text are passed to the summarizer.
MAX_SUMMARY_INPUT_CHARS = 1000


# Load summarization model
@st.cache_resource
def load_model():
    """Build the BART summarization pipeline.

    Wrapped in ``st.cache_resource`` so the pipeline object is created once
    and reused across Streamlit reruns instead of being rebuilt every time.
    """
    return pipeline("summarization", model="facebook/bart-large-cnn")


summarizer = load_model()


# Image enhancement function
def enhance_image(image: Image.Image) -> np.ndarray:
    """Pre-process a document image for OCR.

    Steps: contrast/brightness boost, Gaussian blur, grayscale conversion,
    then adaptive Gaussian thresholding.

    Args:
        image: PIL image in any mode; it is normalized to RGB first.

    Returns:
        The thresholded image produced by ``cv2.adaptiveThreshold``.
    """
    # Grayscale / palette / 1-bit uploads yield 2-D arrays that cvtColor's
    # RGB2BGR rejects, so force three channels before handing off to OpenCV.
    rgb = np.array(image.convert("RGB"))
    img = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

    # Contrast & brightness
    enhanced = cv2.convertScaleAbs(img, alpha=1.5, beta=30)

    # Denoising
    blurred = cv2.GaussianBlur(enhanced, (5, 5), 0)

    # Convert to grayscale
    gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)

    # Adaptive thresholding
    thresh = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
    return thresh


# OCR Function
def extract_text(image) -> str:
    """Run Tesseract OCR on ``image`` and return the recognized text."""
    return pytesseract.image_to_string(image)


# Streamlit UI
st.title("📄 SmartDoc AI - Intelligent Document Processor")

uploaded_file = st.file_uploader(
    "Upload a document image", type=["jpg", "png", "jpeg"]
)

if uploaded_file is not None:
    image = Image.open(uploaded_file)

    st.subheader("Original Image")
    st.image(image, use_column_width=True)

    processed = enhance_image(image)

    st.subheader("Enhanced Image")
    st.image(processed, use_column_width=True)

    # OCR
    text = extract_text(processed)

    st.subheader("Extracted Text")
    # A non-empty label (visually collapsed) avoids Streamlit's empty-label
    # accessibility warning; the subheader above already titles the box.
    st.text_area(
        "Extracted Text", text, height=200, label_visibility="collapsed"
    )

    # Summarization
    # Strip first so whitespace-only OCR output neither passes the length
    # gate nor consumes part of the character budget.
    summary_input = text.strip()
    if len(summary_input) > MIN_SUMMARY_INPUT_CHARS:
        summary = summarizer(
            summary_input[:MAX_SUMMARY_INPUT_CHARS],
            max_length=130,
            min_length=30,
            do_sample=False,
        )

        st.subheader("Document Summary")
        st.write(summary[0]["summary_text"])