import streamlit as st
import cv2
import numpy as np
from PIL import Image
import pytesseract
from transformers import pipeline

# Load summarization model
@st.cache_resource
def load_model():
    """Build the Hugging Face summarization pipeline used by the app.

    The function is wrapped in ``st.cache_resource`` so Streamlit can hand
    back the already-built pipeline instead of constructing it again.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``facebook/bart-large-cnn`` summarization model.
    """
    task = "summarization"
    checkpoint = "facebook/bart-large-cnn"
    return pipeline(task, model=checkpoint)

# Module-level handle to the summarization pipeline; load_model() is decorated
# with st.cache_resource, so this call goes through Streamlit's resource cache.
summarizer = load_model()

# Image enhancement function
def enhance_image(image):
    """Prepare a document image for OCR and return a binarised copy.

    The pipeline boosts contrast/brightness, blurs to suppress noise,
    converts to grayscale and applies a Gaussian adaptive threshold.

    Args:
        image: A ``PIL.Image.Image`` in any mode, or an array-like object
            already holding 3-channel RGB pixel data.

    Returns:
        A single-channel 8-bit NumPy array whose pixels are either 0 or 255.
    """
    # Uploaded files are not always RGB: PNGs are frequently grayscale ("L"),
    # palette ("P") or bilevel ("1"), and JPEGs can be CMYK. Those modes
    # either make cv2.cvtColor raise (2-D input) or get misinterpreted, so
    # normalise PIL images to plain RGB before handing them to OpenCV.
    if isinstance(image, Image.Image):
        image = image.convert("RGB")

    img = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

    # Contrast & brightness: out = saturate(|alpha * in + beta|)
    enhanced = cv2.convertScaleAbs(img, alpha=1.5, beta=30)

    # Denoising with a 5x5 Gaussian kernel (sigma derived from kernel size)
    blurred = cv2.GaussianBlur(enhanced, (5, 5), 0)

    # Convert to grayscale; adaptiveThreshold needs a single 8-bit channel
    gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)

    # Adaptive thresholding: 11x11 neighbourhood, constant 2 subtracted from
    # the Gaussian-weighted local mean
    thresh = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, 11, 2
    )

    return thresh

# OCR Function
def extract_text(image):
    """Run Tesseract OCR over ``image`` and return the recognised text.

    Args:
        image: Any image object accepted by ``pytesseract.image_to_string``.

    Returns:
        The value produced by ``pytesseract.image_to_string`` for ``image``.
    """
    return pytesseract.image_to_string(image)

# Streamlit UI
st.title("📄 SmartDoc AI - Intelligent Document Processor")

uploaded_file = st.file_uploader("Upload a document image", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    # Image.open() is lazy, so force a full decode here: a corrupt or
    # mislabelled upload is then reported to the user instead of crashing
    # the script further down. PIL's UnidentifiedImageError is an OSError.
    try:
        image = Image.open(uploaded_file)
        image.load()
    except OSError:
        st.error("The uploaded file could not be read as an image.")
        st.stop()

    st.subheader("Original Image")
    st.image(image, use_column_width=True)

    processed = enhance_image(image)

    st.subheader("Enhanced Image")
    st.image(processed, use_column_width=True)

    # OCR
    text = extract_text(processed)

    st.subheader("Extracted Text")
    # Streamlit warns about empty widget labels; give the widget a real label
    # and hide it, since the subheader above already titles the section.
    st.text_area("Extracted Text", text, height=200, label_visibility="collapsed")

    # Summarization
    # Strip first so whitespace-only OCR output does not pass the length gate.
    stripped_text = text.strip()
    if len(stripped_text) > 50:
        # Only the first 1000 characters are sent to the summarizer.
        summary = summarizer(stripped_text[:1000], max_length=130, min_length=30, do_sample=False)
        st.subheader("Document Summary")
        st.write(summary[0]["summary_text"])
    else:
        st.info("Not enough text was extracted to generate a summary.")