File size: 1,708 Bytes
37d91c9
d72f6aa
 
 
6cac521
 
d72f6aa
6cac521
 
 
 
d72f6aa
6cac521
d72f6aa
6cac521
 
 
 
d72f6aa
6cac521
 
d72f6aa
6cac521
 
d72f6aa
6cac521
 
d72f6aa
6cac521
 
 
 
 
 
d72f6aa
6cac521
d72f6aa
6cac521
 
 
 
d72f6aa
6cac521
 
d72f6aa
6cac521
d72f6aa
6cac521
 
d72f6aa
6cac521
 
d72f6aa
6cac521
d72f6aa
6cac521
 
d72f6aa
6cac521
 
d72f6aa
6cac521
 
37d91c9
6cac521
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import streamlit as st
import cv2
import numpy as np
from PIL import Image
import pytesseract
from transformers import pipeline

# Load summarization model
@st.cache_resource
def load_model():
    """Build and return the summarization pipeline used by the app.

    The pipeline is created for the ``"summarization"`` task with the
    ``facebook/bart-large-cnn`` checkpoint. The function is wrapped in
    ``st.cache_resource`` so the returned object can be reused instead of
    being rebuilt each time the script runs.
    """
    task_name = "summarization"
    checkpoint = "facebook/bart-large-cnn"
    summarization_pipeline = pipeline(task_name, model=checkpoint)
    return summarization_pipeline

# Module-level summarizer shared by the UI code; obtained through load_model()
# so the pipeline comes from the cached loader rather than being built inline.
summarizer = load_model()

# Image enhancement function
def enhance_image(image, *, alpha=1.5, beta=30, blur_ksize=5, block_size=11, c=2):
    """Return a binarised, OCR-friendly version of *image*.

    The pipeline is: contrast/brightness boost, Gaussian blur, grayscale
    conversion, then adaptive Gaussian thresholding.

    Args:
        image: A PIL image, or an RGB array-like accepted by ``np.array``.
        alpha: Contrast gain passed to ``cv2.convertScaleAbs``.
        beta: Brightness offset passed to ``cv2.convertScaleAbs``.
        blur_ksize: Side length of the square Gaussian kernel; OpenCV
            requires it to be odd.
        block_size: Neighbourhood size for the adaptive threshold; OpenCV
            requires it to be odd and greater than 1.
        c: Constant subtracted from the weighted neighbourhood mean by the
            adaptive threshold.

    Returns:
        The single-channel thresholded image produced by
        ``cv2.adaptiveThreshold`` (pixels are 0 or 255).
    """
    # Normalise the colour mode first: grayscale, palette or bilevel images
    # become 2-D arrays that the RGB->BGR conversion below rejects, and CMYK
    # data would be misread as RGBA. Arrays passed in directly are left as-is.
    if isinstance(image, Image.Image):
        image = image.convert("RGB")

    img = np.array(image)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # Contrast & brightness
    enhanced = cv2.convertScaleAbs(img, alpha=alpha, beta=beta)

    # Denoising (sigma 0 lets OpenCV derive it from the kernel size)
    blurred = cv2.GaussianBlur(enhanced, (blur_ksize, blur_ksize), 0)

    # Convert to grayscale
    gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)

    # Adaptive thresholding
    thresh = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, block_size, c
    )

    return thresh

# OCR Function
def extract_text(image):
    """Run Tesseract OCR on *image* and return whatever text it recognises."""
    return pytesseract.image_to_string(image)

# Streamlit UI
st.title("📄 SmartDoc AI - Intelligent Document Processor")

uploaded_file = st.file_uploader("Upload a document image", type=["jpg", "png", "jpeg"])

# Compare against None explicitly instead of relying on the truthiness of the
# uploaded file object.
if uploaded_file is not None:
    try:
        image = Image.open(uploaded_file)
        # Image.open is lazy; force decoding now so a corrupt or truncated
        # upload is reported here rather than deep inside the pipeline.
        image.load()
    except OSError:
        st.error("Could not read the uploaded file as an image.")
        st.stop()

    # NOTE(review): use_column_width is reportedly deprecated in recent
    # Streamlit releases — confirm against the pinned version before changing.
    st.subheader("Original Image")
    st.image(image, use_column_width=True)

    processed = enhance_image(image)

    st.subheader("Enhanced Image")
    st.image(processed, use_column_width=True)

    # OCR
    text = extract_text(processed)

    st.subheader("Extracted Text")
    # Give the widget a real label (hidden visually) instead of an empty
    # string; the subheader above already serves as the visible caption.
    st.text_area("Extracted Text", text, height=200, label_visibility="collapsed")

    # Summarization
    # Judge length on stripped text so OCR output that is mostly whitespace
    # or newlines is not sent to the model.
    cleaned_text = text.strip()
    if len(cleaned_text) > 50:
        # truncation=True guards against inputs whose first 1000 characters
        # still tokenize past the model's maximum input length.
        summary = summarizer(
            cleaned_text[:1000],
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        st.subheader("Document Summary")
        st.write(summary[0]["summary_text"])