import streamlit as st
import cv2
import numpy as np
from PIL import Image
import pytesseract
from transformers import pipeline
# Load summarization model
@st.cache_resource
def load_model():
    """Build the Hugging Face summarization pipeline used by the app.

    Wrapped in ``st.cache_resource`` so the pipeline object is handed to
    Streamlit's resource cache instead of being rebuilt by every call.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``"summarization"`` task with the ``facebook/bart-large-cnn`` model.
    """
    task = "summarization"
    checkpoint = "facebook/bart-large-cnn"
    return pipeline(task, model=checkpoint)


summarizer = load_model()
# Image enhancement function
def enhance_image(image):
    """Clean up a document image and binarize it for OCR.

    Pipeline: contrast/brightness boost -> Gaussian blur -> grayscale ->
    adaptive Gaussian threshold.

    Args:
        image: A PIL image in any mode, or any object ``np.array`` can turn
            into an RGB array that ``cv2.cvtColor`` accepts.

    Returns:
        The single-channel image produced by ``cv2.adaptiveThreshold`` with
        ``THRESH_BINARY`` and a maximum value of 255.
    """
    # Grayscale ("L"), palette ("P"), 1-bit and "LA" images do not decode to a
    # 3- or 4-channel array, so the RGB->BGR conversion below would raise; CMYK
    # would be silently misread as RGBA. Normalising PIL inputs to RGB first
    # makes every decodable upload work. Non-PIL inputs pass through untouched.
    if isinstance(image, Image.Image):
        image = image.convert("RGB")

    img = np.array(image)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # Contrast & brightness: out = saturate(|1.5 * pixel + 30|)
    enhanced = cv2.convertScaleAbs(img, alpha=1.5, beta=30)

    # Denoising with a 5x5 Gaussian kernel (sigma derived from kernel size)
    blurred = cv2.GaussianBlur(enhanced, (5, 5), 0)

    # Convert to grayscale; adaptiveThreshold needs a single-channel input
    gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)

    # Adaptive thresholding: 11x11 neighbourhood, constant 2 subtracted from
    # the Gaussian-weighted local mean
    thresh = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
    return thresh
# OCR Function
def extract_text(image):
    """Run Tesseract OCR over ``image`` and return the recognised text.

    Args:
        image: Image passed straight through to
            ``pytesseract.image_to_string``.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the image.
    """
    return pytesseract.image_to_string(image)
# Streamlit UI
# Flow: upload -> show original -> enhance -> OCR -> optional summary.
st.title("📄 SmartDoc AI - Intelligent Document Processor")
uploaded_file = st.file_uploader("Upload a document image", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    # The extension filter on the uploader does not guarantee decodable
    # content. Image.open is lazy, so force a full decode here and report a
    # corrupt/truncated upload in the UI instead of crashing with a traceback.
    try:
        image = Image.open(uploaded_file)
        image.load()
    except OSError:
        st.error("Could not read the uploaded file as an image.")
        st.stop()

    # NOTE(review): newer Streamlit releases appear to deprecate
    # `use_column_width` in favour of `use_container_width` - confirm the
    # minimum supported Streamlit version before switching.
    st.subheader("Original Image")
    st.image(image, use_column_width=True)

    processed = enhance_image(image)
    st.subheader("Enhanced Image")
    st.image(processed, use_column_width=True)

    # OCR
    text = extract_text(processed)
    st.subheader("Extracted Text")
    # An empty label triggers Streamlit's accessibility warning; give the
    # widget a real label and hide it, since the subheader already titles it.
    st.text_area("Extracted Text", text, height=200, label_visibility="collapsed")

    # Summarization
    # Gate on the stripped text so whitespace-only OCR output is not sent to
    # the model; only the first 1000 characters are summarised.
    cleaned = text.strip()
    if len(cleaned) > 50:
        summary = summarizer(cleaned[:1000], max_length=130, min_length=30, do_sample=False)
        st.subheader("Document Summary")
        st.write(summary[0]["summary_text"])