# Afeefa123's picture
# Update src/streamlit_app.py
# 6cac521 verified
import streamlit as st
import cv2
import numpy as np
from PIL import Image
import pytesseract
from transformers import pipeline
# Summarization model, built once and reused
@st.cache_resource
def load_model():
    """Create the Hugging Face summarization pipeline for BART-large-CNN.

    The function is decorated with ``st.cache_resource`` so the returned
    pipeline object is cached by Streamlit instead of being rebuilt on
    every call.

    Returns:
        The ``transformers`` summarization pipeline.
    """
    bart_pipeline = pipeline("summarization", model="facebook/bart-large-cnn")
    return bart_pipeline


summarizer = load_model()
# Image enhancement function
def enhance_image(image):
    """Clean up a document image for OCR and return a binarized copy.

    Steps: normalize to a 3-channel BGR array, raise contrast/brightness,
    Gaussian-blur to suppress noise, convert to grayscale, then apply
    adaptive Gaussian thresholding.

    Args:
        image: A PIL image (any mode) or an array-like accepted by
            ``np.array``. Array input is presumed to be RGB(A) channel
            order when it has a channel axis, or grayscale when 2-D.

    Returns:
        The thresholded single-channel image as a NumPy array.
    """
    # Uploaded PNGs are frequently grayscale ("L"), palette ("P") or
    # bilevel ("1"); those yield a 2-D array that COLOR_RGB2BGR rejects.
    # Normalizing PIL input to RGB keeps the rest of the pipeline uniform.
    if hasattr(image, "convert"):
        image = image.convert("RGB")

    img = np.array(image)
    if img.ndim == 2:
        # Raw grayscale array passed directly: expand to three channels.
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    else:
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # Contrast & brightness
    enhanced = cv2.convertScaleAbs(img, alpha=1.5, beta=30)

    # Denoising
    blurred = cv2.GaussianBlur(enhanced, (5, 5), 0)

    # Convert to grayscale
    gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)

    # Adaptive thresholding (11x11 neighbourhood, constant 2 subtracted)
    thresh = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
    return thresh
# OCR Function
def extract_text(image):
    """Run Tesseract OCR on ``image`` and return the recognized text.

    Args:
        image: Any image object accepted by
            ``pytesseract.image_to_string``.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    return pytesseract.image_to_string(image)
# Streamlit UI
# Flow: upload an image -> show original -> enhance -> OCR -> summarize.
st.title("📄 SmartDoc AI - Intelligent Document Processor")
uploaded_file = st.file_uploader("Upload a document image", type=["jpg", "png", "jpeg"])

if uploaded_file:
    try:
        image = Image.open(uploaded_file)
        # Force decoding now so a truncated/corrupt upload fails here
        # rather than later inside the processing pipeline.
        image.load()
    except OSError:
        # Pillow's "cannot identify image" error is an OSError subclass.
        st.error("The uploaded file could not be read as an image.")
        st.stop()

    st.subheader("Original Image")
    # NOTE(review): newer Streamlit releases deprecate `use_column_width`;
    # confirm the pinned Streamlit version before switching parameters.
    st.image(image, use_column_width=True)

    processed = enhance_image(image)
    st.subheader("Enhanced Image")
    st.image(processed, use_column_width=True)

    # OCR
    text = extract_text(processed)
    st.subheader("Extracted Text")
    # A non-empty label (hidden visually) avoids Streamlit's empty-label
    # accessibility warning; the subheader above already titles the box.
    st.text_area("Extracted Text", text, height=200, label_visibility="collapsed")

    # Summarization
    # Strip first so whitespace-only OCR output (Tesseract appends a form
    # feed) does not pass the length gate and get sent to the model.
    cleaned_text = text.strip()
    if len(cleaned_text) > 50:
        # Cap input at 1000 characters to keep the model input short.
        summary = summarizer(cleaned_text[:1000], max_length=130, min_length=30, do_sample=False)
        st.subheader("Document Summary")
        st.write(summary[0]["summary_text"])
    else:
        st.info("Not enough text was extracted to generate a summary.")