| import streamlit as st |
| import cv2 |
| import numpy as np |
| from PIL import Image |
| import pytesseract |
| from transformers import pipeline |
|
|
| |
@st.cache_resource
def load_model():
    """Create the text-summarization pipeline used by the app.

    The function is decorated with ``st.cache_resource`` so the returned
    pipeline object can be reused instead of being rebuilt on every call.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``"summarization"`` task with the ``facebook/bart-large-cnn``
        checkpoint.
    """
    task = "summarization"
    checkpoint = "facebook/bart-large-cnn"
    return pipeline(task, model=checkpoint)


# Module-level handle to the summarization pipeline.
summarizer = load_model()
|
|
| |
def enhance_image(image):
    """Clean up a document image and binarize it for OCR.

    The pipeline is: normalize to three-channel RGB, convert to BGR for
    OpenCV, boost contrast/brightness, apply a Gaussian blur, convert to
    grayscale, and finally apply a Gaussian adaptive threshold.

    Args:
        image: A PIL image in any mode, or any other object that
            ``np.array`` turns into an RGB(A) pixel array.

    Returns:
        The single-channel array produced by ``cv2.adaptiveThreshold``
        with ``THRESH_BINARY`` and a maximum value of 255.
    """
    # Grayscale, palette and 1-bit PIL images convert to 2-D arrays, which
    # cv2.cvtColor(..., COLOR_RGB2BGR) rejects. Normalizing to RGB first
    # lets those uploads go through the same pipeline as colour images.
    if isinstance(image, Image.Image):
        image = image.convert("RGB")

    bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

    # Linear contrast (alpha) and brightness (beta) boost.
    enhanced = cv2.convertScaleAbs(bgr, alpha=1.5, beta=30)

    # Smooth out noise before thresholding.
    blurred = cv2.GaussianBlur(enhanced, (5, 5), 0)

    gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)

    # Adaptive threshold: 11x11 neighbourhood, constant 2 subtracted from
    # the Gaussian-weighted local mean.
    thresh = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )

    return thresh
|
|
| |
def extract_text(image):
    """Run OCR on ``image`` and return the recognized text.

    Args:
        image: The image to read; passed unchanged to
            ``pytesseract.image_to_string``.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the image.
    """
    return pytesseract.image_to_string(image)
|
|
| |
# --- Streamlit UI: upload -> enhance -> OCR -> summarize -------------------

st.title("📄 SmartDoc AI - Intelligent Document Processor")

uploaded_file = st.file_uploader("Upload a document image", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    # Image.open is lazy, so force a full decode here; a corrupt or
    # truncated upload is then reported to the user instead of crashing
    # later inside the enhancement step.
    try:
        image = Image.open(uploaded_file)
        image.load()
    except OSError:
        st.error("Could not read the uploaded file as an image.")
        st.stop()

    # NOTE(review): newer Streamlit releases appear to deprecate
    # use_column_width -- confirm the target version before switching.
    st.subheader("Original Image")
    st.image(image, use_column_width=True)

    processed = enhance_image(image)

    st.subheader("Enhanced Image")
    st.image(processed, use_column_width=True)

    text = extract_text(processed)

    st.subheader("Extracted Text")
    # Streamlit warns about empty widget labels; give the widget a real
    # label and hide it, since the subheader above already titles it.
    st.text_area("Extracted Text", text, height=200, label_visibility="collapsed")

    # Only summarize when there is a meaningful amount of real text;
    # surrounding whitespace from OCR should not count toward the threshold.
    cleaned = text.strip()
    if len(cleaned) > 50:
        # Cap the input at 1000 characters before passing it to the model.
        summary = summarizer(cleaned[:1000], max_length=130, min_length=30, do_sample=False)
        st.subheader("Document Summary")
        st.write(summary[0]["summary_text"])