Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from layout_extractor import convert_pdf_to_images, analyze_layout, extract_text_from_blocks, extract_key_values | |
| from processor import load_images, analyze_layout, extract_text_from_blocks, rule_based_kv_extraction | |
| import json | |
# --- Document AI: layout analysis + editable key/value extraction UI ---
# set_page_config is issued before any other Streamlit command in this script.
st.set_page_config(page_title="Document AI", layout="wide")
st.title("🧠 AI-Driven Document Layout & Info Extractor")

# Widget keys must be unique within a script run; this uploader gets its own
# descriptive key so it cannot collide with any other uploader on the page.
uploaded_file = st.file_uploader(
    "Upload a PDF or Image",
    type=["pdf", "png", "jpg", "jpeg"],
    key="upload_extractor",
)

if uploaded_file:
    images = load_images(uploaded_file)

    # One full analysis + edit + download section is rendered per page.
    for page_no, image in enumerate(images, start=1):
        st.subheader(f"Page {page_no}")
        st.image(image, use_column_width=True)

        with st.spinner("Analyzing layout..."):
            layout = analyze_layout(image)
            blocks = extract_text_from_blocks(image, layout)
            kv_data = rule_based_kv_extraction(blocks)

        st.success("Done! Here's what we found:")
        st.json(kv_data)

        st.subheader("✏️ Edit Extracted Fields")
        # Each input is keyed by page and field name: without an explicit key,
        # two pages yielding the same field/value pair would produce identical
        # widgets and Streamlit would reject the duplicate.
        edited_data = {
            key: st.text_input(
                str(key),
                value,
                key=f"extractor_field_{page_no}_{key}",
            )
            for key, value in kv_data.items()
        }

        # Per-page key for the same reason as the text inputs above.
        st.download_button(
            "⬇️ Download JSON",
            data=json.dumps(edited_data, indent=2),
            file_name="extracted_data.json",
            mime="application/json",
            key=f"extractor_download_{page_no}",
        )

        with st.expander("🔍 All Detected Segments"):
            for segment in blocks:
                # Only the first 150 characters of each segment are previewed.
                st.markdown(f"**{segment['type']}**: {segment['text'][:150]}...")
# --- Document Layout Analyzer: read-only extraction UI ---
# NOTE(review): analyze_layout and extract_text_from_blocks are imported from
# both layout_extractor and processor at the top of the file; the later import
# wins, so this section runs the processor versions. Confirm that is intended.
st.title("📄 AI-Driven Document Layout Analyzer")

# Widget keys must be unique within a script run; this uploader gets its own
# descriptive key so it cannot collide with any other uploader on the page.
uploaded_file = st.file_uploader(
    "Upload a PDF or Image",
    type=["pdf", "png", "jpg", "jpeg"],
    key="upload_analyzer",
)

if uploaded_file:
    # Compare the extension case-insensitively so "SCAN.PDF" is still routed
    # to the PDF converter instead of being handed to PIL as an image.
    if uploaded_file.name.lower().endswith(".pdf"):
        images = convert_pdf_to_images(uploaded_file)
    else:
        from PIL import Image

        images = [Image.open(uploaded_file)]

    for page_no, image in enumerate(images, start=1):
        st.image(image, caption=f"Page {page_no}", use_column_width=True)

        layout = analyze_layout(image)
        blocks = extract_text_from_blocks(image, layout)
        key_values = extract_key_values(blocks)

        st.subheader("Extracted Key Data")
        st.json(key_values)

        st.subheader("All Segments")
        for block in blocks:
            # Only the first 200 characters of each segment are previewed.
            st.markdown(f"**{block['type']}**: {block['text'][:200]}...")

        # Keyed per page so multi-page documents do not create identical
        # download buttons; the MIME type marks the payload as JSON.
        st.download_button(
            "Download JSON",
            data=json.dumps(key_values, indent=2),
            file_name="extracted_data.json",
            mime="application/json",
            key=f"analyzer_download_{page_no}",
        )