import streamlit as st import time # Import our custom modules from src.ingestion import extract_text_from_pdf, extract_text_from_image from src.processing import chunk_text from src.analysis import analyze_document # --- 1. PAGE CONFIGURATION & STYLING --- st.set_page_config( page_title="LegalLens AI", layout="wide", initial_sidebar_state="collapsed" ) # Inject Custom CSS for a Professional Look st.markdown(""" """, unsafe_allow_html=True) # --- 2. SIDEBAR --- with st.sidebar: st.header("About LegalLens AI") st.markdown("---") st.markdown(""" **LegalLens AI** is an AI-powered document intelligence platform designed to bring clarity to complex contracts. **How it works:** 1. **Ingestion:** OCR extracts text from PDFs or images. 2. **Processing:** Long documents are intelligently chunked. 3. **Analysis:** Advanced Transformer models summarize content and detect high-risk clauses. """) st.markdown("---") st.caption("Built by Ardhi Gagan.") # --- 3. MAIN APPLICATION INTERFACE --- # Top Banner (No Logo) st.markdown('
AI Contract Clarity & Risk Assessment
', unsafe_allow_html=True) st.markdown("---") # File Uploader Section with st.container(): st.markdown("### Start Your Analysis") # Markdown handles styles better than st.write for headers st.write("Upload a legal contract (PDF, PNG, or JPG) to receive an instant executive summary and risk report.") uploaded_file = st.file_uploader("", type=["pdf", "png", "jpg", "jpeg"], label_visibility="collapsed") if uploaded_file is not None: # A. INGESTION STEP with st.status("Reading document...", expanded=True) as status: st.write("Initializing OCR engine...") # Determine file type and extract text if uploaded_file.name.lower().endswith('.pdf'): raw_text = extract_text_from_pdf(uploaded_file.read()) else: raw_text = extract_text_from_image(uploaded_file.read()) status.update(label="Document successfully read!", state="complete", expanded=False) # Show preview with st.expander("View Extracted Raw Text"): st.text_area("", raw_text, height=200) st.markdown("{summary}