"""LegalLens AI: Streamlit front end for contract summarisation and risk review.

Flow of the script (Streamlit re-executes it top to bottom on every
interaction):

1. Configure the page and render the sidebar / banner.
2. Accept an uploaded PDF or image and extract its text.
3. On request, chunk the text, run the analysis, and render a summary next to
   the detected risks grouped by risk type.

NOTE(review): several ``st.markdown(..., unsafe_allow_html=True)`` literals
below contain only newlines and plain text in the reviewed copy of this file.
They look like HTML wrappers (and a CSS block) whose markup was lost; confirm
against the original and restore the markup if so.
"""

import html
import time

import streamlit as st

# Import our custom modules
from src.ingestion import extract_text_from_pdf, extract_text_from_image
from src.processing import chunk_text
from src.analysis import analyze_document


def _group_risks_by_type(risks):
    """Group risk records by their ``"type"`` key, preserving first-seen order.

    Args:
        risks: Iterable of mappings, each providing a ``"type"`` entry.

    Returns:
        A dict mapping each type label to the list of risk records carrying
        that label, in the order they were encountered.
    """
    grouped = {}
    for risk in risks:
        grouped.setdefault(risk["type"], []).append(risk)
    return grouped


# --- 1. PAGE CONFIGURATION & STYLING ---
st.set_page_config(
    page_title="LegalLens AI",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# Inject Custom CSS for a Professional Look
st.markdown(""" """, unsafe_allow_html=True)

# --- 2. SIDEBAR ---
with st.sidebar:
    st.header("About LegalLens AI")
    st.markdown("---")
    st.markdown("""
    **LegalLens AI** is an AI-powered document intelligence platform designed to bring clarity to complex contracts.

    **How it works:**
    1. **Ingestion:** OCR extracts text from PDFs or images.
    2. **Processing:** Long documents are intelligently chunked.
    3. **Analysis:** Advanced Transformer models summarize content and detect high-risk clauses.
    """)
    st.markdown("---")
    st.caption("Built by Ardhi Gagan.")

# --- 3. MAIN APPLICATION INTERFACE ---

# Top Banner (No Logo)
st.markdown("\n\nLegalLens AI\n\n", unsafe_allow_html=True)
st.markdown("\n\nAI Contract Clarity & Risk Assessment\n\n", unsafe_allow_html=True)
st.markdown("---")

# File Uploader Section
with st.container():
    # Markdown handles styles better than st.write for headers
    st.markdown("### Start Your Analysis")
    st.write("Upload a legal contract (PDF, PNG, or JPG) to receive an instant executive summary and risk report.")
    # A real label is supplied and hidden visually: Streamlit discourages
    # empty labels because assistive technology relies on them.
    uploaded_file = st.file_uploader(
        "Upload a legal contract",
        type=["pdf", "png", "jpg", "jpeg"],
        label_visibility="collapsed",
    )

if uploaded_file is not None:
    # A. INGESTION STEP
    with st.status("Reading document...", expanded=True) as status:
        st.write("Initializing OCR engine...")

        # getvalue() returns the complete payload regardless of the current
        # stream position, unlike read(), which yields nothing once the
        # buffer has already been consumed.
        file_bytes = uploaded_file.getvalue()

        # Determine file type and extract text
        if uploaded_file.name.lower().endswith(".pdf"):
            raw_text = extract_text_from_pdf(file_bytes)
        else:
            raw_text = extract_text_from_image(file_bytes)

        status.update(label="Document successfully read!", state="complete", expanded=False)

    # Show preview
    with st.expander("View Extracted Raw Text"):
        st.text_area(
            "Extracted raw text",
            raw_text,
            height=200,
            label_visibility="collapsed",
        )

    st.markdown("\n", unsafe_allow_html=True)  # Spacer

    # Only offer the analysis when there is something to analyse.
    # NOTE(review): assumes the extractors return a string (possibly empty)
    # when nothing could be read; confirm against src.ingestion.
    has_text = bool(raw_text and str(raw_text).strip())
    if not has_text:
        st.warning("No readable text could be extracted from this document.")

    # B. ANALYSIS BUTTON & LOGIC
    if has_text and st.button("Run AI Analysis"):
        progress_bar = st.progress(0)

        with st.status("Performing AI Analysis...", expanded=True) as status:
            # Step 1: Chunking
            st.write("Splitting document into context-aware chunks (LangChain)...")
            chunks = chunk_text(raw_text)
            progress_bar.progress(30)

            # Step 2: AI Analysis
            st.write(f"Running inference on {len(chunks)} chunks using BART Transformer...")
            summary, risks = analyze_document(chunks)
            progress_bar.progress(100)

            status.update(label="Analysis Complete!", state="complete", expanded=False)

        # C. DISPLAY RESULTS
        st.markdown("---")
        st.header("Analysis Report")

        res_col1, res_col2 = st.columns(2, gap="large")

        with res_col1:
            # Header and content are emitted in one markdown call so they
            # render as a single block (avoids the empty white bar issue).
            # The summary is derived from an uploaded document, so it is
            # escaped before being placed in a block rendered with
            # unsafe_allow_html=True.
            safe_summary = html.escape(str(summary))
            st.markdown(
                f"\n\nExecutive Summary\n\n{safe_summary}\n\n",
                unsafe_allow_html=True,
            )

        with res_col2:
            st.markdown("\n\nKey Risk Assessment\n\n", unsafe_allow_html=True)

            if not risks:
                st.markdown(
                    "\nClean Bill of Health: No high-risk clauses detected.\n",
                    unsafe_allow_html=True,
                )
            else:
                st.markdown(
                    f"\nAttention Required: {len(risks)} Potential Risks Found\n",
                    unsafe_allow_html=True,
                )
                st.write("Expand below to review specific flagged clauses:")

                # One expander per risk category, e.g.
                # {"Financial Penalty": [risk1, risk2], ...}
                for label, items in _group_risks_by_type(risks).items():
                    # The header shows the highest confidence in the group.
                    max_score = max(item["score"] for item in items)
                    count = len(items)

                    with st.expander(f"{label} (Highest Confidence: {int(max_score * 100)}%)"):
                        # List all instances inside
                        for i, item in enumerate(items):
                            snippet = item["text_snippet"]
                            st.markdown(f"**Instance {i + 1}:**")
                            st.markdown(f'> *"...{snippet}..."*')
                            st.caption(f"Confidence Score: {int(item['score'] * 100)}%")

                            # Divider between instances, but not after the last
                            if i < count - 1:
                                st.markdown("---")

            st.markdown("\n", unsafe_allow_html=True)