Spaces:
Running
Running
import html
import time

import streamlit as st

# Import our custom modules
from src.ingestion import extract_text_from_pdf, extract_text_from_image
from src.processing import chunk_text
from src.analysis import analyze_document
# --- 1. PAGE CONFIGURATION & STYLING ---
# Page-level settings: browser-tab title, full-width layout, and a sidebar
# that starts out collapsed.
PAGE_SETTINGS = {
    "page_title": "LegalLens AI",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**PAGE_SETTINGS)
# Custom stylesheet for the app: web font, hidden heading anchor links,
# banner typography, white result cards, risk call-out boxes, and buttons.
# Kept in a named constant so the injection call below stays a one-liner.
CUSTOM_CSS = """
<style>
/* Import a modern font */
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;700&display=swap');
html, body, [class*="css"] {
font-family: 'Roboto', sans-serif;
}
/* --- HIDE ANCHOR LINKS (The chain icon) --- */
/* This hides it for H1, H2, H3, etc. */
[data-testid="stMarkdownContainer"] a.anchor-link {
display: none !important;
}
[data-testid="stHeader"] a.anchor-link {
display: none !important;
}
/* Main Heading Style */
.main-header {
color: #4A90E2;
font-weight: 700;
font-size: 3rem;
margin-bottom: 0px;
}
.sub-header {
color: #888;
font-size: 1.2rem;
margin-top: -10px;
}
/* --- VISIBILITY FIX: SOLID WHITE CARDS --- */
.result-card {
background-color: #FFFFFF; /* Solid White Background */
padding: 20px;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0,0,0,0.1); /* Soft shadow */
margin-bottom: 20px;
}
/* Force text inside cards to be Dark Grey (Readable) */
.result-card, .result-card p, .result-card div {
color: #333333 !important;
line-height: 1.6;
}
/* Risk Box Styling */
.risk-safe {
background-color: #E8F5E9; /* Light Green */
color: #1b5e20; /* Dark Green Text */
padding: 15px;
border-radius: 8px;
border-left: 5px solid #2E7D32;
font-weight: bold;
}
.risk-high {
background-color: #FFEBEE; /* Light Red */
color: #b71c1c; /* Dark Red Text */
padding: 15px;
border-radius: 8px;
border-left: 5px solid #C62828;
font-weight: bold;
margin-bottom: 10px;
}
/* Button Styling */
.stButton>button {
background-color: #4A90E2;
color: white;
border-radius: 8px;
padding: 10px 24px;
font-weight: bold;
border: none;
}
.stButton>button:hover {
background-color: #357abd;
}
</style>
"""

# Inject the stylesheet; raw HTML must be explicitly allowed for <style> to apply.
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
# --- 2. SIDEBAR ---
# Static "about" panel: product blurb, a three-step pipeline overview, and
# an author credit.
with st.sidebar:
    st.header("About LegalLens AI")
    st.markdown("---")
    # Blank lines separate the blurb, the "How it works" lead-in, and the
    # numbered list; without them Markdown folds the first two lines into a
    # single paragraph.
    st.markdown(
        """
        **LegalLens AI** is an AI-powered document intelligence platform designed to bring clarity to complex contracts.

        **How it works:**

        1. **Ingestion:** OCR extracts text from PDFs or images.
        2. **Processing:** Long documents are intelligently chunked.
        3. **Analysis:** Advanced Transformer models summarize content and detect high-risk clauses.
        """
    )
    st.markdown("---")
    st.caption("Built by Ardhi Gagan.")
# --- 3. MAIN APPLICATION INTERFACE ---
# Top Banner (No Logo): title and tagline use the .main-header / .sub-header
# CSS classes, so they are emitted as raw HTML.
st.markdown('<h1 class="main-header">LegalLens AI</h1>', unsafe_allow_html=True)
st.markdown('<p class="sub-header">AI Contract Clarity & Risk Assessment</p>', unsafe_allow_html=True)
st.markdown("---")

# File Uploader Section
with st.container():
    st.markdown("### Start Your Analysis")  # Markdown handles styles better than st.write for headers
    st.write("Upload a legal contract (PDF, PNG, or JPG) to receive an instant executive summary and risk report.")
    # The label stays visually hidden (label_visibility="collapsed") but is
    # no longer empty, so assistive technology still gets a name for the
    # widget and Streamlit does not complain about a blank label.
    uploaded_file = st.file_uploader(
        "Upload a legal contract",
        type=["pdf", "png", "jpg", "jpeg"],
        label_visibility="collapsed",
    )
# Format a 0-1 confidence score as a whole-number percentage string.
# Uses rounding rather than int() truncation, so 0.29 shows as "29%"
# instead of "28%" (0.29 * 100 == 28.999999999999996).
def _as_percent(score):
    return f"{float(score):.0%}"


# Collapse every run of whitespace (including newlines) into one space so
# the text can be embedded in a single Markdown/HTML line.
def _one_line(text):
    return " ".join(str(text).split())


# Group risk dicts by their 'type' key, keeping categories in first-seen order.
def _group_risks_by_type(risks):
    grouped = {}
    for risk in risks:
        grouped.setdefault(risk["type"], []).append(risk)
    return grouped


if uploaded_file is not None:
    # A. INGESTION STEP
    with st.status("Reading document...", expanded=True) as status:
        st.write("Initializing OCR engine...")
        # getvalue() returns the whole payload regardless of the stream
        # position, whereas read() consumes the buffer and would yield
        # empty bytes if the same object were read again on a rerun.
        file_bytes = uploaded_file.getvalue()
        # Determine file type and extract text
        if uploaded_file.name.lower().endswith(".pdf"):
            raw_text = extract_text_from_pdf(file_bytes)
        else:
            raw_text = extract_text_from_image(file_bytes)
        status.update(label="Document successfully read!", state="complete", expanded=False)

    # Show preview (label is hidden but non-empty, for accessibility)
    with st.expander("View Extracted Raw Text"):
        st.text_area(
            "Extracted raw text",
            raw_text,
            height=200,
            label_visibility="collapsed",
        )

    st.markdown("<br>", unsafe_allow_html=True)  # Spacer

    # B. ANALYSIS BUTTON & LOGIC
    if st.button("Run AI Analysis"):
        progress_bar = st.progress(0)

        with st.status("Performing AI Analysis...", expanded=True) as status:
            # Step 1: Chunking
            st.write("Splitting document into context-aware chunks (LangChain)...")
            chunks = chunk_text(raw_text)
            progress_bar.progress(30)

            # Step 2: AI Analysis
            st.write(f"Running inference on {len(chunks)} chunks using BART Transformer...")
            summary, risks = analyze_document(chunks)
            progress_bar.progress(100)
            status.update(label="Analysis Complete!", state="complete", expanded=False)

        # C. DISPLAY RESULTS
        st.markdown("---")
        st.header("Analysis Report")
        res_col1, res_col2 = st.columns(2, gap="large")

        with res_col1:
            # Header and content live in one HTML block so the card wraps
            # both. The summary is derived from the uploaded document, so
            # it is HTML-escaped before being injected as raw markup, and
            # flattened to one line so a blank line inside it cannot end
            # the HTML block early (<p> collapses whitespace anyway).
            safe_summary = html.escape(_one_line(summary))
            summary_card = (
                '<div class="result-card">'
                "<h3>Executive Summary</h3>"
                f'<p style="font-size: 1rem;">{safe_summary}</p>'
                "</div>"
            )
            st.markdown(summary_card, unsafe_allow_html=True)

        with res_col2:
            # Each st.markdown call is rendered as its own element, so a
            # <div> opened here cannot enclose the widgets created below.
            # The card is therefore closed in the same call, and no
            # dangling </div> is emitted at the end.
            st.markdown(
                '<div class="result-card"><h3>Key Risk Assessment</h3></div>',
                unsafe_allow_html=True,
            )

            if not risks:
                st.markdown(
                    '<div class="risk-safe">Clean Bill of Health: No high-risk clauses detected.</div>',
                    unsafe_allow_html=True,
                )
            else:
                st.markdown(
                    f'<div class="risk-high">Attention Required: {len(risks)} Potential Risks Found</div>',
                    unsafe_allow_html=True,
                )
                st.write("Expand below to review specific flagged clauses:")

                # One expander per risk category; its header shows the
                # highest confidence score found within that category.
                for label, items in _group_risks_by_type(risks).items():
                    top_score = max(item["score"] for item in items)
                    with st.expander(f"{label} (Highest Confidence: {_as_percent(top_score)})"):
                        for position, item in enumerate(items, start=1):
                            # Flatten the snippet so embedded line breaks
                            # cannot terminate the blockquote/emphasis.
                            snippet = _one_line(item["text_snippet"])
                            st.markdown(f"**Instance {position}:**")
                            st.markdown(f'> *"...{snippet}..."*')
                            st.caption(f"Confidence Score: {_as_percent(item['score'])}")
                            # Divider between instances, but not after the last one
                            if position < len(items):
                                st.markdown("---")