# Hugging Face Spaces: ardhigagan / LegalLens-AI (status: Sleeping)
# File size: 8,338 Bytes
import html
import time

import streamlit as st

# Import our custom modules
from src.ingestion import extract_text_from_pdf, extract_text_from_image
from src.processing import chunk_text
from src.analysis import analyze_document

# --- 1. PAGE CONFIGURATION & STYLING ---
# Page title, wide layout, and a sidebar that starts out collapsed.
_page_options = {
    "page_title": "LegalLens AI",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_page_options)

# Stylesheet for the whole app: font, hidden heading anchor links, header
# text, result cards, risk boxes, and buttons. Kept as a named constant so the
# injection call below stays short.
_CUSTOM_STYLESHEET = """

    <style>

        /* Import a modern font */

        @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;700&display=swap');

        

        html, body, [class*="css"] {

            font-family: 'Roboto', sans-serif;

        }

        

        /* --- HIDE ANCHOR LINKS (The chain icon) --- */

        /* This hides it for H1, H2, H3, etc. */

        [data-testid="stMarkdownContainer"] a.anchor-link {

            display: none !important;

        }

        [data-testid="stHeader"] a.anchor-link {

            display: none !important;

        }



        /* Main Heading Style */

        .main-header {

            color: #4A90E2; 

            font-weight: 700;

            font-size: 3rem;

            margin-bottom: 0px;

        }

        .sub-header {

            color: #888; 

            font-size: 1.2rem;

            margin-top: -10px;

        }



        /* --- VISIBILITY FIX: SOLID WHITE CARDS --- */

        .result-card {

            background-color: #FFFFFF; /* Solid White Background */

            padding: 20px;

            border-radius: 10px;

            box-shadow: 0 4px 6px rgba(0,0,0,0.1); /* Soft shadow */

            margin-bottom: 20px;

        }

        

        /* Force text inside cards to be Dark Grey (Readable) */

        .result-card, .result-card p, .result-card div {

            color: #333333 !important; 

            line-height: 1.6;

        }

        

        /* Risk Box Styling */

        .risk-safe {

            background-color: #E8F5E9; /* Light Green */

            color: #1b5e20; /* Dark Green Text */

            padding: 15px;

            border-radius: 8px;

            border-left: 5px solid #2E7D32;

            font-weight: bold;

        }

        .risk-high {

            background-color: #FFEBEE; /* Light Red */

            color: #b71c1c; /* Dark Red Text */

            padding: 15px;

            border-radius: 8px;

            border-left: 5px solid #C62828;

            font-weight: bold;

            margin-bottom: 10px;

        }



        /* Button Styling */

        .stButton>button {

            background-color: #4A90E2;

            color: white;

            border-radius: 8px;

            padding: 10px 24px;

            font-weight: bold;

            border: none;

        }

        .stButton>button:hover {

            background-color: #357abd;

        }

    </style>

"""

# Inject the custom CSS; raw HTML must be explicitly allowed for the <style>
# tag to be passed through.
st.markdown(_CUSTOM_STYLESHEET, unsafe_allow_html=True)

# --- 2. SIDEBAR ---
# "About" copy shown in the sidebar, between two horizontal rules.
_about_markdown = """

    **LegalLens AI** is an AI-powered document intelligence platform designed to bring clarity to complex contracts.

    

    **How it works:**

    1.  **Ingestion:** OCR extracts text from PDFs or images.

    2.  **Processing:** Long documents are intelligently chunked.

    3.  **Analysis:** Advanced Transformer models summarize content and detect high-risk clauses.

    """

# Object notation on st.sidebar places each element in the sidebar.
st.sidebar.header("About LegalLens AI")
st.sidebar.markdown("---")
st.sidebar.markdown(_about_markdown)
st.sidebar.markdown("---")
st.sidebar.caption("Built by Ardhi Gagan.")


# --- 3. MAIN APPLICATION INTERFACE ---

# Top banner (no logo): title and tagline use the CSS classes from the
# injected stylesheet, so they are emitted as raw HTML.
_banner_lines = (
    '<h1 class="main-header">LegalLens AI</h1>',
    '<p class="sub-header">AI Contract Clarity & Risk Assessment</p>',
)
for _banner_html in _banner_lines:
    st.markdown(_banner_html, unsafe_allow_html=True)

# Horizontal rule separating the banner from the upload section.
st.markdown("---")

# File Uploader Section
with st.container():
    st.markdown("### Start Your Analysis")  # Markdown handles styles better than st.write for headers
    st.write("Upload a legal contract (PDF, PNG, or JPG) to receive an instant executive summary and risk report.")
    # Streamlit discourages empty widget labels for accessibility reasons, so
    # give the uploader a real label and hide it visually instead.
    uploaded_file = st.file_uploader(
        "Upload a legal contract",
        type=["pdf", "png", "jpg", "jpeg"],
        label_visibility="collapsed",
    )


if uploaded_file is not None:
    # A. INGESTION STEP
    with st.status("Reading document...", expanded=True) as status:
        st.write("Initializing OCR engine...")
        # getvalue() returns the full buffer regardless of the stream's current
        # read position, whereas read() returns b"" once the stream is consumed.
        file_bytes = uploaded_file.getvalue()
        # Pick the extractor by file extension: PDFs go to the PDF path, every
        # other accepted upload (png/jpg/jpeg) goes to the image path.
        if uploaded_file.name.lower().endswith('.pdf'):
            raw_text = extract_text_from_pdf(file_bytes)
        else:
            raw_text = extract_text_from_image(file_bytes)

        # NOTE(review): assumes the extractors return a str (or None) — confirm
        # against src.ingestion.
        has_text = bool(raw_text and raw_text.strip())
        if has_text:
            status.update(label="Document successfully read!", state="complete", expanded=False)
        else:
            status.update(label="No readable text found.", state="error", expanded=False)

    if not has_text:
        # Nothing to preview or analyse, so end this script run here instead of
        # feeding empty input to the chunker and the model.
        st.warning("No readable text could be extracted from this document. Please try a clearer scan or a different file.")
        st.stop()

    # Show preview
    with st.expander("View Extracted Raw Text"):
        # Non-empty label (hidden visually) instead of "" for accessibility.
        st.text_area("Extracted raw text", raw_text, height=200, label_visibility="collapsed")

    st.markdown("<br>", unsafe_allow_html=True)  # Spacer

    # B. ANALYSIS BUTTON & LOGIC
    if st.button("Run AI Analysis"):
        progress_bar = st.progress(0)

        with st.status("Performing AI Analysis...", expanded=True) as status:
            # Step 1: Chunking
            st.write("Splitting document into context-aware chunks (LangChain)...")
            chunks = chunk_text(raw_text)
            progress_bar.progress(30)

            # Step 2: AI Analysis
            st.write(f"Running inference on {len(chunks)} chunks using BART Transformer...")
            summary, risks = analyze_document(chunks)
            progress_bar.progress(100)
            status.update(label="Analysis Complete!", state="complete", expanded=False)

        # C. DISPLAY RESULTS
        st.markdown("---")
        st.header("Analysis Report")

        res_col1, res_col2 = st.columns(2, gap="large")

        with res_col1:
            # The summary is model output derived from the uploaded document, so
            # escape it before embedding it in HTML rendered with
            # unsafe_allow_html. Line breaks become <br> so the text cannot
            # introduce a blank line into the HTML below.
            summary_html = "<br>".join(html.escape(str(summary)).splitlines())
            # Header AND content go in one HTML string (a lone wrapper div shows
            # up as an empty white bar). The string is unindented and free of
            # blank lines: in Markdown a blank line ends an HTML block and a
            # following indented line is parsed as a code block.
            st.markdown(
                '<div class="result-card">'
                '<h3>Executive Summary</h3>'
                f'<p style="font-size: 1rem;">{summary_html}</p>'
                '</div>',
                unsafe_allow_html=True,
            )

        with res_col2:
            # Each st.markdown call is rendered as its own element, so a <div>
            # opened in one call cannot wrap widgets emitted by later calls.
            # The header card is therefore closed within the same call.
            st.markdown('<div class="result-card"><h3>Key Risk Assessment</h3></div>', unsafe_allow_html=True)

            if not risks:
                st.markdown('<div class="risk-safe">Clean Bill of Health: No high-risk clauses detected.</div>', unsafe_allow_html=True)
            else:
                st.markdown(f'<div class="risk-high">Attention Required: {len(risks)} Potential Risks Found</div>', unsafe_allow_html=True)
                st.write("Expand below to review specific flagged clauses:")

                # Group risks by category: {"Financial Penalty": [risk1, risk2], ...}
                risk_groups = {}
                for risk in risks:
                    risk_groups.setdefault(risk['type'], []).append(risk)

                # One expander per category, listing every flagged instance.
                for label, items in risk_groups.items():
                    # Highest confidence in the group goes on the expander header.
                    # round() rather than int(): int() truncates, so 0.57 * 100
                    # (56.99999999999999 as a float) would display as 56%.
                    top_pct = round(max(float(item['score']) for item in items) * 100)
                    last_index = len(items) - 1

                    with st.expander(f"{label} (Highest Confidence: {top_pct}%)"):
                        for i, item in enumerate(items):
                            st.markdown(f"**Instance {i+1}:**")
                            st.markdown(f"> *\"...{item['text_snippet']}...\"*")
                            st.caption(f"Confidence Score: {round(float(item['score']) * 100)}%")
                            # Divider between instances, but not after the last one.
                            if i < last_index:
                                st.markdown("---")