LegalLens-AI / app.py
ardhigagan's picture
Upload 3 files
1851849 verified
import html
import time

import streamlit as st

# Import our custom modules
from src.ingestion import extract_text_from_pdf, extract_text_from_image
from src.processing import chunk_text
from src.analysis import analyze_document
# --- 1. PAGE CONFIGURATION & STYLING ---
# Page-level settings: browser-tab title, full-width layout, and a sidebar
# that starts collapsed. Collected in one mapping so they read as a unit.
_page_settings = {
    "page_title": "LegalLens AI",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_page_settings)
# Custom stylesheet for the whole app. It loads the Roboto font, hides the
# heading anchor-link icons, and defines the classes used further down:
# .main-header / .sub-header (title banner), .result-card (white report
# cards), .risk-safe / .risk-high (risk banners), plus button colours.
_CUSTOM_CSS = """
<style>
/* Import a modern font */
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;700&display=swap');
html, body, [class*="css"] {
font-family: 'Roboto', sans-serif;
}
/* --- HIDE ANCHOR LINKS (The chain icon) --- */
/* This hides it for H1, H2, H3, etc. */
[data-testid="stMarkdownContainer"] a.anchor-link {
display: none !important;
}
[data-testid="stHeader"] a.anchor-link {
display: none !important;
}
/* Main Heading Style */
.main-header {
color: #4A90E2;
font-weight: 700;
font-size: 3rem;
margin-bottom: 0px;
}
.sub-header {
color: #888;
font-size: 1.2rem;
margin-top: -10px;
}
/* --- VISIBILITY FIX: SOLID WHITE CARDS --- */
.result-card {
background-color: #FFFFFF; /* Solid White Background */
padding: 20px;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0,0,0,0.1); /* Soft shadow */
margin-bottom: 20px;
}
/* Force text inside cards to be Dark Grey (Readable) */
.result-card, .result-card p, .result-card div {
color: #333333 !important;
line-height: 1.6;
}
/* Risk Box Styling */
.risk-safe {
background-color: #E8F5E9; /* Light Green */
color: #1b5e20; /* Dark Green Text */
padding: 15px;
border-radius: 8px;
border-left: 5px solid #2E7D32;
font-weight: bold;
}
.risk-high {
background-color: #FFEBEE; /* Light Red */
color: #b71c1c; /* Dark Red Text */
padding: 15px;
border-radius: 8px;
border-left: 5px solid #C62828;
font-weight: bold;
margin-bottom: 10px;
}
/* Button Styling */
.stButton>button {
background-color: #4A90E2;
color: white;
border-radius: 8px;
padding: 10px 24px;
font-weight: bold;
border: none;
}
.stButton>button:hover {
background-color: #357abd;
}
</style>
"""

# unsafe_allow_html is required so the <style> tag is emitted as real markup
# rather than shown as text.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# --- 2. SIDEBAR ---
# "About" panel: a heading, a short description of the three pipeline stages,
# and an author credit. Each element is added through st.sidebar directly.
st.sidebar.header("About LegalLens AI")
st.sidebar.markdown("---")
st.sidebar.markdown("""
**LegalLens AI** is an AI-powered document intelligence platform designed to bring clarity to complex contracts.
**How it works:**
1. **Ingestion:** OCR extracts text from PDFs or images.
2. **Processing:** Long documents are intelligently chunked.
3. **Analysis:** Advanced Transformer models summarize content and detect high-risk clauses.
""")
st.sidebar.markdown("---")
st.sidebar.caption("Built by Ardhi Gagan.")
# --- 3. MAIN APPLICATION INTERFACE ---
# Text-only title banner (no logo): the app name, then the tagline, both
# styled by the .main-header / .sub-header CSS classes, then a divider.
for _banner_html in (
    '<h1 class="main-header">LegalLens AI</h1>',
    '<p class="sub-header">AI Contract Clarity & Risk Assessment</p>',
):
    st.markdown(_banner_html, unsafe_allow_html=True)
st.markdown("---")
# File Uploader Section
# Shows a short call to action and the upload widget. `uploaded_file` is None
# until the user picks a file; the rest of the script branches on it.
with st.container():
    st.markdown("### Start Your Analysis")  # Markdown handles styles better than st.write for headers
    st.write("Upload a legal contract (PDF, PNG, or JPG) to receive an instant executive summary and risk report.")
    # The label is hidden visually (label_visibility="collapsed") but must not
    # be empty: Streamlit discourages empty labels for accessibility reasons,
    # and assistive technology still needs a name for the widget.
    uploaded_file = st.file_uploader(
        "Upload a legal contract",
        type=["pdf", "png", "jpg", "jpeg"],
        label_visibility="collapsed",
    )
# Main flow, active only once a file has been uploaded:
#   A. extract text from the upload, B. run chunking + analysis on demand,
#   C. render the summary card and the grouped risk report.
# NOTE(review): Streamlit re-executes the script on every interaction, so the
# extraction step repeats when the analysis button is clicked -- consider
# caching the extracted text if OCR turns out to be slow.
if uploaded_file is not None:
    # A. INGESTION STEP
    with st.status("Reading document...", expanded=True) as status:
        st.write("Initializing OCR engine...")
        # getvalue() returns the complete upload regardless of the buffer's
        # current read position, unlike read(), which yields nothing once the
        # buffer has already been consumed.
        file_bytes = uploaded_file.getvalue()
        # Route on the file extension: PDFs go to the PDF extractor, every
        # other accepted type (PNG/JPG/JPEG) to the image extractor.
        if uploaded_file.name.lower().endswith(".pdf"):
            raw_text = extract_text_from_pdf(file_bytes)
        else:
            raw_text = extract_text_from_image(file_bytes)
        status.update(label="Document successfully read!", state="complete", expanded=False)

    # Show preview
    with st.expander("View Extracted Raw Text"):
        # Non-empty label (hidden visually) instead of "", which Streamlit
        # discourages for accessibility reasons.
        st.text_area(
            "Extracted raw text",
            raw_text,
            height=200,
            label_visibility="collapsed",
        )
    st.markdown("<br>", unsafe_allow_html=True)  # Spacer

    # B. ANALYSIS BUTTON & LOGIC
    if st.button("Run AI Analysis"):
        progress_bar = st.progress(0)
        with st.status("Performing AI Analysis...", expanded=True) as status:
            # Step 1: Chunking
            st.write("Splitting document into context-aware chunks (LangChain)...")
            chunks = chunk_text(raw_text)
            progress_bar.progress(30)
            st.write(f"Running inference on {len(chunks)} chunks using BART Transformer...")
            # Step 2: AI Analysis -- yields the summary text and a list of
            # risk records (each read below via 'type', 'score', 'text_snippet').
            summary, risks = analyze_document(chunks)
            progress_bar.progress(100)
            status.update(label="Analysis Complete!", state="complete", expanded=False)

        # C. DISPLAY RESULTS
        st.markdown("---")
        st.header("Analysis Report")
        res_col1, res_col2 = st.columns(2, gap="large")

        with res_col1:
            # The summary is derived from the uploaded document, i.e. untrusted
            # text. It is rendered with unsafe_allow_html=True, so escape it
            # first to stop markup in the document from being injected.
            safe_summary = html.escape(str(summary))
            # Header and content live in ONE HTML block so the card wraps both.
            summary_card = (
                '\n<div class="result-card">\n'
                "<h3>Executive Summary</h3>\n"
                f'<p style="font-size: 1rem;">{safe_summary}</p>\n'
                "</div>\n"
            )
            st.markdown(summary_card, unsafe_allow_html=True)

        with res_col2:
            # Each st.markdown call is rendered as its own element, so a <div>
            # opened in one call cannot be closed by a later one. The card is
            # therefore opened and closed within this single call.
            st.markdown(
                '<div class="result-card"><h3>Key Risk Assessment</h3></div>',
                unsafe_allow_html=True,
            )
            if not risks:
                st.markdown(
                    '<div class="risk-safe">Clean Bill of Health: No high-risk clauses detected.</div>',
                    unsafe_allow_html=True,
                )
            else:
                st.markdown(
                    f'<div class="risk-high">Attention Required: {len(risks)} Potential Risks Found</div>',
                    unsafe_allow_html=True,
                )
                st.write("Expand below to review specific flagged clauses:")

                # Group risks by category, keeping first-seen order:
                # {"Financial Penalty": [risk1, risk2], ...}
                risk_groups = {}
                for risk in risks:
                    risk_groups.setdefault(risk["type"], []).append(risk)

                # One expander per category, headed by its highest score.
                for label, items in risk_groups.items():
                    max_score = max(item["score"] for item in items)
                    # ".0%" rounds to the nearest percent; int(score * 100)
                    # truncated, so e.g. 0.58 (57.999... as a float) showed 57%.
                    with st.expander(f"{label} (Highest Confidence: {max_score:.0%})"):
                        for position, item in enumerate(items, start=1):
                            # Divider between instances, never after the last.
                            if position > 1:
                                st.markdown("---")
                            st.markdown(f"**Instance {position}:**")
                            st.markdown(f"> *\"...{item['text_snippet']}...\"*")
                            st.caption(f"Confidence Score: {item['score']:.0%}")