# Viewer metadata captured with this file — Spaces status: Sleeping; file size: 8,338 bytes.
import html
import time

import streamlit as st

# Import our custom modules
from src.ingestion import extract_text_from_pdf, extract_text_from_image
from src.processing import chunk_text
from src.analysis import analyze_document
# --- 1. PAGE CONFIGURATION & STYLING ---
# Page-level settings: browser-tab title, full-width layout, and a sidebar
# that starts out collapsed.
_page_options = {
    "page_title": "LegalLens AI",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_page_options)
# Inject Custom CSS for a Professional Look
# The stylesheet covers: the Roboto web font, hiding heading anchor links,
# the banner classes (.main-header / .sub-header), the white .result-card
# panels with forced dark text, the .risk-safe / .risk-high banners, and the
# primary button colours. It is kept in a named constant so the injection
# call below stays a one-liner.
_CUSTOM_CSS = """
<style>
/* Import a modern font */
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;700&display=swap');
html, body, [class*="css"] {
font-family: 'Roboto', sans-serif;
}
/* --- HIDE ANCHOR LINKS (The chain icon) --- */
/* This hides it for H1, H2, H3, etc. */
[data-testid="stMarkdownContainer"] a.anchor-link {
display: none !important;
}
[data-testid="stHeader"] a.anchor-link {
display: none !important;
}
/* Main Heading Style */
.main-header {
color: #4A90E2;
font-weight: 700;
font-size: 3rem;
margin-bottom: 0px;
}
.sub-header {
color: #888;
font-size: 1.2rem;
margin-top: -10px;
}
/* --- VISIBILITY FIX: SOLID WHITE CARDS --- */
.result-card {
background-color: #FFFFFF; /* Solid White Background */
padding: 20px;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0,0,0,0.1); /* Soft shadow */
margin-bottom: 20px;
}
/* Force text inside cards to be Dark Grey (Readable) */
.result-card, .result-card p, .result-card div {
color: #333333 !important;
line-height: 1.6;
}
/* Risk Box Styling */
.risk-safe {
background-color: #E8F5E9; /* Light Green */
color: #1b5e20; /* Dark Green Text */
padding: 15px;
border-radius: 8px;
border-left: 5px solid #2E7D32;
font-weight: bold;
}
.risk-high {
background-color: #FFEBEE; /* Light Red */
color: #b71c1c; /* Dark Red Text */
padding: 15px;
border-radius: 8px;
border-left: 5px solid #C62828;
font-weight: bold;
margin-bottom: 10px;
}
/* Button Styling */
.stButton>button {
background-color: #4A90E2;
color: white;
border-radius: 8px;
padding: 10px 24px;
font-weight: bold;
border: none;
}
.stButton>button:hover {
background-color: #357abd;
}
</style>
"""
# unsafe_allow_html is required so the <style> element is emitted as markup.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# --- 2. SIDEBAR ---
# Static "about" panel: heading, product blurb, and author credit, separated
# by horizontal rules. Elements are attached through the st.sidebar object.
_about_panel = st.sidebar
_about_panel.header("About LegalLens AI")
_about_panel.markdown("---")
_about_panel.markdown("""
**LegalLens AI** is an AI-powered document intelligence platform designed to bring clarity to complex contracts.
**How it works:**
1. **Ingestion:** OCR extracts text from PDFs or images.
2. **Processing:** Long documents are intelligently chunked.
3. **Analysis:** Advanced Transformer models summarize content and detect high-risk clauses.
""")
_about_panel.markdown("---")
_about_panel.caption("Built by Ardhi Gagan.")
# --- 3. MAIN APPLICATION INTERFACE ---
# Top Banner (No Logo)
# Title and tagline are raw HTML so they pick up the .main-header and
# .sub-header classes; a horizontal rule closes the banner.
for _banner_html in (
    '<h1 class="main-header">LegalLens AI</h1>',
    '<p class="sub-header">AI Contract Clarity & Risk Assessment</p>',
):
    st.markdown(_banner_html, unsafe_allow_html=True)
st.markdown("---")
# File Uploader Section
# Renders the call-to-action text and the upload widget. `uploaded_file` is
# None until the user selects a PDF/PNG/JPG/JPEG file.
with st.container():
    st.markdown("### Start Your Analysis")  # Markdown handles styles better than st.write for headers
    st.write("Upload a legal contract (PDF, PNG, or JPG) to receive an instant executive summary and risk report.")
    # The widget gets a real label (an empty string is discouraged for
    # accessibility); label_visibility="collapsed" keeps it out of the layout.
    uploaded_file = st.file_uploader(
        "Upload a legal contract",
        type=["pdf", "png", "jpg", "jpeg"],
        label_visibility="collapsed",
    )
def _as_percent(score):
    """Convert a confidence score (assumed to lie in 0-1) to a whole percentage.

    Rounds instead of truncating: ``int(0.57 * 100)`` evaluates to 56 because
    of binary floating-point representation, whereas rounding gives 57.
    """
    return int(round(score * 100))


def _group_risks_by_type(risks):
    """Group risk records by their ``'type'`` key, keeping first-seen order."""
    grouped = {}
    for risk in risks:
        grouped.setdefault(risk['type'], []).append(risk)
    return grouped


if uploaded_file is not None:
    # A. INGESTION STEP
    with st.status("Reading document...", expanded=True) as status:
        st.write("Initializing OCR engine...")
        # Determine file type and extract text: names ending in .pdf go to the
        # PDF extractor, every other accepted upload to the image extractor.
        file_bytes = uploaded_file.read()
        if uploaded_file.name.lower().endswith('.pdf'):
            raw_text = extract_text_from_pdf(file_bytes)
        else:
            raw_text = extract_text_from_image(file_bytes)
        status.update(label="Document successfully read!", state="complete", expanded=False)

    # Show preview
    with st.expander("View Extracted Raw Text"):
        # Non-empty label for accessibility; hidden so the layout is unchanged.
        st.text_area("Extracted raw text", raw_text, height=200, label_visibility="collapsed")
    st.markdown("<br>", unsafe_allow_html=True)  # Spacer

    # B. ANALYSIS BUTTON & LOGIC
    if st.button("Run AI Analysis"):
        progress_bar = st.progress(0)
        with st.status("Performing AI Analysis...", expanded=True) as status:
            # Step 1: Chunking
            st.write("Splitting document into context-aware chunks (LangChain)...")
            chunks = chunk_text(raw_text)
            progress_bar.progress(30)
            st.write(f"Running inference on {len(chunks)} chunks using BART Transformer...")
            # Step 2: AI Analysis
            # `risks` is treated below as a sequence of dicts carrying
            # 'type', 'score' and 'text_snippet' keys.
            summary, risks = analyze_document(chunks)
            progress_bar.progress(100)
            status.update(label="Analysis Complete!", state="complete", expanded=False)

        # C. DISPLAY RESULTS
        st.markdown("---")
        st.header("Analysis Report")
        res_col1, res_col2 = st.columns(2, gap="large")

        with res_col1:
            # Header and content share one HTML block so the card wraps both.
            # The summary is generated from the uploaded document, so it is
            # escaped before being placed into raw HTML.
            summary_html = html.escape(str(summary))
            st.markdown(
                '\n<div class="result-card">\n'
                "<h3>Executive Summary</h3>\n"
                f'<p style="font-size: 1rem;">{summary_html}</p>\n'
                "</div>\n",
                unsafe_allow_html=True,
            )

        with res_col2:
            # The card is opened and closed in the same call; a <div> left
            # open here would not wrap the widgets rendered by later calls.
            st.markdown('<div class="result-card"><h3>Key Risk Assessment</h3></div>', unsafe_allow_html=True)
            if not risks:
                st.markdown('<div class="risk-safe">Clean Bill of Health: No high-risk clauses detected.</div>', unsafe_allow_html=True)
            else:
                st.markdown(f'<div class="risk-high">Attention Required: {len(risks)} Potential Risks Found</div>', unsafe_allow_html=True)
                st.write("Expand below to review specific flagged clauses:")
                # One expander per risk category, e.g.
                # {"Financial Penalty": [risk1, risk2], ...}
                for label, items in _group_risks_by_type(risks).items():
                    # The header shows the highest confidence within the group.
                    max_score = max(item['score'] for item in items)
                    last_index = len(items) - 1
                    with st.expander(f"{label} (Highest Confidence: {_as_percent(max_score)}%)"):
                        # List all instances inside
                        for i, item in enumerate(items):
                            st.markdown(f"**Instance {i+1}:**")
                            st.markdown(f"> *\"...{item['text_snippet']}...\"*")
                            st.caption(f"Confidence Score: {_as_percent(item['score'])}%")
                            # Divider between instances, but not after the last one
                            if i < last_index:
                                st.markdown("---")