Spaces:

ardhigagan
/

LegalLens-AI

Running

App Files Files Community

LegalLens-AI / app.py

ardhigagan

Upload 3 files

1851849 verified 2 days ago

raw

history blame contribute delete

8.34 kB

	import html
	import time

	import streamlit as st

	# Import our custom modules
	from src.ingestion import extract_text_from_pdf, extract_text_from_image
	from src.processing import chunk_text
	from src.analysis import analyze_document

	# --- 1. PAGE CONFIGURATION & STYLING ---
	# Page-level settings: title, wide layout, and a sidebar that starts collapsed.
	PAGE_SETTINGS = {
	    "page_title": "LegalLens AI",
	    "layout": "wide",
	    "initial_sidebar_state": "collapsed",
	}
	st.set_page_config(**PAGE_SETTINGS)

	# Stylesheet for the whole app: font, hidden heading anchor links, banner
	# typography, white result cards, coloured risk boxes and button colours.
	# Kept in a named constant so the markup is separate from the call that
	# injects it.
	CUSTOM_CSS = """
	<style>
	/* Import a modern font */
	@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;700&display=swap');

	html, body, [class*="css"] {
	font-family: 'Roboto', sans-serif;
	}

	/* --- HIDE ANCHOR LINKS (The chain icon) --- */
	/* This hides it for H1, H2, H3, etc. */
	[data-testid="stMarkdownContainer"] a.anchor-link {
	display: none !important;
	}
	[data-testid="stHeader"] a.anchor-link {
	display: none !important;
	}

	/* Main Heading Style */
	.main-header {
	color: #4A90E2;
	font-weight: 700;
	font-size: 3rem;
	margin-bottom: 0px;
	}
	.sub-header {
	color: #888;
	font-size: 1.2rem;
	margin-top: -10px;
	}

	/* --- VISIBILITY FIX: SOLID WHITE CARDS --- */
	.result-card {
	background-color: #FFFFFF; /* Solid White Background */
	padding: 20px;
	border-radius: 10px;
	box-shadow: 0 4px 6px rgba(0,0,0,0.1); /* Soft shadow */
	margin-bottom: 20px;
	}

	/* Force text inside cards to be Dark Grey (Readable) */
	.result-card, .result-card p, .result-card div {
	color: #333333 !important;
	line-height: 1.6;
	}

	/* Risk Box Styling */
	.risk-safe {
	background-color: #E8F5E9; /* Light Green */
	color: #1b5e20; /* Dark Green Text */
	padding: 15px;
	border-radius: 8px;
	border-left: 5px solid #2E7D32;
	font-weight: bold;
	}
	.risk-high {
	background-color: #FFEBEE; /* Light Red */
	color: #b71c1c; /* Dark Red Text */
	padding: 15px;
	border-radius: 8px;
	border-left: 5px solid #C62828;
	font-weight: bold;
	margin-bottom: 10px;
	}

	/* Button Styling */
	.stButton>button {
	background-color: #4A90E2;
	color: white;
	border-radius: 8px;
	padding: 10px 24px;
	font-weight: bold;
	border: none;
	}
	.stButton>button:hover {
	background-color: #357abd;
	}
	</style>
	"""
	# unsafe_allow_html is needed so the <style> tag is emitted as markup.
	st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

	# --- 2. SIDEBAR ---
	# Static "about" panel: heading, divider, product description, divider, credit.
	about_text = """
	LegalLens AI is an AI-powered document intelligence platform designed to bring clarity to complex contracts.

	How it works:
	1. Ingestion: OCR extracts text from PDFs or images.
	2. Processing: Long documents are intelligently chunked.
	3. Analysis: Advanced Transformer models summarize content and detect high-risk clauses.
	"""
	# Object notation (st.sidebar.<element>) places each element in the sidebar,
	# the same as calling it inside a `with st.sidebar:` block.
	st.sidebar.header("About LegalLens AI")
	st.sidebar.markdown("---")
	st.sidebar.markdown(about_text)
	st.sidebar.markdown("---")
	st.sidebar.caption("Built by Ardhi Gagan.")


	# --- 3. MAIN APPLICATION INTERFACE ---

	# Top banner (no logo): title and tagline rendered as raw HTML so they pick
	# up the .main-header / .sub-header classes, followed by a horizontal rule.
	for banner_html in (
	    '<h1 class="main-header">LegalLens AI</h1>',
	    '<p class="sub-header">AI Contract Clarity & Risk Assessment</p>',
	):
	    st.markdown(banner_html, unsafe_allow_html=True)

	st.markdown("---")

	# File Uploader Section
	# Renders the call-to-action text and the upload widget; the selected file
	# (or None when nothing is uploaded) is kept in `uploaded_file` for the
	# processing section below.
	with st.container():
	    st.markdown("### Start Your Analysis")  # Markdown handles styles better than st.write for headers
	    st.write("Upload a legal contract (PDF, PNG, or JPG) to receive an instant executive summary and risk report.")
	    # The label is non-empty but hidden: Streamlit discourages empty labels
	    # for accessibility reasons and logs a warning when one is passed.
	    uploaded_file = st.file_uploader(
	        "Upload a legal contract",
	        type=["pdf", "png", "jpg", "jpeg"],
	        label_visibility="collapsed",
	    )


	# Main workflow, executed only once a file has been uploaded:
	#   A. extract text from the upload (PDF or image),
	#   B. on button click, chunk the text and run the analysis,
	#   C. render the summary and the flagged risks side by side.
	if uploaded_file is not None:
	    # A. INGESTION STEP
	    with st.status("Reading document...", expanded=True) as status:
	        st.write("Initializing OCR engine...")
	        # getvalue() returns the full buffer regardless of the current read
	        # position, so the extractors still get the whole file if the stream
	        # was already consumed on an earlier script run.
	        file_bytes = uploaded_file.getvalue()
	        # Route by file extension: PDFs go to the PDF extractor, every other
	        # accepted type is treated as an image.
	        if uploaded_file.name.lower().endswith('.pdf'):
	            raw_text = extract_text_from_pdf(file_bytes)
	        else:
	            raw_text = extract_text_from_image(file_bytes)

	        if raw_text:
	            status.update(label="Document successfully read!", state="complete", expanded=False)
	        else:
	            status.update(label="No text could be extracted", state="error", expanded=False)

	    # Nothing to preview or analyse when extraction produced no text.
	    if not raw_text:
	        st.warning("No readable text could be extracted from this document.")
	        st.stop()

	    # Show preview
	    with st.expander("View Extracted Raw Text"):
	        # Non-empty but hidden label; empty labels are discouraged by Streamlit.
	        st.text_area("Extracted raw text", raw_text, height=200, label_visibility="collapsed")

	    st.markdown("<br>", unsafe_allow_html=True)  # Spacer

	    # B. ANALYSIS BUTTON & LOGIC
	    if st.button("Run AI Analysis"):
	        progress_bar = st.progress(0)

	        with st.status("Performing AI Analysis...", expanded=True) as status:
	            # Step 1: Chunking
	            st.write("Splitting document into context-aware chunks (LangChain)...")
	            chunks = chunk_text(raw_text)
	            progress_bar.progress(30)

	            # Step 2: AI Analysis
	            st.write(f"Running inference on {len(chunks)} chunks using BART Transformer...")
	            summary, risks = analyze_document(chunks)
	            progress_bar.progress(100)
	            status.update(label="Analysis Complete!", state="complete", expanded=False)

	        # C. DISPLAY RESULTS
	        st.markdown("---")
	        st.header("Analysis Report")

	        res_col1, res_col2 = st.columns(2, gap="large")

	        with res_col1:
	            # Header and content live in ONE HTML block so the card wraps both.
	            # The summary is derived from the uploaded document, so it is
	            # escaped before being placed into markup rendered with
	            # unsafe_allow_html.
	            summary_html = (
	                '<div class="result-card">'
	                '<h3>Executive Summary</h3>'
	                f'<p style="font-size: 1rem;">{html.escape(str(summary))}</p>'
	                '</div>'
	            )
	            st.markdown(summary_html, unsafe_allow_html=True)

	        with res_col2:
	            # Each st.markdown call is rendered as its own element, so a <div>
	            # opened in one call cannot wrap widgets created by later calls.
	            # The heading card is therefore emitted as a complete block and
	            # no separate closing tag is emitted afterwards.
	            st.markdown('<div class="result-card"><h3>Key Risk Assessment</h3></div>', unsafe_allow_html=True)

	            if not risks:
	                st.markdown('<div class="risk-safe">Clean Bill of Health: No high-risk clauses detected.</div>', unsafe_allow_html=True)
	            else:
	                st.markdown(f'<div class="risk-high">Attention Required: {len(risks)} Potential Risks Found</div>', unsafe_allow_html=True)
	                st.write("Expand below to review specific flagged clauses:")

	                # Group risks by category, e.g. {"Financial Penalty": [risk1, risk2], ...}.
	                # Each risk is read as a mapping with 'type', 'score' and
	                # 'text_snippet' keys.
	                risk_groups = {}
	                for risk in risks:
	                    risk_groups.setdefault(risk['type'], []).append(risk)

	                # One expander per category, titled with the group's highest score.
	                for label, items in risk_groups.items():
	                    # Scores are shown as whole percentages (presumably 0-1
	                    # confidences; confirm against analyze_document).
	                    max_score = max(item['score'] for item in items)
	                    last_index = len(items) - 1

	                    with st.expander(f"{label} (Highest Confidence: {int(max_score*100)}%)"):
	                        for i, item in enumerate(items):
	                            st.markdown(f"Instance {i+1}:")
	                            st.markdown(f"> \"...{item['text_snippet']}...\"")
	                            st.caption(f"Confidence Score: {int(item['score']*100)}%")
	                            # Divider between instances, but not after the last one.
	                            if i < last_index:
	                                st.markdown("---")