# Clause_Lense / app.py
# solfedge's picture
# Upload app.py
# ee559a2 verified
import gradio as gr
import os
import traceback
import time
# Directory that process_contract() clears on every run and writes the
# JSON/PDF reports into.
OUTPUT_DIR = "output"

# Make sure the working directories exist before the UI starts serving.
# exist_ok=True keeps restarts from failing when they are already present.
for _required_dir in ("data", OUTPUT_DIR, "models"):
    os.makedirs(_required_dir, exist_ok=True)
def _clear_directory(directory):
    """Delete every file directly inside *directory*, leaving subdirectories alone."""
    for entry_name in os.listdir(directory):
        entry_path = os.path.join(directory, entry_name)
        # os.remove() raises on a real directory, which would abort the whole
        # request; skip those instead. Symlinks are removed like plain files.
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            continue
        os.remove(entry_path)


def _resolve_upload_path(file):
    """Return the filesystem path for an upload given as a path or a file-like object."""
    # The upload may arrive as a plain path (str / os.PathLike) or as a
    # tempfile-style wrapper that exposes the path via ``.name``.
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


def _format_reviews_markdown(results):
    """Render the list of review dicts as one Markdown document."""
    parts = ["## Clause Reviews\n\n"]
    for item in results:
        parts.append(f" **{item['label'].replace('_', ' ').title()}**\n\n")
        # Only the first 300 characters of the section are shown as an excerpt.
        parts.append(f" *Excerpt:* {item['section'][:300]}...\n\n")
        parts.append(f" *Review:* {item['review']}\n\n---\n\n")
    return "".join(parts)


def process_contract(file):
    """Analyze an uploaded contract, streaming progress updates as it goes.

    This is a generator. Every yielded item is a 4-tuple
    ``(status, reviews_markdown, json_report, pdf_report)``. Intermediate
    yields carry only a status message (the other three slots are ``None``);
    the final yield on success carries the summary, the rendered reviews and
    the values returned by ``export_to_json`` / ``export_to_pdf``
    (presumably the report file paths -- confirm against ``llm_reviewer``).

    Args:
        file: The uploaded contract, either a path (``str`` / ``os.PathLike``)
            or an object exposing the path as ``.name``; ``None`` when nothing
            was uploaded. Only ``.pdf`` and ``.docx`` are accepted.

    Yields:
        tuple: ``(status, reviews_markdown, json_report, pdf_report)``.

    Any exception raised while processing is caught and reported as a final
    status message that includes the traceback, so the UI never sees a raise.
    """
    try:
        # Clearing previous outputs.
        # NOTE(review): OUTPUT_DIR and the report file names are shared by all
        # calls, so overlapping requests would overwrite each other's reports.
        _clear_directory(OUTPUT_DIR)

        if file is None:
            yield " No file uploaded.", None, None, None
            return

        file_path = _resolve_upload_path(file)
        ext = os.path.splitext(file_path)[1].lower()
        if ext not in (".pdf", ".docx"):
            yield f" Unsupported format: {ext}", None, None, None
            return

        yield " Extracting text...", None, None, None
        # Short pause after each status update -- presumably to give the UI a
        # moment to show it; TODO confirm it is still needed.
        time.sleep(0.1)

        # Extract text (project modules are imported lazily, as before).
        from parser import extract_text_from_pdf, extract_text_from_docx
        if ext == ".pdf":
            text = extract_text_from_pdf(file_path)
        else:
            text = extract_text_from_docx(file_path)
        if not text or len(text.strip()) < 10:
            yield "⚠ Failed to extract meaningful text.", None, None, None
            return

        yield " Finding clauses...", None, None, None
        time.sleep(0.1)

        # NOTE(review): the spaCy model is loaded on every call; consider
        # loading it once if request latency matters.
        import spacy
        nlp = spacy.load("en_core_web_sm")
        doc = nlp(text)

        from spacy_matcher import find_clauses
        matches = find_clauses(text)
        if not matches:
            yield " No clauses detected.", None, None, None
            return

        yield f" Analyzing {len(matches)} clauses with LLM...", None, None, None
        time.sleep(0.1)

        # Analyzing with LLM: one review per detected clause.
        from llm_reviewer import review_clause_with_llm, get_clause_section
        results = []
        for label, _, start, end in matches:
            section = get_clause_section(doc, start, end, window_size=30)
            review = review_clause_with_llm(label, section)
            results.append({
                "label": label,
                "section": section,
                "review": review,
            })

        from llm_reviewer import export_to_json, export_to_pdf
        json_path = export_to_json(results, os.path.join(OUTPUT_DIR, "clause_reviews.json"))
        pdf_path = export_to_pdf(results, os.path.join(OUTPUT_DIR, "clause_reviews.pdf"))

        output_text = _format_reviews_markdown(results)

        # Distinct clause types, title-cased and sorted for a stable summary.
        found_types = sorted({r["label"].replace("_", " ").title() for r in results})
        clause_list = ", ".join(found_types)

        yield (
            f"Found {len(results)} clauses across {len(found_types)} types:\n\n{clause_list}",
            output_text,
            json_path,
            pdf_path,
        )
    except Exception as e:
        # Top-level UI boundary: surface the failure in the status box instead
        # of letting the event handler raise.
        tb = traceback.format_exc()
        error_msg = f" Error: {str(e)}\n\n```\n{tb}\n```"
        yield error_msg, None, None, None
# Gradio Interface: one upload widget, one action button, a status line,
# the rendered reviews, and two download slots for the generated reports.
with gr.Blocks(title="ClauseLens - Legal Contract Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ClauseLens: Legal Contract Analyzer")
    gr.Markdown("Upload a legal contract (PDF or DOCX) for clause detection and LLM-powered review.")

    with gr.Row():
        # The extension filter mirrors the formats process_contract accepts.
        file_input = gr.File(label="Upload Contract", file_types=[".pdf", ".docx"])
    with gr.Row():
        btn = gr.Button(" Analyze Contract", variant="primary")
    with gr.Row():
        status = gr.Textbox(label="Status")
    with gr.Row():
        output = gr.Markdown(label="Clause Reviews")
    with gr.Row():
        gr.Markdown("### 📎 Download Reports")
    with gr.Row():
        json_download = gr.File(label="Download JSON Report")
        pdf_download = gr.File(label="Download PDF Report")

    # The outputs are listed in the same order as the 4-tuples yielded by
    # process_contract: status, reviews markdown, JSON report, PDF report.
    btn.click(
        fn=process_contract,
        inputs=file_input,
        outputs=[status, output, json_download, pdf_download]
    )

# Enable queuing for streaming
demo.queue()
def main():
    """Launch the Gradio app with share=True, printing a message if launch fails."""
    try:
        demo.launch(share=True)
    except Exception as launch_error:
        # Best effort: report the failure on stdout rather than raising.
        print(f"Launch failed: {launch_error}")


if __name__ == "__main__":
    main()