"""ClauseLens: Gradio app for clause detection and LLM-powered contract review."""

import os
import time
import traceback

import gradio as gr

OUTPUT_DIR = "output"

# Make sure the working directories exist before the app starts.
os.makedirs("data", exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs("models", exist_ok=True)


def process_contract(file):
    """Analyze an uploaded contract, streaming progress updates to the UI.

    Generator yielding 4-tuples ``(status, markdown_reviews, json_path,
    pdf_path)``; intermediate yields carry ``None`` for outputs that are
    not ready yet.

    Args:
        file: Gradio file object (exposes a ``.name`` path) or ``None``.
    """
    try:
        # Clear previous outputs. Only remove plain files so a stray
        # subdirectory inside OUTPUT_DIR does not raise IsADirectoryError
        # (the original called os.remove on every entry unconditionally).
        for entry in os.listdir(OUTPUT_DIR):
            entry_path = os.path.join(OUTPUT_DIR, entry)
            if os.path.isfile(entry_path):
                os.remove(entry_path)

        if file is None:
            yield " No file uploaded.", None, None, None
            return

        file_path = file.name
        ext = os.path.splitext(file_path)[1].lower()
        if ext not in (".pdf", ".docx"):
            yield f" Unsupported format: {ext}", None, None, None
            return

        yield " Extracting text...", None, None, None
        time.sleep(0.1)  # give the status yield a moment to reach the UI

        # Extract text (project-local parser module; imported lazily so the
        # app can start even when heavy optional deps are slow to import).
        from parser import extract_text_from_pdf, extract_text_from_docx
        text = (
            extract_text_from_pdf(file_path)
            if ext == ".pdf"
            else extract_text_from_docx(file_path)
        )
        if not text or len(text.strip()) < 10:
            yield "⚠ Failed to extract meaningful text.", None, None, None
            return

        yield " Finding clauses...", None, None, None
        time.sleep(0.1)

        import spacy

        # Cache the spaCy pipeline on the function object: loading the model
        # is expensive and the original reloaded it on every request.
        if getattr(process_contract, "_nlp", None) is None:
            process_contract._nlp = spacy.load("en_core_web_sm")
        doc = process_contract._nlp(text)

        from spacy_matcher import find_clauses
        matches = find_clauses(text)
        if not matches:
            yield " No clauses detected.", None, None, None
            return

        yield f" Analyzing {len(matches)} clauses with LLM...", None, None, None
        time.sleep(0.1)

        # Review each matched clause with the LLM.
        from llm_reviewer import review_clause_with_llm, get_clause_section
        results = []
        for label, _, start, end in matches:
            section = get_clause_section(doc, start, end, window_size=30)
            review = review_clause_with_llm(label, section)
            results.append({"label": label, "section": section, "review": review})

        from llm_reviewer import export_to_json, export_to_pdf
        json_path = export_to_json(
            results, os.path.join(OUTPUT_DIR, "clause_reviews.json")
        )
        pdf_path = export_to_pdf(
            results, os.path.join(OUTPUT_DIR, "clause_reviews.pdf")
        )

        # Build the markdown report.
        # NOTE(review): the source for the *Review:* line was garbled by a
        # line break in the middle of the f-string literal; reconstructed
        # here as a single valid literal so the module parses.
        output_text = "## Clause Reviews\n\n"
        for r in results:
            output_text += f" **{r['label'].replace('_', ' ').title()}**\n\n"
            output_text += f" *Excerpt:* {r['section'][:300]}...\n\n"
            output_text += f" *Review:* {r['review']}\n\n---\n\n"

        found_types = sorted(
            set(r["label"].replace("_", " ").title() for r in results)
        )
        clause_list = ", ".join(found_types)
        yield (
            f"Found {len(results)} clauses across {len(found_types)} types:\n\n{clause_list}",
            output_text,
            json_path,
            pdf_path,
        )
    except Exception as e:
        # Surface the full traceback in the status box for debugging.
        tb = traceback.format_exc()
        error_msg = f" Error: {str(e)}\n\n```\n{tb}\n```"
        yield error_msg, None, None, None


# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------
with gr.Blocks(
    title="ClauseLens - Legal Contract Analyzer", theme=gr.themes.Soft()
) as demo:
    gr.Markdown("# ClauseLens: Legal Contract Analyzer")
    gr.Markdown(
        "Upload a legal contract (PDF or DOCX) for clause detection and LLM-powered review."
    )
    with gr.Row():
        file_input = gr.File(label="Upload Contract", file_types=[".pdf", ".docx"])
    with gr.Row():
        btn = gr.Button(" Analyze Contract", variant="primary")
    with gr.Row():
        status = gr.Textbox(label="Status")
    with gr.Row():
        output = gr.Markdown(label="Clause Reviews")
    with gr.Row():
        gr.Markdown("### 📎 Download Reports")
    with gr.Row():
        json_download = gr.File(label="Download JSON Report")
        pdf_download = gr.File(label="Download PDF Report")

    btn.click(
        fn=process_contract,
        inputs=file_input,
        outputs=[status, output, json_download, pdf_download],
    )

# Enable queuing so the generator's intermediate yields stream to the UI.
demo.queue()

if __name__ == "__main__":
    try:
        demo.launch(share=True)
    except Exception as e:
        print(f"Launch failed: {e}")