# Hugging Face Spaces page header captured with the scrape (not code):
# Spaces: Sleeping
import gradio as gr
import os
import traceback
import time

# Directory that holds the generated reports; cleared on each analysis run.
OUTPUT_DIR = "output"

# Make sure every working directory exists before the app starts serving.
for _dir in ("data", OUTPUT_DIR, "models"):
    os.makedirs(_dir, exist_ok=True)
def process_contract(file):
    """Analyze an uploaded contract and stream progress updates to the UI.

    Generator used as a Gradio event handler. Every ``yield`` is a 4-tuple
    ``(status_message, markdown_report, json_path, pdf_path)``; the report and
    file paths stay ``None`` until the final result is ready.

    Args:
        file: Gradio file wrapper for the uploaded contract, or ``None`` when
            the user submitted without choosing a file.
    """
    try:
        # Validate the upload BEFORE clearing previous outputs, so an empty
        # or unsupported submission does not destroy the last good reports.
        if file is None:
            yield " No file uploaded.", None, None, None
            return
        file_path = file.name
        ext = os.path.splitext(file_path)[1].lower()
        if ext not in (".pdf", ".docx"):
            yield f" Unsupported format: {ext}", None, None, None
            return

        # Clear reports from any previous run (plain files only — skip
        # subdirectories, which os.remove would crash on).
        for entry in os.listdir(OUTPUT_DIR):
            entry_path = os.path.join(OUTPUT_DIR, entry)
            if os.path.isfile(entry_path):
                os.remove(entry_path)

        yield " Extracting text...", None, None, None
        time.sleep(0.1)  # give the UI a moment to render the status update

        # Heavy dependencies are imported lazily so the app starts fast.
        from parser import extract_text_from_pdf, extract_text_from_docx
        text = extract_text_from_pdf(file_path) if ext == ".pdf" else extract_text_from_docx(file_path)
        if not text or len(text.strip()) < 10:
            yield " Failed to extract meaningful text.", None, None, None
            return

        yield " Finding clauses...", None, None, None
        time.sleep(0.1)
        import spacy
        nlp = spacy.load("en_core_web_sm")
        doc = nlp(text)  # spaCy doc reused below for clause-context extraction
        from spacy_matcher import find_clauses
        matches = find_clauses(text)
        if not matches:
            yield " No clauses detected.", None, None, None
            return

        yield f" Analyzing {len(matches)} clauses with LLM...", None, None, None
        time.sleep(0.1)

        # Analyzing each matched clause with the LLM reviewer.
        from llm_reviewer import (
            review_clause_with_llm,
            get_clause_section,
            export_to_json,
            export_to_pdf,
        )
        results = []
        for label, _, start, end in matches:
            # Pull surrounding context out of the spaCy doc for a better review.
            section = get_clause_section(doc, start, end, window_size=30)
            review = review_clause_with_llm(label, section)
            results.append({
                "label": label,
                "section": section,
                "review": review
            })

        json_path = export_to_json(results, os.path.join(OUTPUT_DIR, "clause_reviews.json"))
        pdf_path = export_to_pdf(results, os.path.join(OUTPUT_DIR, "clause_reviews.pdf"))

        # Build the markdown report shown in the UI.
        output_text = "## Clause Reviews\n\n"
        for r in results:
            output_text += f" **{r['label'].replace('_', ' ').title()}**\n\n"
            output_text += f" *Excerpt:* {r['section'][:300]}...\n\n"
            output_text += f" *Review:* {r['review']}\n\n---\n\n"
        found_types = sorted(set(r['label'].replace('_', ' ').title() for r in results))
        clause_list = ", ".join(found_types)
        yield (
            f"Found {len(results)} clauses across {len(found_types)} types:\n\n{clause_list}",
            output_text,
            json_path,
            pdf_path
        )
    except Exception as e:
        # Surface the full traceback in the status box instead of failing silently.
        tb = traceback.format_exc()
        error_msg = f" Error: {str(e)}\n\n```\n{tb}\n```"
        yield error_msg, None, None, None
# Gradio Interface: upload widget, analyze button, streamed status text,
# a markdown report pane, and download slots for the JSON/PDF exports.
with gr.Blocks(title="ClauseLens - Legal Contract Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ClauseLens: Legal Contract Analyzer")
    gr.Markdown("Upload a legal contract (PDF or DOCX) for clause detection and LLM-powered review.")
    with gr.Row():
        # Restrict the picker to the formats process_contract accepts.
        file_input = gr.File(label="Upload Contract", file_types=[".pdf", ".docx"])
    with gr.Row():
        btn = gr.Button(" Analyze Contract", variant="primary")
    with gr.Row():
        # Receives the streamed progress messages from the generator.
        status = gr.Textbox(label="Status")
    with gr.Row():
        output = gr.Markdown(label="Clause Reviews")
    with gr.Row():
        # NOTE(review): "๐" looks like a mojibake'd emoji — confirm the intended glyph.
        gr.Markdown("### ๐ Download Reports")
    with gr.Row():
        json_download = gr.File(label="Download JSON Report")
        pdf_download = gr.File(label="Download PDF Report")
    # Wire the button to the streaming handler; outputs map 1:1 to the
    # 4-tuples yielded by process_contract.
    btn.click(
        fn=process_contract,
        inputs=file_input,
        outputs=[status, output, json_download, pdf_download]
    )
# Enable queuing for streaming — required so the generator's intermediate
# yields are pushed to the client as they happen.
demo.queue()
if __name__ == "__main__":
    try:
        # NOTE(review): share=True requests a public gradio.live tunnel;
        # presumably unnecessary when deployed on Spaces — confirm target.
        demo.launch(share=True)
    except Exception as e:
        print(f"Launch failed: {e}")