# ClauseLens Gradio app (snapshot of a Hugging Face Space, commit ee559a2).
import gradio as gr
import os
import traceback
import time
OUTPUT_DIR = "output"

# Working directories used by the app; created eagerly so later file
# operations never fail on a missing path.
os.makedirs("data", exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs("models", exist_ok=True)


def _clear_output_dir():
    """Best-effort removal of report files left over from a previous run.

    Skips subdirectories and tolerates files that disappear concurrently,
    instead of letting an OSError abort the whole analysis (the original
    os.remove crashed on any directory entry).
    """
    for name in os.listdir(OUTPUT_DIR):
        path = os.path.join(OUTPUT_DIR, name)
        try:
            if os.path.isfile(path):
                os.remove(path)
        except OSError:
            # Stale outputs are cosmetic; never fail the run over them.
            pass


def process_contract(file):
    """Analyze an uploaded contract and stream progress to the Gradio UI.

    Parameters
    ----------
    file : uploaded-file object with a ``.name`` path attribute, or None.

    Yields
    ------
    tuple
        ``(status_message, markdown_report, json_path, pdf_path)``; the
        last three are ``None`` until the analysis completes (or on error).
    """
    try:
        if file is None:
            yield " No file uploaded.", None, None, None
            return
        file_path = file.name
        ext = os.path.splitext(file_path)[1].lower()
        if ext not in (".pdf", ".docx"):
            yield f" Unsupported format: {ext}", None, None, None
            return
        # Only purge previous reports once we know a new analysis will run
        # (the original wiped them even for an empty/unsupported upload).
        _clear_output_dir()
        yield " Extracting text...", None, None, None
        time.sleep(0.1)  # give the UI a beat to render the status update
        # Project/ML dependencies are imported lazily so app startup stays fast.
        from parser import extract_text_from_pdf, extract_text_from_docx
        text = extract_text_from_pdf(file_path) if ext == ".pdf" else extract_text_from_docx(file_path)
        if not text or len(text.strip()) < 10:
            yield "⚠ Failed to extract meaningful text.", None, None, None
            return
        yield " Finding clauses...", None, None, None
        time.sleep(0.1)
        import spacy
        # NOTE(review): the model is re-loaded on every request; cache it at
        # module level if this becomes a latency bottleneck.
        nlp = spacy.load("en_core_web_sm")
        doc = nlp(text)
        from spacy_matcher import find_clauses
        matches = find_clauses(text)
        if not matches:
            yield " No clauses detected.", None, None, None
            return
        yield f" Analyzing {len(matches)} clauses with LLM...", None, None, None
        time.sleep(0.1)
        # Review each matched clause span with the LLM.
        from llm_reviewer import review_clause_with_llm, get_clause_section
        results = []
        for label, _, start, end in matches:
            section = get_clause_section(doc, start, end, window_size=30)
            review = review_clause_with_llm(label, section)
            results.append({
                "label": label,
                "section": section,
                "review": review,
            })
        from llm_reviewer import export_to_json, export_to_pdf
        json_path = export_to_json(results, os.path.join(OUTPUT_DIR, "clause_reviews.json"))
        pdf_path = export_to_pdf(results, os.path.join(OUTPUT_DIR, "clause_reviews.pdf"))
        # Assemble the markdown report with join (avoids quadratic += growth).
        parts = ["## Clause Reviews\n\n"]
        for r in results:
            parts.append(f" **{r['label'].replace('_', ' ').title()}**\n\n")
            parts.append(f" *Excerpt:* {r['section'][:300]}...\n\n")
            parts.append(f" *Review:* {r['review']}\n\n---\n\n")
        output_text = "".join(parts)
        found_types = sorted({r['label'].replace('_', ' ').title() for r in results})
        clause_list = ", ".join(found_types)
        yield (
            f"Found {len(results)} clauses across {len(found_types)} types:\n\n{clause_list}",
            output_text,
            json_path,
            pdf_path,
        )
    except Exception as e:
        # Surface the full traceback in the status box rather than failing
        # silently inside the Gradio worker.
        tb = traceback.format_exc()
        error_msg = f" Error: {str(e)}\n\n```\n{tb}\n```"
        yield error_msg, None, None, None
# ---------------------------------------------------------------------------
# Gradio interface: one column of rows — upload, action button, status box,
# rendered markdown report, and the two downloadable report files.
# ---------------------------------------------------------------------------
with gr.Blocks(title="ClauseLens - Legal Contract Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ClauseLens: Legal Contract Analyzer")
    gr.Markdown("Upload a legal contract (PDF or DOCX) for clause detection and LLM-powered review.")

    with gr.Row():
        file_input = gr.File(label="Upload Contract", file_types=[".pdf", ".docx"])
    with gr.Row():
        btn = gr.Button(" Analyze Contract", variant="primary")
    with gr.Row():
        status = gr.Textbox(label="Status")
    with gr.Row():
        output = gr.Markdown(label="Clause Reviews")
    with gr.Row():
        gr.Markdown("### 📎 Download Reports")
    with gr.Row():
        json_download = gr.File(label="Download JSON Report")
        pdf_download = gr.File(label="Download PDF Report")

    # Each value yielded by process_contract streams into these four widgets.
    btn.click(
        fn=process_contract,
        inputs=file_input,
        outputs=[status, output, json_download, pdf_download],
    )

# Queuing is required for generator (streaming) event handlers.
demo.queue()
if __name__ == "__main__":
    # share=True exposes a temporary public Gradio URL in addition to the
    # local server; report launch failures (e.g. port already in use)
    # instead of dying with an unhandled traceback.
    try:
        demo.launch(share=True)
    except Exception as exc:
        print(f"Launch failed: {exc}")