solfedge committed on
Commit
ee559a2
·
verified ·
1 Parent(s): b7bf256

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -0
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import os
4
+ import traceback
5
+ import time
6
+
7
+
8
# Directory that process_contract() clears and then writes its JSON/PDF
# reports into. The path is relative to the current working directory.
OUTPUT_DIR = "output"

# Create the working directories at import time so later code can assume they
# exist. "data" and "models" are not referenced elsewhere in this file --
# presumably the helper modules use them; verify before removing.
for _directory in ("data", OUTPUT_DIR, "models"):
    os.makedirs(_directory, exist_ok=True)
12
+
13
# spaCy pipelines already loaded by this process, keyed by model name.
_NLP_PIPELINES = {}

# Upload extensions process_contract() knows how to extract text from.
_SUPPORTED_EXTENSIONS = (".pdf", ".docx")


def _get_nlp(model_name="en_core_web_sm"):
    """Return the spaCy pipeline for *model_name*, loading it on first use.

    Loading a model is expensive, so it is done once per process rather than
    once per analysed contract.
    """
    if model_name not in _NLP_PIPELINES:
        import spacy  # deferred so that importing this module stays cheap

        _NLP_PIPELINES[model_name] = spacy.load(model_name)
    return _NLP_PIPELINES[model_name]


def _clear_output_dir(directory):
    """Delete the files left in *directory* by a previous run."""
    for entry in os.listdir(directory):
        path = os.path.join(directory, entry)
        # os.remove() raises on directories; a stray sub-directory must not
        # abort the whole analysis, so only files and symlinks are removed.
        if os.path.isfile(path) or os.path.islink(path):
            os.remove(path)


def _format_reviews(results):
    """Render the collected clause reviews as one Markdown string."""
    parts = ["## Clause Reviews\n\n"]
    for r in results:
        parts.append(f" **{r['label'].replace('_', ' ').title()}**\n\n")
        parts.append(f" *Excerpt:* {r['section'][:300]}...\n\n")
        parts.append(f" *Review:* {r['review']}\n\n---\n\n")
    return "".join(parts)


def process_contract(file):
    """Analyse an uploaded contract and stream progress to the UI.

    This is a generator. Every yielded value is a 4-tuple
    ``(status, reviews_markdown, json_path, pdf_path)``; progress and failure
    updates carry ``None`` in the last three slots, and only the final
    successful update fills them all.

    Args:
        file: The upload from the file input. ``None`` means nothing was
            uploaded. Otherwise either a path (``str`` / ``os.PathLike``) or
            an object exposing the path as ``.name``.

    Yields:
        tuple: ``(status, reviews_markdown, json_path, pdf_path)``.

    Any exception raised while processing is caught and reported as a final
    status message that includes the traceback; nothing propagates to the
    caller.
    """
    try:
        # Clearing previous outputs
        _clear_output_dir(OUTPUT_DIR)

        if file is None:
            yield " No file uploaded.", None, None, None
            return

        # Depending on Gradio version/configuration the upload arrives as a
        # plain path or as a file-like wrapper with the path in ``.name``.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name
        ext = os.path.splitext(file_path)[1].lower()

        if ext not in _SUPPORTED_EXTENSIONS:
            yield f" Unsupported format: {ext}", None, None, None
            return

        yield " Extracting text...", None, None, None
        time.sleep(0.1)

        # Extract text (project-local helpers, imported lazily)
        from parser import extract_text_from_pdf, extract_text_from_docx

        if ext == ".pdf":
            text = extract_text_from_pdf(file_path)
        else:
            text = extract_text_from_docx(file_path)

        if not text or len(text.strip()) < 10:
            yield "⚠ Failed to extract meaningful text.", None, None, None
            return

        yield " Finding clauses...", None, None, None
        time.sleep(0.1)

        doc = _get_nlp("en_core_web_sm")(text)

        from spacy_matcher import find_clauses

        matches = find_clauses(text)
        if not matches:
            yield " No clauses detected.", None, None, None
            return

        yield f" Analyzing {len(matches)} clauses with LLM...", None, None, None
        time.sleep(0.1)

        # Analyzing with LLM
        from llm_reviewer import (
            export_to_json,
            export_to_pdf,
            get_clause_section,
            review_clause_with_llm,
        )

        results = []
        # Each match unpacks as (label, <unused>, start, end).
        for label, _, start, end in matches:
            section = get_clause_section(doc, start, end, window_size=30)
            review = review_clause_with_llm(label, section)
            results.append({
                "label": label,
                "section": section,
                "review": review
            })

        json_path = export_to_json(results, os.path.join(OUTPUT_DIR, "clause_reviews.json"))
        pdf_path = export_to_pdf(results, os.path.join(OUTPUT_DIR, "clause_reviews.pdf"))

        output_text = _format_reviews(results)

        # Distinct clause types, prettified and sorted for the status line.
        found_types = sorted({r['label'].replace('_', ' ').title() for r in results})
        clause_list = ", ".join(found_types)

        yield (
            f"Found {len(results)} clauses across {len(found_types)} types:\n\n{clause_list}",
            output_text,
            json_path,
            pdf_path
        )

    except Exception as e:
        # Top-level boundary of the UI callback: report the failure in the
        # status box instead of letting it propagate.
        tb = traceback.format_exc()
        error_msg = f" Error: {str(e)}\n\n```\n{tb}\n```"
        yield error_msg, None, None, None
100
+
101
+
102
# Gradio Interface
# Layout, top to bottom: heading, upload widget, analyse button, status box,
# rendered reviews, and the two report downloads.
with gr.Blocks(title="ClauseLens - Legal Contract Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ClauseLens: Legal Contract Analyzer")
    gr.Markdown("Upload a legal contract (PDF or DOCX) for clause detection and LLM-powered review.")

    with gr.Row():
        file_input = gr.File(label="Upload Contract", file_types=[".pdf", ".docx"])

    with gr.Row():
        btn = gr.Button(" Analyze Contract", variant="primary")

    with gr.Row():
        status = gr.Textbox(label="Status")

    with gr.Row():
        output = gr.Markdown(label="Clause Reviews")

    with gr.Row():
        gr.Markdown("### 📎 Download Reports")

    with gr.Row():
        json_download = gr.File(label="Download JSON Report")
        pdf_download = gr.File(label="Download PDF Report")

    # The order of `outputs` must match the 4-tuples that process_contract
    # yields: (status, reviews markdown, JSON path, PDF path).
    btn.click(
        fn=process_contract,
        inputs=file_input,
        outputs=[status, output, json_download, pdf_download]
    )

# Enable queuing for streaming
demo.queue()
135
+
136
# Script entry point: start the Gradio server when the file is run directly.
if __name__ == "__main__":
    try:
        # share=True additionally asks Gradio for a public share link.
        demo.launch(share=True)
    except Exception as e:
        print(f"Launch failed: {e}")
        # Exit non-zero so a supervisor or shell can tell the launch failed.
        raise SystemExit(1) from e