File size: 4,238 Bytes
ee559a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141

import gradio as gr
import os
import traceback
import time


OUTPUT_DIR = "output"
# Workspace directories the app relies on; create them up front so later
# file operations (clearing/exporting reports, model storage) can assume
# they exist.
for _workdir in ("data", OUTPUT_DIR, "models"):
    os.makedirs(_workdir, exist_ok=True)

def process_contract(file):
    """Analyze an uploaded contract and stream progress to the Gradio UI.

    Pipeline: validate the upload, extract text (PDF/DOCX), detect clauses
    with the spaCy matcher, review each clause with the LLM, then export
    JSON and PDF reports.

    Args:
        file: Gradio file wrapper (exposes ``.name`` with the temp path),
            or ``None`` when nothing was uploaded.

    Yields:
        4-tuples ``(status, markdown, json_path, pdf_path)``; intermediate
        yields carry ``None`` for the last three slots so Gradio shows
        progress, and the final yield carries the full results.
    """
    try:
        # Validate the upload BEFORE touching previous outputs, so a bad
        # or missing upload does not wipe the last successful run's reports.
        if file is None:
            yield " No file uploaded.", None, None, None
            return

        file_path = file.name
        ext = os.path.splitext(file_path)[1].lower()

        if ext not in [".pdf", ".docx"]:
            yield f" Unsupported format: {ext}", None, None, None
            return

        # Clear previous outputs — files only; os.remove cannot delete a
        # directory, so skip any that appear in OUTPUT_DIR.
        for f in os.listdir(OUTPUT_DIR):
            stale = os.path.join(OUTPUT_DIR, f)
            if os.path.isfile(stale):
                os.remove(stale)

        yield " Extracting text...", None, None, None
        time.sleep(0.1)

        # Extract raw text from the document (dispatch on extension).
        from parser import extract_text_from_pdf, extract_text_from_docx
        text = extract_text_from_pdf(file_path) if ext == ".pdf" else extract_text_from_docx(file_path)

        if not text or len(text.strip()) < 10:
            yield "⚠ Failed to extract meaningful text.", None, None, None
            return

        yield " Finding clauses...", None, None, None
        time.sleep(0.1)

        # Load the spaCy model once and cache it on the function so repeated
        # requests don't pay the model-load cost every time.
        import spacy
        nlp = getattr(process_contract, "_nlp", None)
        if nlp is None:
            nlp = spacy.load("en_core_web_sm")
            process_contract._nlp = nlp
        doc = nlp(text)

        # Pattern-match candidate clauses in the raw text.
        from spacy_matcher import find_clauses
        matches = find_clauses(text)
        if not matches:
            yield " No clauses detected.", None, None, None
            return

        yield f" Analyzing {len(matches)} clauses with LLM...", None, None, None
        time.sleep(0.1)

        # Review each matched clause with the LLM over a local token window.
        from llm_reviewer import review_clause_with_llm, get_clause_section
        results = []
        for label, _, start, end in matches:
            section = get_clause_section(doc, start, end, window_size=30)
            review = review_clause_with_llm(label, section)
            results.append({
                "label": label,
                "section": section,
                "review": review
            })

        # Export machine-readable (JSON) and printable (PDF) reports.
        from llm_reviewer import export_to_json, export_to_pdf
        json_path = export_to_json(results, os.path.join(OUTPUT_DIR, "clause_reviews.json"))
        pdf_path = export_to_pdf(results, os.path.join(OUTPUT_DIR, "clause_reviews.pdf"))

        # Build the Markdown summary shown in the UI.
        output_text = "##  Clause Reviews\n\n"
        for r in results:
            output_text += f" **{r['label'].replace('_', ' ').title()}**\n\n"
            output_text += f" *Excerpt:* {r['section'][:300]}...\n\n"
            output_text += f" *Review:* {r['review']}\n\n---\n\n"

        # Summarize the distinct clause types that were found.
        found_types = sorted(set(r['label'].replace('_', ' ').title() for r in results))
        clause_list = ", ".join(found_types)

        yield (
            f"Found {len(results)} clauses across {len(found_types)} types:\n\n{clause_list}",
            output_text,
            json_path,
            pdf_path
        )

    except Exception as e:
        # Surface the full traceback in the UI instead of failing silently.
        tb = traceback.format_exc()
        error_msg = f" Error: {str(e)}\n\n```\n{tb}\n```"
        yield error_msg, None, None, None


# Gradio interface — component creation order below defines the on-page
# layout; `demo` is the Blocks app launched at the bottom of the file.
with gr.Blocks(title="ClauseLens - Legal Contract Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("#  ClauseLens: Legal Contract Analyzer")
    gr.Markdown("Upload a legal contract (PDF or DOCX) for clause detection and LLM-powered review.")

    # Upload widget restricted to the two formats process_contract accepts.
    with gr.Row():
        file_input = gr.File(label="Upload Contract", file_types=[".pdf", ".docx"])

    with gr.Row():
        btn = gr.Button(" Analyze Contract", variant="primary")

    # Status line updated by each intermediate `yield` from process_contract.
    with gr.Row():
        status = gr.Textbox(label="Status")

    # Rendered Markdown holding the per-clause review summary.
    with gr.Row():
        output = gr.Markdown(label="Clause Reviews")

    with gr.Row():
        gr.Markdown("### 📎 Download Reports")

    # Download slots populated with the exported JSON/PDF report paths.
    with gr.Row():
        json_download = gr.File(label="Download JSON Report")
        pdf_download = gr.File(label="Download PDF Report")

    # Wire the button to the generator; the four outputs correspond
    # positionally to the 4-tuples yielded by process_contract.
    btn.click(
        fn=process_contract,
        inputs=file_input,
        outputs=[status, output, json_download, pdf_download]
    )

# Enable queuing so generator yields stream to the UI as progress updates.
demo.queue()

if __name__ == "__main__":
    try:
        # share=True additionally exposes a public Gradio link — TODO confirm
        # this is intended for the deployment environment.
        demo.launch(share=True)
    except Exception as e:
        print(f"Launch failed: {e}")