Spaces:

solfedge
/

Clause_Lense

Sleeping

App Files Files Community

Clause_Lense / app.py

solfedge

Upload app.py

ee559a2 verified 6 months ago

raw

history blame contribute delete

4.24 kB


	import gradio as gr
	import os
	import traceback
	import time


	# Directory that receives the generated JSON/PDF reports (see process_contract).
	OUTPUT_DIR = "output"

	# Create every working directory up front; exist_ok makes a restart harmless.
	# NOTE(review): "data" and "models" are not referenced elsewhere in this file;
	# presumably the helper modules use them -- confirm before removing.
	for _required_dir in ("data", OUTPUT_DIR, "models"):
	    os.makedirs(_required_dir, exist_ok=True)

	# File extensions the text-extraction helpers can handle.
	_SUPPORTED_EXTENSIONS = (".pdf", ".docx")

	# spaCy pipelines keyed by model name. Filled on first use so the model is
	# loaded once per process instead of once per analysed contract.
	_NLP_CACHE = {}


	def _clear_output_dir(directory):
	    """Remove the files a previous run left in *directory*."""
	    for entry in os.listdir(directory):
	        path = os.path.join(directory, entry)
	        # os.remove() cannot delete a real directory; skip those instead of
	        # aborting the whole analysis because of a stray sub-folder.
	        if os.path.isdir(path) and not os.path.islink(path):
	            continue
	        os.remove(path)


	def _get_nlp(model_name="en_core_web_sm"):
	    """Return the spaCy pipeline for *model_name*, loading it on first use."""
	    if model_name not in _NLP_CACHE:
	        # Imported lazily so a missing spaCy install surfaces inside
	        # process_contract's error handler rather than at import time.
	        import spacy
	        _NLP_CACHE[model_name] = spacy.load(model_name)
	    return _NLP_CACHE[model_name]


	def _clause_title(label):
	    """Turn a clause label such as ``some_label`` into ``Some Label``."""
	    return label.replace("_", " ").title()


	def _format_reviews(results):
	    """Render the review dicts as the Markdown document shown in the UI."""
	    parts = ["## Clause Reviews\n\n"]
	    for item in results:
	        parts.append(
	            f" {_clause_title(item['label'])}\n\n"
	            f" Excerpt: {item['section'][:300]}...\n\n"
	            f" Review: {item['review']}\n\n---\n\n"
	        )
	    return "".join(parts)


	def process_contract(file):
	    """Analyse an uploaded contract and stream progress to the Gradio UI.

	    This is a generator used as the click handler of the "Analyze Contract"
	    button. Every yielded value is a 4-tuple matching the handler's outputs:
	    ``(status_text, reviews_markdown, json_report, pdf_report)``. Progress
	    and failure updates carry ``None`` in the last three positions; only the
	    final, successful update fills all four.

	    Args:
	        file: The upload handed over by ``gr.File``: either an object whose
	            ``name`` attribute is the path of the uploaded file, or the path
	            itself as a string. ``None`` means nothing was uploaded.

	    Yields:
	        Status updates while the contract is processed, then the final
	        summary together with the rendered reviews and the values returned
	        by ``export_to_json`` / ``export_to_pdf`` (presumably the report
	        paths -- confirm against ``llm_reviewer``).

	    Any exception raised while processing is caught and reported as a status
	    message containing the error text and the full traceback.
	    """
	    try:
	        # Validate the upload first, so a missing or unsupported file does not
	        # wipe the reports produced by the previous run.
	        if file is None:
	            yield " No file uploaded.", None, None, None
	            return

	        file_path = getattr(file, "name", file)
	        ext = os.path.splitext(file_path)[1].lower()
	        if ext not in _SUPPORTED_EXTENSIONS:
	            yield f" Unsupported format: {ext}", None, None, None
	            return

	        # Clearing previous outputs
	        _clear_output_dir(OUTPUT_DIR)

	        yield " Extracting text...", None, None, None
	        # Short pause after each status update, kept from the original
	        # (presumably gives the UI time to render it -- confirm).
	        time.sleep(0.1)

	        # Extract text. The project helpers stay function-level imports: an
	        # import failure is then caught below and shown in the status box.
	        from parser import extract_text_from_pdf, extract_text_from_docx
	        extract = extract_text_from_pdf if ext == ".pdf" else extract_text_from_docx
	        text = extract(file_path)

	        if not text or len(text.strip()) < 10:
	            yield "⚠ Failed to extract meaningful text.", None, None, None
	            return

	        yield " Finding clauses...", None, None, None
	        time.sleep(0.1)

	        # Parsed document handed to get_clause_section for every match.
	        doc = _get_nlp()(text)

	        from spacy_matcher import find_clauses
	        matches = find_clauses(text)
	        if not matches:
	            yield " No clauses detected.", None, None, None
	            return

	        yield f" Analyzing {len(matches)} clauses with LLM...", None, None, None
	        time.sleep(0.1)

	        # Analyzing with LLM
	        from llm_reviewer import review_clause_with_llm, get_clause_section
	        results = []
	        # Each match is a 4-tuple; its second field is not used here.
	        for label, _, start, end in matches:
	            section = get_clause_section(doc, start, end, window_size=30)
	            results.append({
	                "label": label,
	                "section": section,
	                "review": review_clause_with_llm(label, section),
	            })

	        from llm_reviewer import export_to_json, export_to_pdf
	        json_path = export_to_json(results, os.path.join(OUTPUT_DIR, "clause_reviews.json"))
	        pdf_path = export_to_pdf(results, os.path.join(OUTPUT_DIR, "clause_reviews.pdf"))

	        output_text = _format_reviews(results)

	        # Distinct clause types, alphabetically, for the summary line.
	        found_types = sorted({_clause_title(item["label"]) for item in results})
	        clause_list = ", ".join(found_types)

	        yield (
	            f"Found {len(results)} clauses across {len(found_types)} types:\n\n{clause_list}",
	            output_text,
	            json_path,
	            pdf_path,
	        )

	    except Exception as e:
	        # Top-level boundary of the UI callback: show the failure to the user
	        # instead of letting the event handler die.
	        tb = traceback.format_exc()
	        error_msg = f" Error: {str(e)}\n\n```\n{tb}\n```"
	        yield error_msg, None, None, None


	# Gradio Interface
	# Layout, top to bottom: heading and description, upload field, action
	# button, status box, rendered reviews, then the two report downloads.
	with gr.Blocks(
	    title="ClauseLens - Legal Contract Analyzer",
	    theme=gr.themes.Soft(),
	) as demo:
	    gr.Markdown("# ClauseLens: Legal Contract Analyzer")
	    gr.Markdown("Upload a legal contract (PDF or DOCX) for clause detection and LLM-powered review.")

	    with gr.Row():
	        file_input = gr.File(
	            label="Upload Contract",
	            file_types=[".pdf", ".docx"],
	        )

	    with gr.Row():
	        btn = gr.Button(" Analyze Contract", variant="primary")

	    with gr.Row():
	        status = gr.Textbox(label="Status")

	    with gr.Row():
	        output = gr.Markdown(label="Clause Reviews")

	    with gr.Row():
	        gr.Markdown("### 📎 Download Reports")

	    with gr.Row():
	        json_download = gr.File(label="Download JSON Report")
	        pdf_download = gr.File(label="Download PDF Report")

	    # process_contract is a generator; each 4-tuple it yields is mapped onto
	    # these four outputs in order.
	    btn.click(
	        process_contract,
	        inputs=[file_input],
	        outputs=[status, output, json_download, pdf_download],
	    )

	# Enable queuing for streaming
	demo.queue()

	# Start the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    try:
	        demo.launch(share=True)
	    except Exception as launch_error:
	        # Report the failure on stdout instead of letting it propagate.
	        print(f"Launch failed: {launch_error}")