narcolepticchicken's picture
Upload app.py
d07f26a verified
"""Gradio Space — Contract Drafting Assistant.
Runs on free CPU. Uses template engine + 37K clause retriever.
GPU upgrade needed for LLM-powered drafting: switch to t4-small in Settings.
"""
import os, sys, json, shutil
# NOTE(review): CPython reads PYTHONUNBUFFERED at interpreter start-up, so
# setting it here presumably only affects child processes - confirm intent.
os.environ["PYTHONUNBUFFERED"] = "1"
import gradio as gr
import numpy as np
from huggingface_hub import hf_hub_download, HfApi
# Hub account name; get_retriever() prefixes the index repo id with it.
HUB_USER = "narcolepticchicken"
# Scratch directory; get_retriever() copies the downloaded index beneath it.
WORK_DIR = "/tmp"
os.makedirs(WORK_DIR, exist_ok=True)
print("=== Contract Drafting Assistant ===")
# Local imports — bundled in this Space repo
from playbook import get_required_clauses, get_fallback_position
from dense_retriever import ClauseRetrieverV4
# The template engine is optional: if importing it fails for any reason the
# app still starts, and ``render_template`` is left as None so callers can
# detect that no engine is available.
try:
    from templates_v2 import render_template
except Exception as import_error:
    render_template = None
    print(f"[WARN] templates_v2 not available: {import_error}")
# Choices offered by the "Contract Type" dropdown.
CONTRACT_TYPES = [
    "saas_agreement",
    "msa",
    "nda",
    "dpa",
    "sow",
    "vendor_agreement",
    "consulting_agreement",
    "ip_assignment",
    "employment_agreement",
]

# Choices offered by the "Party Position" dropdown.
PARTY_POSITIONS = [
    "pro_company",
    "balanced",
    "pro_counterparty",
]
# Process-wide cache for the clause retriever; stays None until a load succeeds.
_retriever = None


def get_retriever():
    """Return the shared ``ClauseRetrieverV4``, loading its index on first use.

    The three index files are downloaded from the
    ``<HUB_USER>/contract-clause-index-v1`` dataset repo, copied into
    ``WORK_DIR/dense_index`` and handed to ``ClauseRetrieverV4.load``.

    Returns:
        The loaded retriever, or ``None`` when the index could not be
        fetched or loaded. Failures are printed rather than raised, and a
        failed attempt is not cached, so the next call tries again.
    """
    global _retriever
    # Identity check: a retriever object that happens to be falsy must still
    # count as "already loaded".
    if _retriever is not None:
        return _retriever

    index_repo = f"{HUB_USER}/contract-clause-index-v1"
    index_dir = os.path.join(WORK_DIR, "dense_index")

    # Use an explicit token when one is configured. Otherwise pass None so
    # huggingface_hub falls back to its default (anonymous access is fine for
    # a public repo). Passing True would make a token mandatory, which is
    # expected to raise when none is stored locally.
    token = (
        os.environ.get("HF_TOKEN")
        or os.environ.get("HUGGINGFACE_HUB_TOKEN")
        or None
    )
    api = HfApi(token=token)
    try:
        # Result is discarded; presumably a probe that the repo exists and is
        # readable before downloading anything.
        api.list_repo_files(index_repo, repo_type="dataset")
        os.makedirs(index_dir, exist_ok=True)
        for filename in ("corpus.json", "embeddings.npy", "bm25.pkl"):
            downloaded = hf_hub_download(
                repo_id=index_repo,
                filename=filename,
                repo_type="dataset",
                token=token,
            )
            shutil.copy(downloaded, os.path.join(index_dir, filename))
        # Build into a local and publish only after load() succeeds, so a
        # concurrent request never sees a half-initialised retriever.
        loaded = ClauseRetrieverV4()
        loaded.load(index_dir)
        _retriever = loaded
        print(f" [OK] Retriever loaded ({len(_retriever.corpus)} clauses)")
    except Exception as exc:
        # Best effort: the app keeps working without precedent references.
        print(f" [WARN] No index: {exc}")
        _retriever = None
    return _retriever
def retrieve_precedents(clause_name, contract_type):
    """Look up reference clauses for one clause of a contract.

    Queries the shared retriever with the clause name and contract type
    (underscores turned into spaces) and formats up to two hits, each cut to
    500 characters, under a ``REFERENCES (EDGAR):`` heading.

    Returns:
        The formatted reference text, or ``""`` when no retriever is
        available, nothing was found, or the lookup raised (the error is
        printed).
    """
    engine = get_retriever()
    if not engine:
        return ""
    try:
        search_text = " ".join(
            (clause_name.replace("_", " "), contract_type.replace("_", " "))
        )
        hits = engine.retrieve(query=search_text, top_k=2)
        if not hits:
            return ""
        excerpts = [f"{hit['clause_text'][:500]}" for hit in hits[:2]]
        return "REFERENCES (EDGAR):\n\n" + "\n---\n".join(excerpts)
    except Exception as exc:
        # Precedents are optional decoration; never let them break drafting.
        print(f" [RETRIEVE FAIL] {clause_name}: {exc}")
    return ""
def draft_with_template(clause_name, contract_type, party_position, company, counterparty,
                        deal_context, constraints_text, governing_law):
    """Draft a single clause with the template engine.

    Args:
        clause_name: Clause identifier, e.g. ``"force_majeure"``.
        contract_type: Contract type identifier passed through to the template.
        party_position: Negotiating position; also used to look up the
            playbook fallback for this clause.
        company: Company name placed in the template context.
        counterparty: Counterparty name placed in the template context.
        deal_context: Free-text deal description.
        constraints_text: Comma-separated business constraints; may be empty
            or ``None``.
        governing_law: Governing-law jurisdiction.

    Returns:
        The rendered clause text, or a bracketed placeholder when the
        template engine is missing, raises, or yields fewer than five
        characters.
    """
    title = clause_name.replace("_", " ").title()
    if not render_template:
        return f"[{title} — template engine unavailable]"

    # Trim whitespace around each entry and drop blanks so "a, b," yields
    # ["a", "b"] rather than ["a", " b", ""].
    constraints = [
        item.strip()
        for item in (constraints_text or "").split(",")
        if item.strip()
    ]
    fallback = get_fallback_position(clause_name, party_position)
    context = {
        "company_name": company,
        "counterparty_name": counterparty,
        "contract_type": contract_type,
        "party_position": party_position,
        "governing_law": governing_law,
        "deal_context": deal_context,
        "business_constraints": constraints,
    }
    try:
        text = render_template(clause_name, context, fallback or {})
        # Anything shorter than five characters is treated as "no template".
        if text and len(text) >= 5:
            return text
    except Exception as exc:
        # Best effort: a broken template must not abort the whole contract.
        print(f" [TEMPLATE FAIL] {clause_name}: {exc}")
    return f"[{title} — no template available]"
def draft_contract(contract_type, party_position, company, counterparty,
                   deal_context, constraints_text, governing_law, progress=gr.Progress()):
    """Draft a full contract clause by clause and return it as Markdown.

    The clause order is ``preamble``, ``definitions``, the clauses the
    playbook requires for ``contract_type``, then a fixed list of standard
    boilerplate clauses; duplicates are dropped, keeping the first
    occurrence. Each clause is drafted with the template engine and paired
    with precedent references when the retriever returns any.

    Args:
        contract_type: Contract type identifier (see ``CONTRACT_TYPES``).
        party_position: Negotiating position (see ``PARTY_POSITIONS``).
        company: Company name shown in the header and passed to templates.
        counterparty: Counterparty name shown in the header and passed to
            templates.
        deal_context: Free-text deal description.
        constraints_text: Comma-separated business constraints; may be empty.
        governing_law: Governing-law jurisdiction.
        progress: Gradio progress tracker, called once per clause.

    Returns:
        The complete contract as a Markdown string.
    """
    required = get_required_clauses(contract_type)
    standard = [
        "entire_agreement", "severability", "waiver", "force_majeure", "notices",
        "relationship_of_parties", "no_third_party_beneficiaries",
        "amendments", "governing_law", "signature_block",
    ]
    # ``required or ()`` tolerates a None result (e.g. an unknown contract
    # type - TODO confirm what the playbook returns) and unpacking accepts any
    # iterable. dict.fromkeys de-duplicates while preserving first-seen order.
    order = list(dict.fromkeys(["preamble", "definitions", *(required or ()), *standard]))
    total = len(order)

    clauses = []
    for i, clause_name in enumerate(order):
        progress((i + 1) / total, f"Drafting: {clause_name.replace('_', ' ')}...")
        # Precedents are reference material only; the clause text itself
        # always comes from the template engine.
        precedents = retrieve_precedents(clause_name, contract_type)
        text = draft_with_template(
            clause_name, contract_type, party_position, company, counterparty,
            deal_context, constraints_text, governing_law,
        )
        clauses.append({
            "name": clause_name,
            "text": text,
            "precedents": precedents,
        })

    # Collect the Markdown fragments and join once at the end.
    parts = [
        f"# {contract_type.replace('_', ' ').title()}\n\n",
        f"**{company} ⬄ {counterparty}**\n\n",
        f"**Position**: {party_position.replace('_', ' ').title()}",
        f" | **Governing Law**: {governing_law}\n\n",
        f"**Deal**: {deal_context}\n",
        f"**Constraints**: {constraints_text or 'none'}\n\n---\n\n",
    ]
    for clause in clauses:
        parts.append(
            f"## {clause['name'].replace('_', ' ').title()}\n\n{clause['text']}\n\n"
        )
        if clause.get("precedents"):
            parts.append(
                "<details>\n<summary>📚 Precedent References</summary>\n\n"
                f"{clause['precedents']}\n</details>\n\n"
            )
        parts.append("---\n\n")

    # Footer with drafting stats and the hardware-upgrade hint.
    parts.append(
        f"\n\n---\n*Drafted {total} clauses using template engine + {len(order)} clause playbook.*\n"
    )
    parts.append("*Upgrade to GPU hardware (t4-small in Space Settings) for AI-powered LLM drafting.*")
    return "".join(parts)
# Gradio UI: inputs in a narrow left column, the drafted contract in a wide
# right column, followed by a static "How It Works" section.
# Markdown literals are kept flush-left so no line gains leading whitespace.
with gr.Blocks(title="Contract Drafting Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""# 📝 Contract Drafting Assistant
**Draft full legal contracts from deal context and business constraints.**
ⓘ Currently running on free CPU with template engine + 37K clause retriever.
*Upgrade to GPU (t4-small) in Space Settings for SaulLM-7B AI drafting.*
""")
    with gr.Row():
        with gr.Column(scale=1):
            contract_type = gr.Dropdown(
                label="Contract Type",
                choices=CONTRACT_TYPES,
                value="saas_agreement",
            )
            party_position = gr.Dropdown(
                label="Party Position",
                choices=PARTY_POSITIONS,
                value="pro_company",
            )
            company = gr.Textbox(label="Company Name", placeholder="DataVault Analytics Inc")
            counterparty = gr.Textbox(label="Counterparty Name", placeholder="First Regional Bank")
            governing_law = gr.Textbox(label="Governing Law", value="Delaware")
            deal_context = gr.Textbox(
                label="Deal Context",
                lines=4,
                placeholder="Describe the deal: e.g., SaaS platform for real-time data analytics, 3-year term, $50K/yr...",
            )
            constraints = gr.Textbox(
                label="Business Constraints (comma-separated)",
                lines=2,
                placeholder="SOC 2 Type II, annual billing, 99.9% uptime SLA, 30-day termination",
            )
            draft_btn = gr.Button("Draft Contract", variant="primary", size="lg")
        with gr.Column(scale=3):
            output = gr.Markdown(
                label="Drafted Contract",
                value="*Enter deal details and click **Draft Contract** to generate your agreement.*",
            )
    # Input order must match draft_contract's positional parameters.
    draft_btn.click(
        fn=draft_contract,
        inputs=[contract_type, party_position, company, counterparty, deal_context, constraints, governing_law],
        outputs=output,
    )
    gr.Markdown("""---
### How It Works
1. **Playbook** — looks up required clauses for your contract type + party position (129 rule pairs, 57 fallbacks)
2. **Dense Retriever** — BM25 + semantic search over 37,508 EDGAR contract clauses for precedent references
3. **Template Engine** — 100+ position-aware, context-aware clause templates across 9 contract types
4. **EDGAR References** — each clause cites real precedents from SEC filings
### Performance (V13 eval, LLM mode)
| Contract Type | Overall Score |
|---|---|
| DPA | 0.935 |
| NDA | 0.905 |
| SaaS | 0.866 |
| IP Assignment | 0.835 |
| Consulting | 0.806 |
| MSA | 0.740 |
| Vendor | 0.713 |
### Upgrade to AI Drafting
Switch Space hardware to **t4-small** in Space Settings → Restart → LLM takes over drafting with SaulLM-7B fine-tuned on 126 contract examples.
""")

# Queue requests (up to five handled concurrently per event by default) and
# listen on all interfaces.
demo.queue(default_concurrency_limit=5).launch(server_name="0.0.0.0")