"""Gradio Space — Contract Drafting Assistant. Runs on free CPU. Uses template engine + 37K clause retriever. GPU upgrade needed for LLM-powered drafting: switch to t4-small in Settings. """ import os, sys, json, shutil os.environ["PYTHONUNBUFFERED"] = "1" import gradio as gr import numpy as np from huggingface_hub import hf_hub_download, HfApi HUB_USER = "narcolepticchicken" WORK_DIR = "/tmp" os.makedirs(WORK_DIR, exist_ok=True) print("=== Contract Drafting Assistant ===") # Local imports — bundled in this Space repo from playbook import get_required_clauses, get_fallback_position from dense_retriever import ClauseRetrieverV4 try: from templates_v2 import render_template except Exception as e: print(f"[WARN] templates_v2 not available: {e}") render_template = None CONTRACT_TYPES = [ "saas_agreement", "msa", "nda", "dpa", "sow", "vendor_agreement", "consulting_agreement", "ip_assignment", "employment_agreement", ] PARTY_POSITIONS = ["pro_company", "balanced", "pro_counterparty"] _retriever = None def get_retriever(): global _retriever if _retriever: return _retriever INDEX_HUB = f"{HUB_USER}/contract-clause-index-v1" INDEX_PATH = os.path.join(WORK_DIR, "dense_index") # Try to use HF token if available (secret or env var) token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or True api = HfApi(token=token) try: api.list_repo_files(INDEX_HUB, repo_type="dataset") os.makedirs(INDEX_PATH, exist_ok=True) for fn in ["corpus.json", "embeddings.npy", "bm25.pkl"]: shutil.copy( hf_hub_download(repo_id=INDEX_HUB, filename=fn, repo_type="dataset", token=token), os.path.join(INDEX_PATH, fn), ) _retriever = ClauseRetrieverV4() _retriever.load(INDEX_PATH) print(f" [OK] Retriever loaded ({len(_retriever.corpus)} clauses)") except Exception as e: print(f" [WARN] No index: {e}") _retriever = None return _retriever def retrieve_precedents(clause_name, contract_type): """Get reference clauses from EDGAR corpus.""" retriever = get_retriever() if not retriever: return "" try: results = retriever.retrieve( query=f"{clause_name.replace('_',' ')} {contract_type.replace('_',' ')}", top_k=2, ) if results: return "REFERENCES (EDGAR):\n\n" + "\n---\n".join( f"{r['clause_text'][:500]}" for r in results[:2] ) except Exception as e: print(f" [RETRIEVE FAIL] {clause_name}: {e}") return "" def draft_with_template(clause_name, contract_type, party_position, company, counterparty, deal_context, constraints_text, governing_law): """Draft a clause using the template engine with context.""" if not render_template: return f"[{clause_name.replace('_', ' ').title()} — template engine unavailable]" cons = constraints_text.split(",") if constraints_text else [] fb = get_fallback_position(clause_name, party_position) ctx = { "company_name": company, "counterparty_name": counterparty, "contract_type": contract_type, "party_position": party_position, "governing_law": governing_law, "deal_context": deal_context, "business_constraints": cons, } try: text = render_template(clause_name, ctx, fb or {}) if text and len(text) >= 5: return text except Exception as e: print(f" [TEMPLATE FAIL] {clause_name}: {e}") return f"[{clause_name.replace('_', ' ').title()} — no template available]" def draft_contract(contract_type, party_position, company, counterparty, deal_context, constraints_text, governing_law, progress=gr.Progress()): """Draft a full contract clause by clause.""" # Build clause order required = get_required_clauses(contract_type) standard = [ "entire_agreement", "severability", "waiver", "force_majeure", "notices", "relationship_of_parties", "no_third_party_beneficiaries", "amendments", "governing_law", "signature_block", ] order = ["preamble", "definitions"] + required + standard seen = set() order = [x for x in order if not (x in seen or seen.add(x))] total = len(order) clauses = [] for i, clause_name in enumerate(order): progress((i + 1) / total, f"Drafting: {clause_name.replace('_', ' ')}...") # Get precedents for reference precedents = retrieve_precedents(clause_name, contract_type) # Draft via template text = draft_with_template( clause_name, contract_type, party_position, company, counterparty, deal_context, constraints_text, governing_law, ) clauses.append({ "name": clause_name, "text": text, "precedents": precedents, }) # Render markdown md = f"# {contract_type.replace('_', ' ').title()}\n\n" md += f"**{company} ⬄ {counterparty}**\n\n" md += f"**Position**: {party_position.replace('_', ' ').title()}" md += f" | **Governing Law**: {governing_law}\n\n" md += f"**Deal**: {deal_context}\n" md += f"**Constraints**: {constraints_text or 'none'}\n\n---\n\n" for c in clauses: md += f"## {c['name'].replace('_', ' ').title()}\n\n{c['text']}\n\n" if c.get("precedents"): md += f"
\n📚 Precedent References\n\n{c['precedents']}\n
\n\n" md += "---\n\n" # Stats md += f"\n\n---\n*Drafted {total} clauses using template engine + {len(order)} clause playbook.*\n" md += "*Upgrade to GPU hardware (t4-small in Space Settings) for AI-powered LLM drafting.*" return md with gr.Blocks(title="Contract Drafting Assistant", theme=gr.themes.Soft()) as demo: gr.Markdown("""# 📝 Contract Drafting Assistant **Draft full legal contracts from deal context and business constraints.** ⓘ Currently running on free CPU with template engine + 37K clause retriever. *Upgrade to GPU (t4-small) in Space Settings for SaulLM-7B AI drafting.* """) with gr.Row(): with gr.Column(scale=1): contract_type = gr.Dropdown( label="Contract Type", choices=CONTRACT_TYPES, value="saas_agreement", ) party_position = gr.Dropdown( label="Party Position", choices=PARTY_POSITIONS, value="pro_company", ) company = gr.Textbox(label="Company Name", placeholder="DataVault Analytics Inc") counterparty = gr.Textbox(label="Counterparty Name", placeholder="First Regional Bank") governing_law = gr.Textbox(label="Governing Law", value="Delaware") deal_context = gr.Textbox( label="Deal Context", lines=4, placeholder="Describe the deal: e.g., SaaS platform for real-time data analytics, 3-year term, $50K/yr...", ) constraints = gr.Textbox( label="Business Constraints (comma-separated)", lines=2, placeholder="SOC 2 Type II, annual billing, 99.9% uptime SLA, 30-day termination", ) draft_btn = gr.Button("Draft Contract", variant="primary", size="lg") with gr.Column(scale=3): output = gr.Markdown( label="Drafted Contract", value="*Enter deal details and click **Draft Contract** to generate your agreement.*", ) draft_btn.click( fn=draft_contract, inputs=[contract_type, party_position, company, counterparty, deal_context, constraints, governing_law], outputs=output, ) gr.Markdown("""--- ### How It Works 1. **Playbook** — looks up required clauses for your contract type + party position (129 rule pairs, 57 fallbacks) 2. **Dense Retriever** — BM25 + semantic search over 37,508 EDGAR contract clauses for precedent references 3. **Template Engine** — 100+ position-aware, context-aware clause templates across 9 contract types 4. **EDGAR References** — each clause cites real precedents from SEC filings ### Performance (V13 eval, LLM mode) | Contract Type | Overall Score | |---|---| | DPA | 0.935 | | NDA | 0.905 | | SaaS | 0.866 | | IP Assignment | 0.835 | | Consulting | 0.806 | | MSA | 0.740 | | Vendor | 0.713 | ### Upgrade to AI Drafting Switch Space hardware to **t4-small** in Space Settings → Restart → LLM takes over drafting with SaulLM-7B fine-tuned on 126 contract examples. """) demo.queue(default_concurrency_limit=5).launch(server_name="0.0.0.0")