Spaces:

akshay1306
/

contractpulse

Running

App Files Files Community

akshay1306 committed on Apr 28

Commit

b3280aa

verified ·

1 Parent(s): c61aab1

Upload 7 files

Browse files

Files changed (7) hide show

Dockerfile +22 -0
clause_extractor.py +384 -0
main.py +658 -0
model3.py +200 -0
scheduler_api.py +548 -0
test_pipeline.py +81 -0
test_routes.py +14 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,22 @@

+FROM python:3.10-slim
+WORKDIR /app
+# Install system dependencies (needed for pandas, prophet, etc.)
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    gcc \
+    g++ \
+    && rm -rf /var/lib/apt/lists/*
+# Install Python dependencies before copying the source tree, so this layer
+# stays cached until requirements.txt itself changes.
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy backend code
+COPY . /app
+# Expose HF required port
+EXPOSE 7860
+# Start app
+CMD ["python", "main.py"]

clause_extractor.py ADDED Viewed

	@@ -0,0 +1,384 @@

+"""
+Cross-Contract Clause Extractor and Pair Generator
+Uses Groq API (groq.com) for clause extraction
+Feeds pairs into Model 3 (NLI conflict detection)
+Install: pip install groq
+API key: https://console.groq.com
+"""
+import os
+import json
+import torch
+from groq import Groq
+from transformers import pipeline as hf_pipeline, AutoTokenizer
+from dotenv import load_dotenv
+load_dotenv()
+# ── Config ────────────────────────────────────────────────────────────────────
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+print("API KEY:", GROQ_API_KEY)
+MODEL3_DIR = "../model_3"          # path to your saved Model 3
+GROQ_MODEL      = "openai/gpt-oss-120b"       # same model you're already using
+MAX_LEN         = 512                          # must match Model 3 training config
+CONF_THRESHOLD  = 0.7                         # flag pairs below this as uncertain
+CLAUSE_TYPES = [
+    "termination",
+    "warranty",
+    "indemnification",
+    "ip_ownership",
+    "dispute_resolution",
+    "confidentiality",
+    "liability_cap",
+    "governing_law",
+    "payment",
+    "non_compete",
+    "force_majeure",
+    "assignment",
+]
+# ── Groq client ───────────────────────────────────────────────────────────────
+# Module-level client, constructed once at import time with the key read above.
+groq_client = Groq(api_key=GROQ_API_KEY)
+# ── Step 1: Extract clauses from a single contract ────────────────────────────
+# System prompt: sets the model's role and demands a bare JSON array as output.
+EXTRACTION_SYSTEM_PROMPT = """You are a legal clause extraction engine.
+Your job is to extract distinct legal clauses from contract text.
+You must return ONLY a valid JSON array — no explanation, no markdown fences, no preamble.
+Never extract financial covenant clauses with numeric thresholds — those are handled separately."""
+# User prompt template with two placeholders, {clause_types} and {contract_text}.
+# The braces of the JSON example are doubled so str.format() emits them literally.
+EXTRACTION_USER_PROMPT = """Extract all distinct legal clauses from this contract.
+For each clause return:
+- clause_type: one of [{clause_types}]
+- clause_text: the core legal obligation rewritten concisely in 1-2 sentences. Max 60 words. Do NOT copy verbatim.
+Rules:
+- One entry per clause_type maximum. If duplicates exist, keep the most restrictive.
+- Skip purely numeric clauses like "maintain debt ratio >= 2.5" — financial covenants only.
+- Skip any clause that does not fit the listed types.
+Return format — JSON array only, nothing else:
+[
+  {{"clause_type": "termination", "clause_text": "Either party may terminate with 30 days written notice."}},
+  {{"clause_type": "dispute_resolution", "clause_text": "All disputes resolved through binding arbitration in New York."}}
+]
+Contract text:
+{contract_text}"""
+def extract_clauses(contract_text: str, contract_label: str) -> list[dict]:
+    """
+    Call Groq to extract and classify clauses from one contract.
+    Returns list of {clause_type, clause_text, contract} dicts.
+    """
+    prompt = EXTRACTION_USER_PROMPT.format(
+        clause_types=", ".join(CLAUSE_TYPES),
+        contract_text=contract_text.strip()
+    )
+    # Non-streaming — we need the full response before JSON parsing
+    completion = groq_client.chat.completions.create(
+        model=GROQ_MODEL,
+        messages=[
+            {"role": "system", "content": EXTRACTION_SYSTEM_PROMPT},
+            {"role": "user",   "content": prompt},
+        ],
+        temperature=0,              # deterministic extraction
+        max_completion_tokens=2048,
+        top_p=1,
+        reasoning_effort="medium",
+        stream=False,               # must be False — need full JSON before parsing
+        stop=None,
+    )
+    raw = completion.choices[0].message.content.strip()
+    # Strip markdown fences if model adds them anyway
+    if raw.startswith("```"):
+        raw = raw.split("```")[1]
+        if raw.startswith("json"):
+            raw = raw[4:]
+        raw = raw.strip()
+    try:
+        clauses = json.loads(raw)
+        # Handle if model wraps array in a dict
+        if isinstance(clauses, dict):
+            clauses = next(iter(clauses.values()))
+    except json.JSONDecodeError as e:
+        print(f"[ERROR] JSON parse failed for {contract_label}: {e}")
+        print(f"Raw response was:\n{raw[:400]}")
+        return []
+    for c in clauses:
+        c["contract"] = contract_label
+    print(f"\n[{contract_label}] Extracted {len(clauses)} clauses:")
+    for c in clauses:
+        print(f"  [{c['clause_type']}] {c['clause_text'][:80]}...")
+    return clauses
+# ── Step 2: Pair same-type clauses across contracts ───────────────────────────
+def generate_pairs(
+    clauses_a: list[dict],
+    clauses_b: list[dict],
+) -> list[dict]:
+    """
+    Match clauses of the same type across Contract A and Contract B.
+    Returns list of {clause_type, clause_a, clause_b} dicts.
+    """
+    index_a = {c["clause_type"]: c["clause_text"] for c in clauses_a}
+    index_b = {c["clause_type"]: c["clause_text"] for c in clauses_b}
+    matched_types   = set(index_a.keys()) & set(index_b.keys())
+    unmatched_types = set(index_a.keys()).symmetric_difference(set(index_b.keys()))
+    pairs = [
+        {
+            "clause_type": clause_type,
+            "clause_a":    index_a[clause_type],
+            "clause_b":    index_b[clause_type],
+        }
+        for clause_type in matched_types
+    ]
+    print(f"\n[PAIRING] {len(pairs)} matching types: {sorted(matched_types)}")
+    if unmatched_types:
+        print(f"[PAIRING] Only in one contract (skipped): {sorted(unmatched_types)}")
+    return pairs
+# ── Step 3: Validate token lengths before inference ───────────────────────────
+def check_token_length(tokenizer, clause_a: str, clause_b: str, max_len: int) -> int:
+    """Returns token count. Warns if truncation will occur."""
+    tokens = tokenizer(
+        f"{clause_a} [SEP] {clause_b}",
+        return_tensors="pt",
+        truncation=False,
+    )
+    length = tokens["input_ids"].shape[1]
+    if length > max_len:
+        print(f"  [WARN] {length} tokens > MAX_LEN {max_len} — will be truncated")
+    elif length > int(max_len * 0.85):
+        print(f"  [WARN] {length} tokens is close to limit ({max_len})")
+    return length
+# ── Step 4: Load and run Model 3 ─────────────────────────────────────────────
+def load_model3(model_dir: str, max_len: int):
+    tokenizer = AutoTokenizer.from_pretrained(model_dir)
+    device    = 0 if torch.cuda.is_available() else -1
+    pipe = hf_pipeline(
+        "text-classification",
+        model=model_dir,
+        tokenizer=tokenizer,
+        device=device,
+        top_k=None,
+        truncation=True,
+        max_length=max_len,
+        return_token_type_ids=False  # 🔥 ADD THIS
+    )
+    print(f"\n[MODEL3] Loaded from '{model_dir}' on {'GPU' if device == 0 else 'CPU'}")
+    return pipe, tokenizer
+def score_pairs(
+    pairs: list[dict],
+    pipe,
+    tokenizer,
+    max_len: int,
+    conf_threshold: float,
+) -> list[dict]:
+    """
+    Run Model 3 on each clause pair.
+    Returns results sorted: contradictions first, then by confidence descending.
+    """
+    results = []
+    for pair in pairs:
+        clause_a    = pair["clause_a"]
+        clause_b    = pair["clause_b"]
+        clause_type = pair["clause_type"]
+        token_len  = check_token_length(tokenizer, clause_a, clause_b, max_len)
+        raw_result = pipe(f"{clause_a} [SEP] {clause_b}")
+        if raw_result and isinstance(raw_result[0], list):
+            raw_result = raw_result[0]
+        scores              = {r["label"]: r["score"] for r in raw_result}
+        predicted_label     = max(scores, key=scores.get)
+        predicted_score     = scores[predicted_label]
+        contradiction_score = scores.get("contradiction", 0.0)
+        results.append({
+            "clause_type":         clause_type,
+            "clause_a":            clause_a,
+            "clause_b":            clause_b,
+            "predicted_label":     predicted_label,
+            "predicted_score":     round(predicted_score, 4),
+            "contradiction_score": round(contradiction_score, 4),
+            "all_scores":          {k: round(v, 4) for k, v in scores.items()},
+            "token_length":        token_len,
+            "uncertain":           predicted_score < conf_threshold,
+        })
+    # Contradictions first, then sorted by confidence descending
+    results.sort(key=lambda x: (
+        x["predicted_label"] != "contradiction",
+        -x["predicted_score"],
+    ))
+# ✅ Keep only strong, reliable contradictions
+    return results
+# ── Step 5: Print results ─────────────────────────────────────────────────────
+def print_results(results: list[dict]):
+    # 🔥 Split results
+    strong = [
+        r for r in results
+        if r["predicted_label"] == "contradiction"
+        and not r["uncertain"]
+        and r["predicted_score"] >= 0.75
+    ]
+    uncertain = [
+        r for r in results
+        if r["uncertain"]
+    ]
+    print("\n" + "=" * 65)
+    print("CONTRACT CONFLICT ANALYSIS")
+    print("=" * 65)
+    # ✅ PRIMARY SECTION
+    print("\n── HIGH-CONFIDENCE CONFLICTS ─────────────────────────────")
+    print("[INFO] These are reliable contradictions (>= 75%)")
+    if strong:
+        for r in strong:
+            print(f"\n[{r['clause_type'].upper()}] {r['predicted_score']:.2%}")
+            print(f"Contract A: {r['clause_a']}")
+            print(f"Contract B: {r['clause_b']}")
+    else:
+        print("  None found.")
+    # ⚠️ SECONDARY SECTION
+    print("\n── UNCERTAIN / REVIEW NEEDED ─────────────────────────────")
+    print("[INFO] Lower-confidence predictions — require human validation")
+    if uncertain:
+        for r in uncertain:
+            print(f"\n[{r['clause_type'].upper()}] "
+                  f"{r['predicted_label']} ({r['predicted_score']:.2%})")
+            print(f"Contract A: {r['clause_a']}")
+            print(f"Contract B: {r['clause_b']}")
+    else:
+        print("  None.")
+    print("\n" + "=" * 65)
+# ── Main pipeline ─────────────────────────────────────────────────────────────
+def run_pipeline(
+    contract_a_text: str,
+    contract_b_text: str,
+    model3_dir: str       = MODEL3_DIR,
+    max_len: int          = MAX_LEN,
+    conf_threshold: float = CONF_THRESHOLD,
+) -> list[dict]:
+    """
+    Full pipeline:
+      1. Groq extracts + classifies clauses from both contracts
+      2. Same-type clauses are paired across contracts
+      3. Model 3 scores each pair for contradiction
+      4. Results returned sorted by conflict severity
+    """
+    print("\n── STEP 1: Extracting clauses via Groq ────────────────────")
+    clauses_a = extract_clauses(contract_a_text, "Contract A")
+    clauses_b = extract_clauses(contract_b_text, "Contract B")
+    if not clauses_a or not clauses_b:
+        print("[ERROR] Extraction returned empty. Check GROQ_API_KEY and contract text.")
+        return []
+    print("\n── STEP 2: Generating clause pairs ────────────────────────")
+    pairs = generate_pairs(clauses_a, clauses_b)
+    if not pairs:
+        print("[WARN] No matching clause types between contracts.")
+        return []
+    print(f"\n── STEP 3: Scoring {len(pairs)} pairs with Model 3 ────────")
+    pipe, tokenizer = load_model3(model3_dir, max_len)
+    results = score_pairs(pairs, pipe, tokenizer, max_len, conf_threshold)
+    print_results(results)
+    return results
+# ── Example usage ─────────────────────────────────────────────────────────────
+# Demo run: compares two hard-coded sample contracts and writes the scored
+# results to conflict_results.json in the current working directory.
+if __name__ == "__main__":
+    # Sample contract A
+    CONTRACT_A = """
+    VENDOR AGREEMENT 2024
+    Termination: Either party may terminate this agreement for convenience
+    upon 30 days written notice to the other party.
+    Warranties: Seller warrants that all deliverables shall be free from
+    defects for a period of 24 months from the date of acceptance by Buyer.
+    Dispute Resolution: All disputes arising under this agreement shall be
+    resolved through binding arbitration in New York under AAA rules.
+    Intellectual Property: The Licensee is granted an exclusive, worldwide,
+    perpetual license to use the Software and all derivative works.
+    Confidentiality: Neither party shall disclose Confidential Information
+    to any third party without prior written consent of the disclosing party.
+    Governing Law: This agreement shall be governed by the laws of Delaware.
+    """
+    # Sample contract B, with the same clause headings as contract A
+    CONTRACT_B = """
+    MASTER SERVICES AGREEMENT 2024
+    Termination: This agreement may only be terminated for cause, specifically
+    material breach that remains uncured for 60 days after written notice.
+    Warranties: Seller disclaims all warranties, express or implied, including
+    any warranty of merchantability or fitness for a particular purpose.
+    Dispute Resolution: Either party may bring suit in any court of competent
+    jurisdiction to resolve disputes arising under this agreement.
+    Intellectual Property: The license granted herein is non-exclusive, limited
+    to the United States, and valid for 12 months only from the effective date.
+    Confidentiality: Confidential Information must not be shared with outside
+    parties unless the disclosing party agrees in writing beforehand.
+    Governing Law: This agreement is governed by the laws of California.
+    """
+    # Uses the module defaults for model directory, max length and threshold
+    results = run_pipeline(CONTRACT_A, CONTRACT_B)
+    with open("conflict_results.json", "w") as f:
+        json.dump(results, f, indent=2)
+    print("\nResults saved to conflict_results.json")

main.py ADDED Viewed

	@@ -0,0 +1,658 @@

+import os
+import sys
+import io
+import pickle
+import secrets
+import threading
+from datetime import datetime, timezone
+from functools import wraps
+import httpx
+import urllib.parse
+import numpy as np
+import pandas as pd
+import pdfplumber
+from bson import ObjectId
+from dotenv import load_dotenv
+from flask import Flask, jsonify, redirect, request, session
+from flask_cors import CORS
+from flask_session import Session
+from pymongo import MongoClient
+import certifi
+from werkzeug.security import check_password_hash, generate_password_hash
+from all_model_code.model_1_code.pipeline import ObligationPipeline
+from scheduler_api import scheduler_bp, scheduler, BreachedObligation, ObligationType
+# ── clause_extractor (for two-contract comparison) ────────────────────────────
+# Adjust EXTRACTOR_DIR if clause_extractor.py lives elsewhere relative to main.py
+EXTRACTOR_DIR = os.path.join(os.path.dirname(__file__), "..")
+# Must run before the import below so the parent directory is searched first
+sys.path.insert(0, EXTRACTOR_DIR)
+try:
+    from clause_extractor import extract_clauses, generate_pairs, load_model3, score_pairs
+    EXTRACTOR_AVAILABLE = True
+except ImportError as e:
+    # Import failure is non-fatal: the flag below records that the extractor is missing
+    print(f"[WARN] clause_extractor not found: {e}. /api/compare will run in mock mode.")
+    EXTRACTOR_AVAILABLE = False
+# ─── Load env ─────────────────────────────────────────────────────────────────
+# load_dotenv() must precede the getenv calls so .env values are visible to them
+load_dotenv()
+MONGO_URI        = os.getenv("MONGO_URI")
+GOOGLE_CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID")
+GOOGLE_SECRET    = os.getenv("GOOGLE_CLIENT_SECRET")
+GOOGLE_REDIRECT  = os.getenv("GOOGLE_REDIRECT_URI")
+FRONTEND_URL     = os.getenv("FRONTEND_URL", "http://localhost:3000")
+# NOTE: when SECRET_KEY is unset, a fresh random value is generated on every process start
+SECRET_KEY       = os.getenv("SECRET_KEY", secrets.token_hex(32))
+# True only when FLASK_ENV is exactly "production"
+IS_PROD          = os.getenv("FLASK_ENV", "development") == "production"
+CONF_THRESHOLD   = float(os.getenv("CONF_THRESHOLD", "0.7"))
+MAX_LEN          = int(os.getenv("MAX_LEN", "512"))
+# ─── MongoDB ──────────────────────────────────────────────────────────────────
+try:
+    mongo_client = MongoClient(MONGO_URI, tlsCAFile=certifi.where())
+    _db = mongo_client["userinfo"]
+    _db.users.create_index("email", unique=True, sparse=True)
+    print("Connected to MongoDB")
+except Exception as e:
+    print(f"MongoDB connection failed: {e}")
+    if "SSL" in str(e) or "tls" in str(e).lower():
+        print("\n" + "!" * 60)
+        print("CRITICAL: ATLAS IP WHITELIST BLOCKED!")
+        print("MongoDB Atlas enforces IP whitelisting by aggressively dropping")
+        print("the TLS/SSL handshake. This 'tls1 alert internal error' means")
+        print("your current network IP is not added to your Atlas allowlist.")
+        print("Log into MongoDB Atlas -> Security -> Network Access -> Add IP")
+        print("!" * 60 + "\n")
+    raise
+def db():
+    """Return the module-level database handle (`_db`) created at import time."""
+    return _db
+def now():
+    return datetime.now(timezone.utc)
+# ─── App setup ────────────────────────────────────────────────────────────────
+app = Flask(__name__)
+# app.secret_key = secrets.token_hex(16)
+CORS(
+    app,
+    origins              = [FRONTEND_URL, "http://localhost:3000", "http://127.0.0.1:3000"],
+    supports_credentials = True,
+    allow_headers        = ["Content-Type", "Authorization"],
+    methods              = ["GET", "POST", "PUT", "DELETE", "OPTIONS"],
+)
+app.register_blueprint(scheduler_bp)
+app.config.update(
+    SECRET_KEY              = os.getenv("SECRET_KEY") or secrets.token_hex(32),
+    SESSION_TYPE            = "filesystem",
+    SESSION_FILE_DIR        = os.path.join(os.getcwd(), "session_data"),
+    SESSION_COOKIE_SAMESITE = "Lax",
+    SESSION_COOKIE_SECURE   = IS_PROD,
+)
+Session(app)
+http = httpx.Client()
+# ─── Auth helpers ─────────────────────────────────────────────────────────────
+def require_auth(fn):
+    @wraps(fn)
+    def inner(*a, **kw):
+        if "user_id" not in session:
+            return jsonify({"error": "Not authenticated"}), 401
+        return fn(*a, **kw)
+    return inner
+def uid():
+    """Return the "user_id" stored in the session, or None when it is absent."""
+    return session.get("user_id")
+def serialize_user(user):
+    """Return a safe dict — never exposes the hashed password."""
+    return {
+        "id":         str(user["_id"]),
+        "name":       user.get("name"),
+        "email":      user.get("email"),
+        "picture":    user.get("picture"),
+        "provider":   user.get("provider", "email"),
+        "created_at": user["created_at"].isoformat() if user.get("created_at") else None,
+    }
+# ─── Email / Password auth ────────────────────────────────────────────────────
+@app.route("/auth/register", methods=["POST"])
+def register():
+    data     = request.get_json(silent=True) or {}
+    name     = (data.get("name") or "").strip()
+    email    = (data.get("email") or "").strip().lower()
+    password = data.get("password") or ""
+    if not name:
+        return jsonify({"error": "Name is required"}), 400
+    if not email or "@" not in email:
+        return jsonify({"error": "A valid email is required"}), 400
+    if len(password) < 8:
+        return jsonify({"error": "Password must be at least 8 characters"}), 400
+    if db().users.find_one({"email": email, "provider": "email"}):
+        return jsonify({"error": "An account with that email already exists"}), 409
+    hashed   = generate_password_hash(password)
+    user_doc = {
+        "name": name, "email": email, "password": hashed,
+        "picture": None, "provider": "email", "provider_id": None,
+        "created_at": now(), "updated_at": now(),
+    }
+    result = db().users.insert_one(user_doc)
+    session["user_id"]   = str(result.inserted_id)
+    session["user_name"] = name
+    user_doc["_id"]      = result.inserted_id
+    return jsonify({"message": "Account created", "user": serialize_user(user_doc)}), 201
+@app.route("/auth/login", methods=["POST"])
+def login():
+    data     = request.get_json(silent=True) or {}
+    email    = (data.get("email") or "").strip().lower()
+    password = data.get("password") or ""
+    if not email or not password:
+        return jsonify({"error": "Email and password are required"}), 400
+    user        = db().users.find_one({"email": email, "provider": "email"})
+    dummy_hash  = generate_password_hash("__dummy__")
+    stored_hash = user["password"] if user else dummy_hash
+    valid       = check_password_hash(stored_hash, password)
+    if not user or not valid:
+        return jsonify({"error": "Invalid email or password"}), 401
+    session["user_id"]   = str(user["_id"])
+    session["user_name"] = user.get("name")
+    return jsonify({"message": "Logged in", "user": serialize_user(user)})
+# ─── Google OAuth ─────────────────────────────────────────────────────────────
+@app.route("/auth/login/google")
+def google_login():
+    state = secrets.token_urlsafe(16)
+    session["oauth_state"] = state
+    params = {
+        "client_id": GOOGLE_CLIENT_ID, "redirect_uri": GOOGLE_REDIRECT,
+        "response_type": "code", "scope": "openid email profile",
+        "state": state, "access_type": "online", "prompt": "select_account",
+    }
+    return redirect(f"https://accounts.google.com/o/oauth2/v2/auth?{urllib.parse.urlencode(params)}")
+@app.route("/auth/callback/google")
+def google_callback():
+    if request.args.get("state") != session.pop("oauth_state", None):
+        return jsonify({"error": "Invalid state — possible CSRF"}), 400
+    code = request.args.get("code")
+    if not code:
+        return jsonify({"error": "No authorization code returned from Google"}), 400
+    token_res = http.post(
+        "https://oauth2.googleapis.com/token",
+        data={
+            "code": code, "client_id": GOOGLE_CLIENT_ID,
+            "client_secret": GOOGLE_SECRET, "redirect_uri": GOOGLE_REDIRECT,
+            "grant_type": "authorization_code",
+        },
+    ).json()
+    access_token = token_res.get("access_token")
+    if not access_token:
+        return jsonify({"error": "Token exchange failed", "detail": token_res}), 400
+    info = http.get(
+        "https://www.googleapis.com/oauth2/v3/userinfo",
+        headers={"Authorization": f"Bearer {access_token}"},
+    ).json()
+    if not info.get("sub"):
+        return jsonify({"error": "Could not retrieve user info from Google"}), 400
+    user = db().users.find_one_and_update(
+        {"provider_id": info["sub"], "provider": "google"},
+        {
+            "$set": {"name": info.get("name"), "email": info.get("email"),
+                     "picture": info.get("picture"), "updated_at": now()},
+            "$setOnInsert": {"provider": "google", "provider_id": info["sub"], "created_at": now()},
+        },
+        upsert=True,
+        return_document=True,
+    )
+    session["user_id"]   = str(user["_id"])
+    session["user_name"] = info.get("name")
+    return redirect(f"{FRONTEND_URL}/dashboard")
+# ─── Session routes ───────────────────────────────────────────────────────────
+@app.route("/auth/me")
+def auth_me():
+    if "user_id" not in session:
+        return jsonify({"authenticated": False}), 200
+    user = db().users.find_one({"_id": ObjectId(uid())})
+    if not user:
+        session.clear()
+        return jsonify({"authenticated": False}), 200
+    return jsonify({"authenticated": True, "user": serialize_user(user)})
+@app.route("/auth/logout", methods=["POST"])
+def logout():
+    """Clear the whole session and confirm with a JSON message."""
+    session.clear()
+    return jsonify({"message": "Logged out"})
+@app.route("/api/profile")
+@require_auth
+def profile():
+    user = db().users.find_one({"_id": ObjectId(uid())})
+    if not user:
+        return jsonify({"error": "User not found"}), 404
+    return jsonify(serialize_user(user))
+# ─── ML Model Registry (lazy-loaded, thread-safe) ────────────────────────────
+# Cache of loaded models, keyed by model name; filled on first use by get_model()
+_models: dict      = {}
+# Guards first-time loading in get_model()
+_model_lock        = threading.Lock()
+# Directory containing this file; model paths are resolved relative to it
+BASE_DIR           = os.path.dirname(os.path.abspath(__file__))
+_compare_model_cache: dict = {}   # separate cache for clause_extractor's model3
+def _load_obligation_pipeline():
+    config = {
+        "model_name": os.path.join(BASE_DIR, "ckpt_obligation_fast"),
+        "device": "cpu",
+        "filter_min_confidence": 0.1,
+        "min_fields": 2,
+    }
+    return ObligationPipeline(config)
+def _load_nli_model():
+    from transformers import pipeline as hf_pipeline
+    path = os.path.join(BASE_DIR, "model_3")
+    return hf_pipeline(
+        "text-classification", model=path, tokenizer=path,
+        device=-1, top_k=None, truncation=True, max_length=128,
+    )
+def _load_risk_bundle():
+    """Unpickle and return the contents of `risk_model_v10_extended.pkl` next to this file."""
+    pkl_path = os.path.join(BASE_DIR, "risk_model_v10_extended.pkl")
+    with open(pkl_path, "rb") as f:
+        # NOTE(review): pickle.load executes code embedded in the file; this is
+        # only safe while the .pkl is a trusted artifact shipped with the app.
+        return pickle.load(f)
+def get_model(key: str):
+    if key not in _models:
+        with _model_lock:
+            if key not in _models:
+                print(f"[ML] Loading model: {key} …")
+                if key == "obligation":
+                    _models[key] = _load_obligation_pipeline()
+                elif key == "nli":
+                    _models[key] = _load_nli_model()
+                elif key == "risk":
+                    _models[key] = _load_risk_bundle()
+                print(f"[ML] Model '{key}' ready.")
+    return _models[key]
+def _get_compare_model():
+    """Lazy-load clause_extractor's model3 (used by /api/compare)."""
+    if not _compare_model_cache:
+        pipe, tokenizer = load_model3(os.path.join(BASE_DIR, "model_3"), MAX_LEN)
+        _compare_model_cache["pipe"]      = pipe
+        _compare_model_cache["tokenizer"] = tokenizer
+    return _compare_model_cache["pipe"], _compare_model_cache["tokenizer"]
+# ─── PDF / text extraction helper ────────────────────────────────────────────
+def extract_text_from_request() -> str:
+    if "file" in request.files:
+        raw = request.files["file"].read()
+        with pdfplumber.open(io.BytesIO(raw)) as pdf:
+            return "\n".join(p.extract_text() or "" for p in pdf.pages)
+    return (request.get_json(silent=True) or {}).get("text", "")
+# ─── COVENANT OBLIGATION EXTRACTION (/api/analyze) ───────────────────────────
+@app.route("/api/analyze", methods=["POST"])
+# @require_auth   # Uncomment to lock behind auth
+def analyze_contract():
+    """
+    Run the obligation pipeline on the submitted contract and return its findings.
+    Responses: 200 with "obligations", "clause_count" and "contract_text";
+    400 when no text was provided; 422 when the pipeline yields no results;
+    500 when processing raises.
+    """
+    text = extract_text_from_request()
+    # Debug output: echoes the first 400 characters of the submitted text to stdout
+    print("\n" + "=" * 40)
+    print("--- 1. INCOMING TEXT TO AI ---")
+    print(text[:400].strip() if text else "WARNING: TEXT IS EMPTY!")
+    print("=" * 40 + "\n")
+    if not text.strip():
+        return jsonify({"error": "No contract text provided"}), 400
+    # Loaded outside the try below, so a model-loading error is not turned into the JSON 500
+    pipeline = get_model("obligation")
+    try:
+        raw_results = pipeline.process(
+            source=text, source_type="text", contract_id="api_upload", debug=True
+        )
+        print("\n" + "=" * 40)
+        print("--- 2. RAW PIPELINE OUTPUT ---")
+        print(raw_results)
+        print("=" * 40 + "\n")
+        # Best effort: return the cleaned text when the cleaning stages work,
+        # otherwise fall back to the raw text
+        try:
+            from all_model_code.model_1_code.stage1_ingestion import ingest
+            from all_model_code.model_1_code.stage2_cleaning import clean_text
+            cleaned_text = clean_text(ingest(text, "text"))
+        except Exception:
+            cleaned_text = text
+        obligations = []
+        # presumably each result is a dict with the keys read below — verify against the pipeline
+        for i, r in enumerate(raw_results):
+            metric_name = r.get("metric_name", "Unknown Metric")
+            op          = r.get("operator", "must maintain")
+            val         = r.get("threshold_value", "a specific value")
+            score       = r.get("confidence_score", 0.5)
+            # Lower confidence gives higher risk: (1 - score) * 80 + 10, clamped to 5..95
+            risk        = max(5, min(95, round((1 - score) * 80 + 10)))
+            obligations.append({
+                "id":          f"C{i+1}",
+                "clause":      str(metric_name).replace("_", " ").title()[:30],
+                "type":        "Financial Covenant",
+                "desc":        f"The entity {op} a {metric_name} of {val}.",
+                "confidence":  round(score * 100, 1),
+                "risk":        risk,
+                "source_text": r.get("source_text", ""),
+            })
+        if not obligations:
+            return jsonify({"error": "No strict numerical obligations found."}), 422
+        return jsonify({
+            "obligations":   obligations,
+            "clause_count":  len(obligations),
+            "contract_text": cleaned_text,
+        })
+    except Exception as e:
+        # Any failure above is logged to stdout and reported as a generic 500
+        print(f"[analyze] Pipeline error: {e}")
+        return jsonify({"error": "Failed to process contract through AI pipeline."}), 500
+# ─── CROSS-CONTRACT CONFLICT COMPARISON (/api/compare) ───────────────────────
+# Uses clause_extractor.py + Groq to extract and compare two full contracts.
+# Falls back to mock data when EXTRACTOR_AVAILABLE is False.
+# Canned response with three keys: "clauses_a" and "clauses_b" (clause lists for
+# each contract) and "conflicts" (scored clause pairs).
+MOCK_COMPARE_RESPONSE = {
+    "clauses_a": [
+        {"clause_type": "termination",        "clause_text": "Either party may terminate this agreement for convenience upon 30 days written notice.",                      "contract": "Contract A"},
+        {"clause_type": "warranty",           "clause_text": "Seller warrants all deliverables shall be free from defects for 24 months from acceptance.",                "contract": "Contract A"},
+        {"clause_type": "dispute_resolution", "clause_text": "All disputes shall be resolved through binding arbitration in New York under AAA rules.",                   "contract": "Contract A"},
+        {"clause_type": "ip_ownership",       "clause_text": "Licensee is granted an exclusive, worldwide, perpetual license to use the Software.",                       "contract": "Contract A"},
+        {"clause_type": "confidentiality",    "clause_text": "Neither party shall disclose Confidential Information to any third party without prior written consent.",   "contract": "Contract A"},
+        {"clause_type": "governing_law",      "clause_text": "This agreement shall be governed by the laws of Delaware.",                                                 "contract": "Contract A"},
+    ],
+    "clauses_b": [
+        {"clause_type": "termination",        "clause_text": "This agreement may only be terminated for cause — material breach uncured for 60 days after notice.",       "contract": "Contract B"},
+        {"clause_type": "warranty",           "clause_text": "Seller disclaims all warranties, express or implied, including merchantability or fitness for purpose.",    "contract": "Contract B"},
+        {"clause_type": "dispute_resolution", "clause_text": "Either party may bring suit in any court of competent jurisdiction to resolve disputes.",                   "contract": "Contract B"},
+        {"clause_type": "ip_ownership",       "clause_text": "License granted is non-exclusive, limited to the United States, valid for 12 months only.",                "contract": "Contract B"},
+        {"clause_type": "confidentiality",    "clause_text": "Confidential Information must not be shared with outside parties unless the disclosing party agrees.",      "contract": "Contract B"},
+        {"clause_type": "governing_law",      "clause_text": "This agreement is governed by the laws of California.",                                                    "contract": "Contract B"},
+    ],
+    "conflicts": [
+        {"clause_type": "termination",        "clause_a": "Either party may terminate this agreement for convenience upon 30 days written notice.",             "clause_b": "This agreement may only be terminated for cause — material breach uncured for 60 days after notice.", "predicted_label": "contradiction", "predicted_score": 0.9312, "contradiction_score": 0.9312, "all_scores": {"contradiction": 0.9312, "entailment": 0.0421, "neutral": 0.0267}, "token_length": 87,  "uncertain": False},
+        {"clause_type": "warranty",           "clause_a": "Seller warrants all deliverables shall be free from defects for 24 months from acceptance.",        "clause_b": "Seller disclaims all warranties, express or implied, including merchantability or fitness for purpose.",  "predicted_label": "contradiction", "predicted_score": 0.9741, "contradiction_score": 0.9741, "all_scores": {"contradiction": 0.9741, "entailment": 0.0159, "neutral": 0.0100}, "token_length": 72,  "uncertain": False},
+        {"clause_type": "dispute_resolution", "clause_a": "All disputes shall be resolved through binding arbitration in New York under AAA rules.",           "clause_b": "Either party may bring suit in any court of competent jurisdiction to resolve disputes.",               "predicted_label": "contradiction", "predicted_score": 0.8823, "contradiction_score": 0.8823, "all_scores": {"contradiction": 0.8823, "entailment": 0.0712, "neutral": 0.0465}, "token_length": 65,  "uncertain": False},
+        {"clause_type": "ip_ownership",       "clause_a": "Licensee is granted an exclusive, worldwide, perpetual license to use the Software.",               "clause_b": "License granted is non-exclusive, limited to the United States, valid for 12 months only.",             "predicted_label": "contradiction", "predicted_score": 0.9567, "contradiction_score": 0.9567, "all_scores": {"contradiction": 0.9567, "entailment": 0.0281, "neutral": 0.0152}, "token_length": 68,  "uncertain": False},
+        {"clause_type": "governing_law",      "clause_a": "This agreement shall be governed by the laws of Delaware.",                                        "clause_b": "This agreement is governed by the laws of California.",                                                  "predicted_label": "contradiction", "predicted_score": 0.7834, "contradiction_score": 0.7834, "all_scores": {"contradiction": 0.7834, "entailment": 0.1243, "neutral": 0.0923}, "token_length": 45,  "uncertain": False},
+        {"clause_type": "confidentiality",    "clause_a": "Neither party shall disclose Confidential Information to any third party without prior written consent.", "clause_b": "Confidential Information must not be shared with outside parties unless the disclosing party agrees.", "predicted_label": "neutral",      "predicted_score": 0.5821, "contradiction_score": 0.2341, "all_scores": {"contradiction": 0.2341, "entailment": 0.1838, "neutral": 0.5821},         "token_length": 58,  "uncertain": True},
+    ],
+}
+@app.route("/api/compare", methods=["POST"])
+def compare_contracts():
+    """
+    POST /api/compare
+    Body: { "contract_a": "...", "contract_b": "..." }
+    Returns: { clauses_a, clauses_b, conflicts }
+    Extracts clauses from both contracts via Groq (clause_extractor.py) then
+    scores each matched pair with model_3 for entailment / contradiction.
+    Falls back to MOCK_COMPARE_RESPONSE when clause_extractor is unavailable.
+    """
+    data       = request.get_json(force=True, silent=True) or {}  # force=True ignores Content-Type
+    contract_a = (data.get("contract_a") or "").strip()
+    contract_b = (data.get("contract_b") or "").strip()
+    if not contract_a or not contract_b:
+        print(f"[compare] 400 — got keys: {list(data.keys())}, "
+              f"a={bool(contract_a)}, b={bool(contract_b)}")
+        return jsonify({"error": "Both contract_a and contract_b are required"}), 400
+    if not EXTRACTOR_AVAILABLE:
+        print("[MOCK] clause_extractor unavailable — returning mock compare data")
+        return jsonify(MOCK_COMPARE_RESPONSE)
+    try:
+        print("\n[API] Extracting clauses via Groq...")
+        clauses_a = extract_clauses(contract_a, "Contract A")
+        clauses_b = extract_clauses(contract_b, "Contract B")
+        if not clauses_a or not clauses_b:
+            return jsonify({"error": "Clause extraction returned empty. Check GROQ_API_KEY."}), 500
+        print("[API] Generating pairs...")
+        pairs = generate_pairs(clauses_a, clauses_b)
+        conflicts = []
+        if pairs:
+            print(f"[API] Scoring {len(pairs)} pairs with model_3...")
+            pipe, tokenizer = _get_compare_model()
+            conflicts = score_pairs(pairs, pipe, tokenizer, MAX_LEN, CONF_THRESHOLD)
+        return jsonify({"clauses_a": clauses_a, "clauses_b": clauses_b, "conflicts": conflicts})
+    except Exception as e:
+        print(f"[compare] Pipeline error: {e}")
+        return jsonify({"error": str(e)}), 500
+# ─── SINGLE-CLAUSE NLI (/api/conflicts) ──────────────────────────────────────
+@app.route("/api/conflicts", methods=["POST"])
+def detect_conflicts():
+    """
+    POST /api/conflicts
+    Body: { "clause1": "...", "clause2": "..." }
+    Returns: { label, confidence, scores }
+    """
+    data    = request.get_json(silent=True) or {}
+    clause1 = (data.get("clause1") or "").strip()
+    clause2 = (data.get("clause2") or "").strip()
+    if not clause1 or not clause2:
+        return jsonify({"error": "Both clause1 and clause2 are required"}), 400
+    pipe = get_model("nli")
+    raw  = pipe(f"{clause1} [SEP] {clause2}")
+    if raw and isinstance(raw[0], list):
+        raw = raw[0]
+    scores = {r["label"]: round(r["score"] * 100, 2) for r in raw}
+    best   = max(scores, key=scores.get)
+    return jsonify({"label": best, "confidence": scores[best], "scores": scores})
+# ─── RISK FORECAST (/api/risk) ────────────────────────────────────────────────
+# Put <BASE_DIR>/model_2 at the front of sys.path so inference_demo can be
+# imported from there.
+import sys as _sys
+_sys.path.insert(0, os.path.join(BASE_DIR, 'model_2'))
+try:
+    from inference_demo import load_ticker_data, build_risk_score
+except ImportError:
+    print("[!] Warning: Could not import inference_demo from ../model_2")
+    # Fallback stubs keep both names defined so the module still imports; they
+    # fail at call time, where /api/risk catches the error and answers 500.
+    def load_ticker_data(ticker, d): raise NotImplementedError("inference_demo not found")
+    def build_risk_score(df):        raise NotImplementedError("inference_demo not found")
+@app.route("/api/risk", methods=["GET"])
+def risk_forecast():
+    """GET /api/risk?ticker=AAPL&horizon=90"""
+    ticker  = (request.args.get("ticker") or "AAPL").upper()
+    horizon = int(request.args.get("horizon", 90))
+    bundle = get_model("risk")
+    if ticker not in bundle["models"]:
+        return jsonify({"error": f"{ticker} not in model"}), 404
+    try:
+        df = load_ticker_data(ticker.lower(), os.path.join(BASE_DIR, 'data/Stocks'))
+        fe = build_risk_score(df)
+    except Exception as e:
+        return jsonify({"error": f"Failed to engineer features: {e}"}), 500
+    payload   = bundle["models"][ticker]
+    model     = payload["model"]
+    r_min     = payload["r_min"]
+    r_max     = payload["r_max"]
+    threshold = payload["threshold"]
+    current_risk_raw  = fe['risk_raw'].iloc[-1]
+    current_risk_norm = (current_risk_raw - r_min) / (r_max - r_min + 1e-9)
+    future   = model.make_future_dataframe(periods=horizon, freq='B')
+    forecast = model.predict(future)
+    # ── BUG FIX: always use the DatetimeIndex, never the 'Date' column.
+    # The 'Date' column can resolve to today on some machines, which makes
+    # is_future_or_current False for every row and leaves all yhat = null.
+    last_date = pd.to_datetime(fe.index[-1])
+    print(f"[DEBUG] last_date={last_date}  fe.shape={fe.shape}")
+    future_fc = forecast[forecast['ds'] > last_date]
+    breach_detected = False
+    days_to_breach  = None
+    confidence      = "NONE"
+    breach_date     = None
+    for conf, col in [('HIGH', 'yhat_lower'), ('MEDIUM', 'yhat'), ('LOW', 'yhat_upper')]:
+        rows = future_fc[future_fc[col] > threshold]
+        if not rows.empty:
+            breach_detected = True
+            confidence      = conf
+            breach_date     = str(rows.iloc[0]['ds'].date())
+            days_to_breach  = max((rows.iloc[0]['ds'] - last_date).days, 0)
+            break
+    if breach_detected and breach_date:
+        try:
+            ob_type    = ObligationType.LIQUIDITY_RATIO if ticker == 'CHK' else ObligationType.REVENUE
+            breach_obj = BreachedObligation(
+                contract_id=f"AUTO-{ticker}",
+                obligation_type=ob_type,
+                metric_name="Financial Risk Score",
+                threshold_value=round(float(threshold), 2),
+                current_value=round(float(current_risk_norm), 2),
+                predicted_value=None,
+                deadline=breach_date,
+                consequence="Covenant Violation Predicted by Prophet Model",
+                conflict_with=None,
+            )
+            scheduler.process_breach(breach_obj)
+        except Exception as e:
+            print(f"Failed to auto-schedule breach: {e}")
+    def norm(val):
+        span = r_max - r_min if r_max != r_min else 1
+        return round(float(np.clip((val - r_min) / span * 100, 0, 100)), 2)
+    # Build a DatetimeIndex-keyed lookup for fast y_norm resolution
+    fe_indexed = fe["risk_raw"] if fe.index.dtype == "datetime64[ns]" else fe.set_index(pd.to_datetime(fe.index))["risk_raw"]
+    series = []
+    for _, row in forecast.iterrows():
+        ds_ts               = pd.Timestamp(row["ds"])
+        is_future_or_current = ds_ts >= last_date
+        # Historical actual value — look up by normalised date key
+        y_norm = None
+        try:
+            y_val  = fe_indexed.loc[ds_ts]
+            y_norm = norm(float(y_val))
+        except KeyError:
+            pass
+        series.append({
+            "ds":         str(ds_ts.date()),
+            "y":          y_norm,
+            "yhat":       round(float(np.clip(row["yhat"]       * 100, 0, 100)), 2) if is_future_or_current else None,
+            "yhat_lower": round(float(np.clip(row["yhat_lower"] * 100, 0, 100)), 2) if is_future_or_current else None,
+            "yhat_upper": round(float(np.clip(row["yhat_upper"] * 100, 0, 100)), 2) if is_future_or_current else None,
+            "yhat_range": [
+                round(float(np.clip(row["yhat_lower"] * 100, 0, 100)), 2),
+                round(float(np.clip(row["yhat_upper"] * 100, 0, 100)), 2),
+            ] if is_future_or_current else None,
+        })
+    return jsonify({
+        "ticker":            ticker,
+        "available_tickers": list(bundle["models"].keys()),
+        "last_update_date":  str(last_date.date()),
+        "current_price":     round(fe['Close'].iloc[-1], 2),
+        "risk_metrics": {
+            "current_score":      round(current_risk_norm, 4),
+            "danger_threshold":   round(threshold, 4),
+            "is_in_danger_zone":  bool(current_risk_norm > threshold),
+        },
+        "forecast": {
+            "breach_predicted":         breach_detected,
+            "estimated_days_to_breach": days_to_breach,
+            "confidence_level":         confidence,
+        },
+        # Legacy flat keys kept for frontend backward-compat
+        "breach_detected":  breach_detected,
+        "breach_date":      breach_date,
+        "days_to_breach":   days_to_breach,
+        "confidence_tier":  confidence,
+        "risk_score":       round(current_risk_norm * 100, 2),
+        "threshold":        round(float(threshold * 100), 2),
+        "forecast_series":  series,
+        "model_meta": {
+            "run_date":         str(last_date.date()),
+            "horizon_days":     horizon,
+            "target_threshold": round(float(threshold * 100), 2),
+        },
+    })
+@app.route("/api/risk/all", methods=["GET"])
+def get_all():
+    import json
+    mock_path = os.path.join(BASE_DIR, 'model_2/frontend_mock_api.json')
+    if os.path.exists(mock_path):
+        with open(mock_path) as f:
+            return jsonify(json.load(f))
+    return jsonify({"error": "frontend_mock_api.json not found"}), 404
+@app.route("/api/risk/tickers", methods=["GET"])
+def risk_tickers():
+    bundle = get_model("risk")
+    return jsonify({"tickers": list(bundle["models"].keys())})
+# ─── Health check ─────────────────────────────────────────────────────────────
+@app.route("/health")
+def health():
+    return jsonify({"status": "ok", "extractor_available": EXTRACTOR_AVAILABLE})
+# ─── Entry point ──────────────────────────────────────────────────────────────
+if __name__ == "__main__":
+    os.makedirs("session_data", exist_ok=True)
+    # Disable watchdog reloader on Windows — causes WinError 10038 when
+    # ML model directories are watched. Debug logging stays active.
+    use_reloader = sys.platform != "win32" and not IS_PROD
+    app.run(host="0.0.0.0", port=7860)

model3.py ADDED Viewed

	@@ -0,0 +1,200 @@

+"""
+ContractPulse - Flask Backend
+Wraps clause_extractor.py pipeline for Next.js frontend consumption.
+Run: python model3.py
+Requires: pip install flask flask-cors python-dotenv groq transformers torch
+"""
+import os
+import sys
+import json
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+from dotenv import load_dotenv
+load_dotenv()
+# ── Add parent directory to path so clause_extractor.py is importable ────────
+# Adjust this path to wherever clause_extractor.py lives relative to this file
+# Parent directory of this file, pushed to the front of sys.path below.
+EXTRACTOR_DIR = os.path.join(os.path.dirname(__file__), "..")
+sys.path.insert(0, EXTRACTOR_DIR)
+# Import the real pipeline if possible; otherwise remember that it is missing
+# so /analyze can serve MOCK_RESPONSE instead of failing.
+try:
+    from clause_extractor import extract_clauses, generate_pairs, load_model3, score_pairs
+    EXTRACTOR_AVAILABLE = True
+except ImportError as e:
+    print(f"[WARN] clause_extractor not found: {e}. Running in mock mode.")
+    EXTRACTOR_AVAILABLE = False
+app = Flask(__name__)
+# Cross-origin requests are only accepted from a frontend on port 3000 of this machine.
+CORS(app, origins=["http://localhost:3000", "http://127.0.0.1:3000"])
+# ── Config ────────────────────────────────────────────────────────────────────
+# Each setting can be overridden through an environment variable of the same name.
+MODEL3_DIR     = os.getenv("MODEL3_DIR", "../model_3")
+MAX_LEN        = int(os.getenv("MAX_LEN", "512"))
+CONF_THRESHOLD = float(os.getenv("CONF_THRESHOLD", "0.7"))
+# Cache model so it's not reloaded on every request
+# Filled by get_model() with the keys "pipe" and "tokenizer".
+_model_cache = {}
+def get_model():
+    if not _model_cache:
+        pipe, tokenizer = load_model3(MODEL3_DIR, MAX_LEN)
+        _model_cache["pipe"] = pipe
+        _model_cache["tokenizer"] = tokenizer
+    return _model_cache["pipe"], _model_cache["tokenizer"]
+# ── Mock data for development (when extractor unavailable) ────────────────────
+# Canned /analyze payload served when clause_extractor could not be imported.
+# It has the same top-level keys as a real response: clauses_a, clauses_b and
+# conflicts (one scored entry per clause type listed in the two clause lists).
+MOCK_RESPONSE = {
+    "clauses_a": [
+        {"clause_type": "termination",        "clause_text": "Either party may terminate this agreement for convenience upon 30 days written notice.", "contract": "Contract A"},
+        {"clause_type": "warranty",           "clause_text": "Seller warrants all deliverables shall be free from defects for 24 months from acceptance.", "contract": "Contract A"},
+        {"clause_type": "dispute_resolution", "clause_text": "All disputes shall be resolved through binding arbitration in New York under AAA rules.", "contract": "Contract A"},
+        {"clause_type": "ip_ownership",       "clause_text": "Licensee is granted an exclusive, worldwide, perpetual license to use the Software.", "contract": "Contract A"},
+        {"clause_type": "confidentiality",    "clause_text": "Neither party shall disclose Confidential Information to any third party without prior written consent.", "contract": "Contract A"},
+        {"clause_type": "governing_law",      "clause_text": "This agreement shall be governed by the laws of Delaware.", "contract": "Contract A"},
+    ],
+    "clauses_b": [
+        {"clause_type": "termination",        "clause_text": "This agreement may only be terminated for cause — material breach uncured for 60 days after notice.", "contract": "Contract B"},
+        {"clause_type": "warranty",           "clause_text": "Seller disclaims all warranties, express or implied, including merchantability or fitness for purpose.", "contract": "Contract B"},
+        {"clause_type": "dispute_resolution", "clause_text": "Either party may bring suit in any court of competent jurisdiction to resolve disputes.", "contract": "Contract B"},
+        {"clause_type": "ip_ownership",       "clause_text": "License granted is non-exclusive, limited to the United States, valid for 12 months only.", "contract": "Contract B"},
+        {"clause_type": "confidentiality",    "clause_text": "Confidential Information must not be shared with outside parties unless the disclosing party agrees in writing.", "contract": "Contract B"},
+        {"clause_type": "governing_law",      "clause_text": "This agreement is governed by the laws of California.", "contract": "Contract B"},
+    ],
+    # In this sample, "uncertain" is True only for the entry whose
+    # predicted_score (0.5821) is below the default CONF_THRESHOLD of 0.7.
+    "conflicts": [
+        {
+            "clause_type": "termination",
+            "clause_a": "Either party may terminate this agreement for convenience upon 30 days written notice.",
+            "clause_b": "This agreement may only be terminated for cause — material breach uncured for 60 days after notice.",
+            "predicted_label": "contradiction",
+            "predicted_score": 0.9312,
+            "contradiction_score": 0.9312,
+            "all_scores": {"contradiction": 0.9312, "entailment": 0.0421, "neutral": 0.0267},
+            "token_length": 87,
+            "uncertain": False,
+        },
+        {
+            "clause_type": "warranty",
+            "clause_a": "Seller warrants all deliverables shall be free from defects for 24 months from acceptance.",
+            "clause_b": "Seller disclaims all warranties, express or implied, including merchantability or fitness for purpose.",
+            "predicted_label": "contradiction",
+            "predicted_score": 0.9741,
+            "contradiction_score": 0.9741,
+            "all_scores": {"contradiction": 0.9741, "entailment": 0.0159, "neutral": 0.0100},
+            "token_length": 72,
+            "uncertain": False,
+        },
+        {
+            "clause_type": "dispute_resolution",
+            "clause_a": "All disputes shall be resolved through binding arbitration in New York under AAA rules.",
+            "clause_b": "Either party may bring suit in any court of competent jurisdiction to resolve disputes.",
+            "predicted_label": "contradiction",
+            "predicted_score": 0.8823,
+            "contradiction_score": 0.8823,
+            "all_scores": {"contradiction": 0.8823, "entailment": 0.0712, "neutral": 0.0465},
+            "token_length": 65,
+            "uncertain": False,
+        },
+        {
+            "clause_type": "ip_ownership",
+            "clause_a": "Licensee is granted an exclusive, worldwide, perpetual license to use the Software.",
+            "clause_b": "License granted is non-exclusive, limited to the United States, valid for 12 months only.",
+            "predicted_label": "contradiction",
+            "predicted_score": 0.9567,
+            "contradiction_score": 0.9567,
+            "all_scores": {"contradiction": 0.9567, "entailment": 0.0281, "neutral": 0.0152},
+            "token_length": 68,
+            "uncertain": False,
+        },
+        {
+            "clause_type": "governing_law",
+            "clause_a": "This agreement shall be governed by the laws of Delaware.",
+            "clause_b": "This agreement is governed by the laws of California.",
+            "predicted_label": "contradiction",
+            "predicted_score": 0.7834,
+            "contradiction_score": 0.7834,
+            "all_scores": {"contradiction": 0.7834, "entailment": 0.1243, "neutral": 0.0923},
+            "token_length": 45,
+            "uncertain": False,
+        },
+        {
+            "clause_type": "confidentiality",
+            "clause_a": "Neither party shall disclose Confidential Information to any third party without prior written consent.",
+            "clause_b": "Confidential Information must not be shared with outside parties unless the disclosing party agrees in writing.",
+            "predicted_label": "neutral",
+            "predicted_score": 0.5821,
+            "contradiction_score": 0.2341,
+            "all_scores": {"contradiction": 0.2341, "entailment": 0.1838, "neutral": 0.5821},
+            "token_length": 58,
+            "uncertain": True,
+        },
+    ]
+}
+# ── Routes ────────────────────────────────────────────────────────────────────
+@app.route("/health", methods=["GET"])
+def health():
+    return jsonify({"status": "ok", "extractor_available": EXTRACTOR_AVAILABLE})
+@app.route("/analyze", methods=["POST"])
+def analyze():
+    """
+    POST /analyze
+    Body: { "contract_a": "...", "contract_b": "..." }
+    Returns: { clauses_a, clauses_b, conflicts }
+    """
+    data = request.get_json()
+    if not data:
+        return jsonify({"error": "No JSON body provided"}), 400
+    contract_a = data.get("contract_a", "").strip()
+    contract_b = data.get("contract_b", "").strip()
+    if not contract_a or not contract_b:
+        return jsonify({"error": "Both contract_a and contract_b are required"}), 400
+    # ── Dev mode: return mock data ────────────────────────────────────────────
+    if not EXTRACTOR_AVAILABLE:
+        print("[MOCK] Returning mock analysis data")
+        return jsonify(MOCK_RESPONSE)
+    # ── Production: run the real pipeline ────────────────────────────────────
+    try:
+        print("\n[API] Extracting clauses via Groq...")
+        clauses_a = extract_clauses(contract_a, "Contract A")
+        clauses_b = extract_clauses(contract_b, "Contract B")
+        if not clauses_a or not clauses_b:
+            return jsonify({"error": "Clause extraction returned empty. Check GROQ_API_KEY."}), 500
+        print("[API] Generating pairs...")
+        pairs = generate_pairs(clauses_a, clauses_b)
+        conflicts = []
+        if pairs:
+            print(f"[API] Scoring {len(pairs)} pairs with Model 3...")
+            pipe, tokenizer = get_model()
+            conflicts = score_pairs(pairs, pipe, tokenizer, MAX_LEN, CONF_THRESHOLD)
+        return jsonify({
+            "clauses_a": clauses_a,
+            "clauses_b": clauses_b,
+            "conflicts": conflicts,
+        })
+    except Exception as e:
+        print(f"[ERROR] Pipeline failed: {e}")
+        return jsonify({"error": str(e)}), 500
+if __name__ == "__main__":
+    print("\n[ContractPulse Backend] Starting on http://localhost:5000")
+    print(f"[ContractPulse Backend] Extractor available: {EXTRACTOR_AVAILABLE}")
+    app.run(debug=True, port=5000)

scheduler_api.py ADDED Viewed

	@@ -0,0 +1,548 @@

+"""
+ContractPulse Scheduler — Flask REST API
+=========================================
+Wraps the TaskScheduler + MeetingRoomScheduler into a clean HTTP API.
+Endpoints:
+  POST /api/process_breach      — process a single breach
+  POST /api/process_batch       — process multiple breaches
+  GET  /api/tasks               — list all tasks (optional ?severity=CRITICAL)
+  GET  /api/meetings            — list all booked meetings
+  GET  /api/departments         — department workload summary
+  POST /api/reset               — reset the scheduler state
+  GET  /api/health              — health check
+"""
+from __future__ import annotations
+import os
+import smtplib
+from email.message import EmailMessage
+import uuid
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+from enum import Enum
+from typing import Optional
+from flask import Blueprint, jsonify, request
+scheduler_bp = Blueprint("scheduler", __name__)
+# ─────────────────────────────────────────────────────────────
+# SCHEDULER CORE
+# ─────────────────────────────────────────────────────────────
+class Department(Enum):
+    FINANCE     = "Finance"
+    LEGAL       = "Legal"
+    TECH        = "Tech"
+    OPERATIONS  = "Operations"
+    COMPLIANCE  = "Compliance"
+    EXECUTIVE   = "Executive"
+class Severity(Enum):
+    LOW      = 1
+    MEDIUM   = 2
+    HIGH     = 3
+    CRITICAL = 4
+class ObligationType(Enum):
+    REVENUE             = "revenue"
+    DEBT_TO_EQUITY      = "debt_to_equity_ratio"
+    CURRENT_RATIO       = "current_ratio"
+    REPORT_SUBMISSION   = "report_submission"
+    WORKFORCE_SIZE      = "workforce_size"
+    CAPEX_LIMIT         = "capex_limit"
+    LIQUIDITY_RATIO     = "liquidity_ratio"
+    INSURANCE_COVERAGE  = "insurance_coverage"
+    UNKNOWN             = "unknown"
+# Departments listed for each obligation type.
+# NOTE(review): presumably read by TaskScheduler._resolve_departments — confirm.
+OBLIGATION_OWNERS: dict[ObligationType, list[Department]] = {
+    ObligationType.REVENUE:            [Department.FINANCE, Department.EXECUTIVE],
+    ObligationType.DEBT_TO_EQUITY:     [Department.FINANCE, Department.LEGAL],
+    ObligationType.CURRENT_RATIO:      [Department.FINANCE],
+    ObligationType.REPORT_SUBMISSION:  [Department.COMPLIANCE, Department.LEGAL],
+    ObligationType.WORKFORCE_SIZE:     [Department.OPERATIONS, Department.LEGAL],
+    ObligationType.CAPEX_LIMIT:        [Department.FINANCE, Department.OPERATIONS],
+    ObligationType.LIQUIDITY_RATIO:    [Department.FINANCE, Department.EXECUTIVE],
+    ObligationType.INSURANCE_COVERAGE: [Department.COMPLIANCE, Department.LEGAL],
+    ObligationType.UNKNOWN:            [Department.LEGAL],
+}
+# Extra departments listed per severity; nothing is listed below HIGH.
+# NOTE(review): presumably merged with the owners above when a task is created — confirm.
+SEVERITY_ESCALATION: dict[Severity, list[Department]] = {
+    Severity.LOW:      [],
+    Severity.MEDIUM:   [],
+    Severity.HIGH:     [Department.LEGAL],
+    Severity.CRITICAL: [Department.LEGAL, Department.EXECUTIVE],
+}
+# Hours from task creation to its due time; process_breach adds this to now().
+SEVERITY_DUE_HOURS: dict[Severity, int] = {
+    Severity.LOW:      72,
+    Severity.MEDIUM:   48,
+    Severity.HIGH:     24,
+    Severity.CRITICAL: 4,
+}
+@dataclass
+class Room:
+    room_id: str
+    name: str
+    capacity: int
+    has_av: bool = True
+@dataclass(order=True)
+class MeetingSlot:
+    start: datetime
+    end: datetime
+    meeting_id: str = field(compare=False)
+    room_id: str    = field(compare=False)
+    title: str      = field(compare=False)
+    attendees: int  = field(compare=False, default=4)
+    def overlaps(self, start: datetime, end: datetime) -> bool:
+        return self.start < end and self.end > start
+    def to_dict(self) -> dict:
+        return {
+            "meeting_id":  self.meeting_id,
+            "title":       self.title,
+            "room":        self.room_id,
+            "start":       self.start.isoformat(),
+            "end":         self.end.isoformat(),
+            "attendees":   self.attendees,
+        }
+@dataclass
+class Meeting:
+    title: str
+    start: datetime
+    end: datetime
+    attendees: int
+    needs_av: bool = False
+    meeting_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])
+class MeetingRoomScheduler:
+    def __init__(self, rooms: list[Room]) -> None:
+        self.rooms = sorted(rooms, key=lambda r: r.capacity)
+        self._bookings: dict[str, list[MeetingSlot]] = {r.room_id: [] for r in rooms}
+    def reset(self) -> None:
+        self._bookings = {r.room_id: [] for r in self.rooms}
+    def schedule(self, meeting: Meeting) -> Optional[MeetingSlot]:
+        if meeting.end <= meeting.start:
+            return None
+        for room in self._eligible_rooms(meeting):
+            if self._is_free(room, meeting.start, meeting.end):
+                slot = MeetingSlot(
+                    start=meeting.start, end=meeting.end,
+                    meeting_id=meeting.meeting_id, room_id=room.room_id,
+                    title=meeting.title, attendees=meeting.attendees,
+                )
+                self._bookings[room.room_id].append(slot)
+                self._bookings[room.room_id].sort()
+                return slot
+        return None
+    def free_slots(self, room_id: str, window_start: datetime, window_end: datetime, min_duration_minutes: int = 30) -> list[tuple]:
+        slots = self._bookings.get(room_id, [])
+        free, cursor = [], window_start
+        for slot in slots:
+            if slot.start > cursor:
+                if (slot.start - cursor).total_seconds() / 60 >= min_duration_minutes:
+                    free.append((cursor, slot.start))
+            cursor = max(cursor, slot.end)
+        if cursor < window_end and (window_end - cursor).total_seconds() / 60 >= min_duration_minutes:
+            free.append((cursor, window_end))
+        return free
+    def all_bookings(self) -> list[MeetingSlot]:
+        result = []
+        for slots in self._bookings.values():
+            result.extend(slots)
+        return sorted(result)
+    def _eligible_rooms(self, meeting: Meeting) -> list[Room]:
+        return [r for r in self.rooms if r.capacity >= meeting.attendees and (not meeting.needs_av or r.has_av)]
+    def _is_free(self, room: Room, start: datetime, end: datetime) -> bool:
+        return not any(s.overlaps(start, end) for s in self._bookings[room.room_id])
+@dataclass
+class BreachedObligation:
+    contract_id: str
+    obligation_type: ObligationType
+    metric_name: str
+    threshold_value: float
+    current_value: float
+    predicted_value: Optional[float]
+    deadline: str
+    consequence: str
+    conflict_with: Optional[str] = None
+    @property
+    def breach_gap(self) -> float:
+        return self.current_value - self.threshold_value
+    def auto_severity(self) -> Severity:
+        critical_kw = {"termination", "default", "liquidation", "penalty"}
+        high_kw     = {"notice", "cure period", "acceleration"}
+        cl = self.consequence.lower()
+        if any(k in cl for k in critical_kw) or self.conflict_with:
+            return Severity.CRITICAL
+        if any(k in cl for k in high_kw):
+            return Severity.HIGH
+        gap_pct = abs(self.breach_gap) / (abs(self.threshold_value) + 1e-9)
+        if gap_pct > 0.30:
+            return Severity.HIGH
+        if gap_pct > 0.10:
+            return Severity.MEDIUM
+        return Severity.LOW
+@dataclass
+class Task:
+    task_id: str
+    title: str
+    description: str
+    assigned_to: list[Department]
+    severity: Severity
+    due_by: datetime
+    contract_id: str
+    obligation_type: ObligationType
+    created_at: datetime = field(default_factory=datetime.now)
+    def to_dict(self) -> dict:
+        return {
+            "task_id":         self.task_id,
+            "title":           self.title,
+            "description":     self.description,
+            "assigned_to":     [d.value for d in self.assigned_to],
+            "severity":        self.severity.name,
+            "due_by":          self.due_by.isoformat(),
+            "contract_id":     self.contract_id,
+            "obligation_type": self.obligation_type.value,
+            "created_at":      self.created_at.isoformat(),
+        }
+class TaskScheduler:
+    def __init__(self, room_scheduler: Optional[MeetingRoomScheduler] = None) -> None:
+        self._tasks: list[Task] = []
+        self._room_scheduler = room_scheduler
+        self._task_counter = 0
+        self._auto_meetings: list[MeetingSlot] = []
+    def reset(self) -> None:
+        self._tasks = []
+        self._task_counter = 0
+        self._auto_meetings = []
+        if self._room_scheduler:
+            self._room_scheduler.reset()
+    def process_breach(self, breach: BreachedObligation) -> tuple[Task, Optional[MeetingSlot]]:
+        severity    = breach.auto_severity()
+        departments = self._resolve_departments(breach.obligation_type, severity)
+        due_by      = datetime.now() + timedelta(hours=SEVERITY_DUE_HOURS[severity])
+        self._task_counter += 1
+        task = Task(
+            task_id=f"TASK-{self._task_counter:04d}",
+            title=f"[{severity.name}] Breach: {breach.metric_name} in {breach.contract_id}",
+            description=self._build_description(breach, severity),
+            assigned_to=departments,
+            severity=severity,
+            due_by=due_by,
+            contract_id=breach.contract_id,
+            obligation_type=breach.obligation_type,
+        )
+        self._tasks.append(task)
+        meeting_slot = None
+        if breach.conflict_with and self._room_scheduler:
+            meeting_slot = self._book_conflict_meeting(task, breach)
+        self._alert_team_via_email(task)
+        return task, meeting_slot
+    def _alert_team_via_email(self, task: Task) -> None:
+        if task.severity not in [Severity.CRITICAL, Severity.HIGH]:
+            return
+        email_body = f"""
+        [URGENT] Covenant Breach Alert: {task.title}
+        A {task.severity.name} severity breach has been processed by the ContractPulse Response Engine.
+        Task ID: {task.task_id}
+        Assigned To: {[d.value for d in task.assigned_to]}
+        Due By: {task.due_by.strftime('%Y-%m-%d %H:%M:%S')}
+        Details:
+        {task.description}
+        Please take immediate action.
+        """
+        # Try to send a real email if credentials are set
+        sender_email = os.getenv("SMTP_EMAIL")
+        sender_password = os.getenv("SMTP_PASSWORD")
+        receiver_email = os.getenv("ALERT_RECEIVER_EMAIL", sender_email) # Send to self if no specific receiver
+        if sender_email and sender_password and receiver_email:
+            try:
+                msg = EmailMessage()
+                msg.set_content(email_body)
+                msg['Subject'] = f"[URGENT] Covenant Breach Alert: {task.title}"
+                msg['From'] = sender_email
+                msg['To'] = receiver_email
+                # Assuming Gmail SMTP for this example
+                with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
+                    server.login(sender_email, sender_password)
+                    server.send_message(msg)
+                print(f"📧 REAL EMAIL SUCCESSFULLY SENT TO {receiver_email}")
+                return
+            except Exception as e:
+                print(f"Failed to send real email: {e}. Falling back to terminal log.")
+        # Fallback to terminal if no credentials or sending fails
+        print("\n" + "="*60)
+        print("📧 AUTOMATED EMAIL ALERT DISPATCHED (Terminal Mock)")
+        print("="*60)
+        print(email_body)
+        print("="*60 + "\n")
+    def process_batch(self, breaches: list[BreachedObligation]) -> tuple[list[Task], list[MeetingSlot]]:
+        results = [self.process_breach(b) for b in breaches]
+        tasks    = [r[0] for r in results]
+        meetings = [r[1] for r in results if r[1]]
+        return sorted(tasks, key=lambda t: t.severity.value, reverse=True), meetings
+    def all_tasks(self) -> list[Task]:
+        return sorted(self._tasks, key=lambda t: t.severity.value, reverse=True)
+    def department_summary(self) -> dict:
+        summary = {}
+        for task in self._tasks:
+            for dept in task.assigned_to:
+                key = dept.value
+                if key not in summary:
+                    summary[key] = {s.name: 0 for s in Severity}
+                summary[key][task.severity.name] += 1
+        return summary
+    def _resolve_departments(self, ob_type: ObligationType, severity: Severity) -> list[Department]:
+        primary   = OBLIGATION_OWNERS.get(ob_type, [Department.LEGAL])
+        escalated = SEVERITY_ESCALATION.get(severity, [])
+        return list(dict.fromkeys(primary + escalated))
+    def _build_description(self, breach: BreachedObligation, severity: Severity) -> str:
+        lines = [
+            f"Contract  : {breach.contract_id}",
+            f"Metric    : {breach.metric_name}",
+            f"Threshold : {breach.threshold_value}",
+            f"Current   : {breach.current_value}  (gap={breach.breach_gap:+.2f})",
+        ]
+        if breach.predicted_value is not None:
+            lines.append(f"Predicted : {breach.predicted_value} (next period)")
+        lines += [f"Deadline  : {breach.deadline}", f"Consequence: {breach.consequence}"]
+        if breach.conflict_with:
+            lines.append(f"⚠ Conflicts with: {breach.conflict_with}")
+        return "\n  ".join(lines)
+    def _book_conflict_meeting(self, task: Task, breach: BreachedObligation) -> Optional[MeetingSlot]:
+        now        = datetime.now().replace(second=0, microsecond=0)
+        window_end = now + timedelta(hours=8)
+        for room in self._room_scheduler.rooms:
+            free = self._room_scheduler.free_slots(room.room_id, now, window_end, min_duration_minutes=60)
+            if free:
+                start, _ = free[0]
+                meeting  = Meeting(
+                    title=f"Conflict Review: {breach.contract_id} vs {breach.conflict_with}",
+                    start=start, end=start + timedelta(minutes=60),
+                    attendees=max(3, len(task.assigned_to) * 2),
+                    needs_av=True,
+                )
+                slot = self._room_scheduler.schedule(meeting)
+                if slot:
+                    self._auto_meetings.append(slot)
+                    return slot
+        return None
+# ─────────────────────────────────────────────────────────────
+# GLOBAL SCHEDULER INSTANCE
+# ─────────────────────────────────────────────────────────────
+def _build_scheduler() -> TaskScheduler:
+    rooms = [
+        Room("R1", "Boardroom",     capacity=20, has_av=True),
+        Room("R2", "Conference A",  capacity=10, has_av=True),
+        Room("R3", "Conference B",  capacity=8,  has_av=False),
+        Room("R4", "Huddle Room 1", capacity=4,  has_av=False),
+        Room("R5", "Huddle Room 2", capacity=4,  has_av=True),
+    ]
+    return TaskScheduler(room_scheduler=MeetingRoomScheduler(rooms))
+# Use a mutable container so blueprint routes can rebind it reliably
+_state = {"scheduler": _build_scheduler()}
+def get_scheduler() -> TaskScheduler:
+    return _state["scheduler"]
+# ─────────────────────────────────────────────────────────────
+# HELPERS
+# ─────────────────────────────────────────────────────────────
+def _parse_obligation_type(raw: str) -> ObligationType:
+    for ot in ObligationType:
+        if ot.value == raw:
+            return ot
+    return ObligationType.UNKNOWN
+def _breach_from_dict(d: dict) -> BreachedObligation:
+    return BreachedObligation(
+        contract_id=d["contract_id"],
+        obligation_type=_parse_obligation_type(d.get("obligation_type", "unknown")),
+        metric_name=d.get("metric_name", d.get("obligation_type", "unknown")),
+        threshold_value=float(d["threshold_value"]),
+        current_value=float(d["current_value"]),
+        predicted_value=float(d["predicted_value"]) if d.get("predicted_value") is not None else None,
+        deadline=d.get("deadline", "unknown"),
+        consequence=d.get("consequence", "notice"),
+        # BUG FIX: treat empty string as None so conflict meetings are not
+        # accidentally triggered when the frontend sends conflict_with: ""
+        conflict_with=d.get("conflict_with") or None,
+    )
+# ─────────────────────────────────────────────────────────────
+# ROUTES
+# ─────────────────────────────────────────────────────────────
+@scheduler_bp.get("/api/health")
+def health():
+    return jsonify({"status": "ok", "timestamp": datetime.now().isoformat()})
+@scheduler_bp.post("/api/process_breach")
+def process_breach():
+    """
+    Body (JSON):
+      {
+        "contract_id": "CTR-001",
+        "obligation_type": "revenue",
+        "metric_name": "revenue",
+        "threshold_value": 5000000,
+        "current_value": 3800000,
+        "predicted_value": 3500000,   // optional
+        "deadline": "annually",
+        "consequence": "termination",
+        "conflict_with": null          // optional contract_id
+      }
+    """
+    body = request.get_json(force=True)
+    try:
+        breach = _breach_from_dict(body)
+    except (KeyError, ValueError) as e:
+        return jsonify({"error": f"Invalid payload: {e}"}), 400
+    task, meeting_slot = get_scheduler().process_breach(breach)
+    return jsonify({
+        "task":    task.to_dict(),
+        "meeting": meeting_slot.to_dict() if meeting_slot else None,
+    }), 201
+@scheduler_bp.post("/api/process_batch")
+def process_batch():
+    """
+    Body: { "breaches": [ <breach>, ... ] }
+    Returns tasks sorted by severity desc + any auto-booked meetings.
+    """
+    body = request.get_json(force=True)
+    raw_breaches = body.get("breaches", [])
+    if not raw_breaches:
+        return jsonify({"error": "breaches list is empty"}), 400
+    try:
+        breaches = [_breach_from_dict(b) for b in raw_breaches]
+    except (KeyError, ValueError) as e:
+        return jsonify({"error": f"Invalid breach in batch: {e}"}), 400
+    tasks, meetings = get_scheduler().process_batch(breaches)
+    return jsonify({
+        "tasks":    [t.to_dict() for t in tasks],
+        "meetings": [m.to_dict() for m in meetings],
+        "count":    len(tasks),
+    }), 201
+@scheduler_bp.get("/api/tasks")
+def get_tasks():
+    """
+    Query params:
+      ?severity=CRITICAL   (optional filter)
+      ?department=Finance  (optional filter)
+    """
+    sev_filter  = request.args.get("severity", "").upper()
+    dept_filter = request.args.get("department", "")
+    tasks = get_scheduler().all_tasks()
+    if sev_filter and sev_filter in Severity.__members__:
+        target = Severity[sev_filter]
+        tasks = [t for t in tasks if t.severity == target]
+    if dept_filter:
+        dept_vals = [d.value for d in Department]
+        if dept_filter in dept_vals:
+            target_dept = Department(dept_filter)
+            tasks = [t for t in tasks if target_dept in t.assigned_to]
+    return jsonify({"tasks": [t.to_dict() for t in tasks], "count": len(tasks)})
+# BUG FIX: There were TWO @scheduler_bp.get("/api/meetings") routes in the
+# original file. Flask registers only the FIRST one it sees, which called a
+# non-existent method `_task_scheduler_meetings()` and crashed on every
+# request. The second (correct) route was silently ignored.
+# Fixed: one route that correctly reads _auto_meetings from the scheduler.
+@scheduler_bp.get("/api/meetings")
+def get_meetings():
+    """Return all auto-booked conflict-resolution meeting slots."""
+    auto = get_scheduler()._auto_meetings or []
+    return jsonify({"meetings": [m.to_dict() for m in auto], "count": len(auto)})
+@scheduler_bp.get("/api/departments")
+def get_departments():
+    return jsonify({"summary": get_scheduler().department_summary()})
+@scheduler_bp.post("/api/reset")
+def reset():
+    # BUG FIX: rebinding a module-level `scheduler` variable inside a
+    # blueprint function is unreliable — the local name rebinds but callers
+    # that already imported the old object keep the stale reference.
+    # Fixed: mutate the shared _state dict so all routes see the new instance.
+    _state["scheduler"] = _build_scheduler()
+    return jsonify({"status": "reset", "timestamp": datetime.now().isoformat()})
+# Module-level alias for the scheduler instance that exists at import time.
+# NOTE(review): this name is bound once; after POST /api/reset stores a new
+# instance in _state it still refers to the old one, so prefer get_scheduler().
+# Presumably kept for importers that expect a `scheduler` name; confirm.
+scheduler = _state["scheduler"]

test_pipeline.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import json
+import os
+import sys
+# Add project root to path so 'src' package is found
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+from all_model_code.model_1_code.pipeline import ObligationPipeline
+def run_test():
+    # ─── Sample contract (you can replace this later) ───
+    sample_contract = """
+    The Borrower shall maintain at all times a Debt-to-Equity Ratio of not more than 2.5 to 1.0, tested quarterly.
+    Failure to maintain this ratio shall constitute an event of default.
+    The Company shall maintain minimum annual gross revenue of at least $5,000,000 during each contract year.
+    The Vendor shall obtain and maintain commercial general liability insurance with a minimum coverage amount of $2,000,000.
+    """
+    # ─── Initialize pipeline ───
+    # Ensure the model path points to the actual folder where your weights live
+    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+    model_path = os.path.join(BASE_DIR, "ckpt_obligation_fast")
+    config = {
+        "model_name": model_path,
+        "device": "cpu",
+        "filter_min_confidence": 0.4,
+        "min_fields": 2
+    }
+    pipeline = ObligationPipeline(config)
+    print("\n" + "=" * 60)
+    print("RUNNING OBLIGATION EXTRACTION TEST")
+    print("=" * 60)
+    # ─── Run pipeline ───
+    results = pipeline.process(
+        source=sample_contract,
+        source_type="text",
+        contract_id="demo_contract",
+        debug=False
+    )
+    # ─── Clean display ───
+    cleaned_results = []
+    for r in results:
+        cleaned = {
+            "metric": r.get("metric_name"),
+            "operator": r.get("operator"),
+            "value": r.get("threshold_value"),
+            "deadline": r.get("deadline"),
+            "consequence": r.get("consequence"),
+            "confidence": round(r.get("confidence_score", 0), 3)
+        }
+        cleaned = {k: v for k, v in cleaned.items() if v is not None}
+        cleaned_results.append(cleaned)
+    # ─── Deduplicate: same metric + same value → keep highest confidence ───
+    seen = {}
+    for obligation in cleaned_results:
+        key = (obligation.get("metric"), obligation.get("value"))
+        if key not in seen:
+            seen[key] = obligation
+        else:
+            if obligation.get("confidence", 0) > seen[key].get("confidence", 0):
+                seen[key] = obligation
+    deduplicated_results = list(seen.values())
+    print("\nExtracted Obligations:\n")
+    print(json.dumps(deduplicated_results, indent=2))
+    print("\nTotal Obligations Found:", len(deduplicated_results))
+if __name__ == "__main__":
+    run_test()

test_routes.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import sys
+import email.message
+class _CGI:
+    @staticmethod
+    def parse_header(line):
+        m = email.message.Message()
+        m['content-type'] = line
+        return m.get_content_type(), m.get_params() or {}
+# Register the shim under the module name 'cgi' before importing main, so any
+# `import cgi` executed during that import resolves to it.
+sys.modules['cgi'] = _CGI()
+from main import app
+# Print the URL rule of every route registered on the app.
+print([r.rule for r in app.url_map.iter_rules()])