akshay1306 commited on
Commit
b3280aa
Β·
verified Β·
1 Parent(s): c61aab1

Upload 7 files

Browse files
Files changed (7) hide show
  1. Dockerfile +22 -0
  2. clause_extractor.py +384 -0
  3. main.py +658 -0
  4. model3.py +200 -0
  5. scheduler_api.py +548 -0
  6. test_pipeline.py +81 -0
  7. test_routes.py +14 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Install system dependencies (needed for pandas, prophet, etc.)
6
+ RUN apt-get update && apt-get install -y \
7
+ build-essential \
8
+ gcc \
9
+ g++ \
10
+ && rm -rf /var/lib/apt/lists/*
11
+
12
+ # Copy backend code
13
+ COPY . /app
14
+
15
+ # Install Python dependencies
16
+ RUN pip install --no-cache-dir -r requirements.txt
17
+
18
+ # Expose HF required port
19
+ EXPOSE 7860
20
+
21
+ # Start app
22
+ CMD ["python", "main.py"]
clause_extractor.py ADDED
@@ -0,0 +1,384 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Cross-Contract Clause Extractor and Pair Generator
3
+ Uses Groq API (groq.com) for clause extraction
4
+ Feeds pairs into Model 3 (NLI conflict detection)
5
+
6
+ Install: pip install groq
7
+ API key: https://console.groq.com
8
+ """
9
+
10
+ import os
11
+ import json
12
+ import torch
13
+ from groq import Groq
14
+ from transformers import pipeline as hf_pipeline, AutoTokenizer
15
+ from dotenv import load_dotenv
16
+ load_dotenv()
17
+
18
+ # ── Config ────────────────────────────────────────────────────────────────────
19
+
20
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
21
+ print("API KEY:", GROQ_API_KEY)
22
+ MODEL3_DIR = "../model_3" # path to your saved Model 3
23
+ GROQ_MODEL = "openai/gpt-oss-120b" # same model you're already using
24
+ MAX_LEN = 512 # must match Model 3 training config
25
+ CONF_THRESHOLD = 0.7 # flag pairs below this as uncertain
26
+
27
+ CLAUSE_TYPES = [
28
+ "termination",
29
+ "warranty",
30
+ "indemnification",
31
+ "ip_ownership",
32
+ "dispute_resolution",
33
+ "confidentiality",
34
+ "liability_cap",
35
+ "governing_law",
36
+ "payment",
37
+ "non_compete",
38
+ "force_majeure",
39
+ "assignment",
40
+ ]
41
+
42
+ # ── Groq client ───────────────────────────────────────────────────────────────
43
+
44
+ groq_client = Groq(api_key=GROQ_API_KEY)
45
+
46
+ # ── Step 1: Extract clauses from a single contract ────────────────────────────
47
+
48
+ EXTRACTION_SYSTEM_PROMPT = """You are a legal clause extraction engine.
49
+ Your job is to extract distinct legal clauses from contract text.
50
+ You must return ONLY a valid JSON array β€” no explanation, no markdown fences, no preamble.
51
+ Never extract financial covenant clauses with numeric thresholds β€” those are handled separately."""
52
+
53
+ EXTRACTION_USER_PROMPT = """Extract all distinct legal clauses from this contract.
54
+
55
+ For each clause return:
56
+ - clause_type: one of [{clause_types}]
57
+ - clause_text: the core legal obligation rewritten concisely in 1-2 sentences. Max 60 words. Do NOT copy verbatim.
58
+
59
+ Rules:
60
+ - One entry per clause_type maximum. If duplicates exist, keep the most restrictive.
61
+ - Skip purely numeric clauses like "maintain debt ratio >= 2.5" β€” financial covenants only.
62
+ - Skip any clause that does not fit the listed types.
63
+
64
+ Return format β€” JSON array only, nothing else:
65
+ [
66
+ {{"clause_type": "termination", "clause_text": "Either party may terminate with 30 days written notice."}},
67
+ {{"clause_type": "dispute_resolution", "clause_text": "All disputes resolved through binding arbitration in New York."}}
68
+ ]
69
+
70
+ Contract text:
71
+ {contract_text}"""
72
+
73
+
74
+ def extract_clauses(contract_text: str, contract_label: str) -> list[dict]:
75
+ """
76
+ Call Groq to extract and classify clauses from one contract.
77
+ Returns list of {clause_type, clause_text, contract} dicts.
78
+ """
79
+ prompt = EXTRACTION_USER_PROMPT.format(
80
+ clause_types=", ".join(CLAUSE_TYPES),
81
+ contract_text=contract_text.strip()
82
+ )
83
+
84
+ # Non-streaming β€” we need the full response before JSON parsing
85
+ completion = groq_client.chat.completions.create(
86
+ model=GROQ_MODEL,
87
+ messages=[
88
+ {"role": "system", "content": EXTRACTION_SYSTEM_PROMPT},
89
+ {"role": "user", "content": prompt},
90
+ ],
91
+ temperature=0, # deterministic extraction
92
+ max_completion_tokens=2048,
93
+ top_p=1,
94
+ reasoning_effort="medium",
95
+ stream=False, # must be False β€” need full JSON before parsing
96
+ stop=None,
97
+ )
98
+
99
+ raw = completion.choices[0].message.content.strip()
100
+
101
+ # Strip markdown fences if model adds them anyway
102
+ if raw.startswith("```"):
103
+ raw = raw.split("```")[1]
104
+ if raw.startswith("json"):
105
+ raw = raw[4:]
106
+ raw = raw.strip()
107
+
108
+ try:
109
+ clauses = json.loads(raw)
110
+ # Handle if model wraps array in a dict
111
+ if isinstance(clauses, dict):
112
+ clauses = next(iter(clauses.values()))
113
+ except json.JSONDecodeError as e:
114
+ print(f"[ERROR] JSON parse failed for {contract_label}: {e}")
115
+ print(f"Raw response was:\n{raw[:400]}")
116
+ return []
117
+
118
+ for c in clauses:
119
+ c["contract"] = contract_label
120
+
121
+ print(f"\n[{contract_label}] Extracted {len(clauses)} clauses:")
122
+ for c in clauses:
123
+ print(f" [{c['clause_type']}] {c['clause_text'][:80]}...")
124
+
125
+ return clauses
126
+
127
+
128
+ # ── Step 2: Pair same-type clauses across contracts ───────────────────────────
129
+
130
+ def generate_pairs(
131
+ clauses_a: list[dict],
132
+ clauses_b: list[dict],
133
+ ) -> list[dict]:
134
+ """
135
+ Match clauses of the same type across Contract A and Contract B.
136
+ Returns list of {clause_type, clause_a, clause_b} dicts.
137
+ """
138
+ index_a = {c["clause_type"]: c["clause_text"] for c in clauses_a}
139
+ index_b = {c["clause_type"]: c["clause_text"] for c in clauses_b}
140
+
141
+ matched_types = set(index_a.keys()) & set(index_b.keys())
142
+ unmatched_types = set(index_a.keys()).symmetric_difference(set(index_b.keys()))
143
+
144
+ pairs = [
145
+ {
146
+ "clause_type": clause_type,
147
+ "clause_a": index_a[clause_type],
148
+ "clause_b": index_b[clause_type],
149
+ }
150
+ for clause_type in matched_types
151
+ ]
152
+
153
+ print(f"\n[PAIRING] {len(pairs)} matching types: {sorted(matched_types)}")
154
+ if unmatched_types:
155
+ print(f"[PAIRING] Only in one contract (skipped): {sorted(unmatched_types)}")
156
+
157
+ return pairs
158
+
159
+
160
+ # ── Step 3: Validate token lengths before inference ───────────────────────────
161
+
162
+ def check_token_length(tokenizer, clause_a: str, clause_b: str, max_len: int) -> int:
163
+ """Returns token count. Warns if truncation will occur."""
164
+ tokens = tokenizer(
165
+ f"{clause_a} [SEP] {clause_b}",
166
+ return_tensors="pt",
167
+ truncation=False,
168
+ )
169
+ length = tokens["input_ids"].shape[1]
170
+ if length > max_len:
171
+ print(f" [WARN] {length} tokens > MAX_LEN {max_len} β€” will be truncated")
172
+ elif length > int(max_len * 0.85):
173
+ print(f" [WARN] {length} tokens is close to limit ({max_len})")
174
+ return length
175
+
176
+
177
+ # ── Step 4: Load and run Model 3 ─────────────────────────────────────────────
178
+
179
+ def load_model3(model_dir: str, max_len: int):
180
+ tokenizer = AutoTokenizer.from_pretrained(model_dir)
181
+ device = 0 if torch.cuda.is_available() else -1
182
+ pipe = hf_pipeline(
183
+ "text-classification",
184
+ model=model_dir,
185
+ tokenizer=tokenizer,
186
+ device=device,
187
+ top_k=None,
188
+ truncation=True,
189
+ max_length=max_len,
190
+ return_token_type_ids=False # πŸ”₯ ADD THIS
191
+ )
192
+ print(f"\n[MODEL3] Loaded from '{model_dir}' on {'GPU' if device == 0 else 'CPU'}")
193
+ return pipe, tokenizer
194
+
195
+
196
+ def score_pairs(
197
+ pairs: list[dict],
198
+ pipe,
199
+ tokenizer,
200
+ max_len: int,
201
+ conf_threshold: float,
202
+ ) -> list[dict]:
203
+ """
204
+ Run Model 3 on each clause pair.
205
+ Returns results sorted: contradictions first, then by confidence descending.
206
+ """
207
+ results = []
208
+
209
+ for pair in pairs:
210
+ clause_a = pair["clause_a"]
211
+ clause_b = pair["clause_b"]
212
+ clause_type = pair["clause_type"]
213
+
214
+ token_len = check_token_length(tokenizer, clause_a, clause_b, max_len)
215
+ raw_result = pipe(f"{clause_a} [SEP] {clause_b}")
216
+
217
+ if raw_result and isinstance(raw_result[0], list):
218
+ raw_result = raw_result[0]
219
+
220
+ scores = {r["label"]: r["score"] for r in raw_result}
221
+ predicted_label = max(scores, key=scores.get)
222
+ predicted_score = scores[predicted_label]
223
+ contradiction_score = scores.get("contradiction", 0.0)
224
+
225
+ results.append({
226
+ "clause_type": clause_type,
227
+ "clause_a": clause_a,
228
+ "clause_b": clause_b,
229
+ "predicted_label": predicted_label,
230
+ "predicted_score": round(predicted_score, 4),
231
+ "contradiction_score": round(contradiction_score, 4),
232
+ "all_scores": {k: round(v, 4) for k, v in scores.items()},
233
+ "token_length": token_len,
234
+ "uncertain": predicted_score < conf_threshold,
235
+ })
236
+
237
+ # Contradictions first, then sorted by confidence descending
238
+ results.sort(key=lambda x: (
239
+ x["predicted_label"] != "contradiction",
240
+ -x["predicted_score"],
241
+ ))
242
+ # βœ… Keep only strong, reliable contradictions
243
+
244
+ return results
245
+
246
+
247
+ # ── Step 5: Print results ─────────────────────────────────────────────────────
248
+
249
+ def print_results(results: list[dict]):
250
+ # πŸ”₯ Split results
251
+ strong = [
252
+ r for r in results
253
+ if r["predicted_label"] == "contradiction"
254
+ and not r["uncertain"]
255
+ and r["predicted_score"] >= 0.75
256
+ ]
257
+
258
+ uncertain = [
259
+ r for r in results
260
+ if r["uncertain"]
261
+ ]
262
+
263
+ print("\n" + "=" * 65)
264
+ print("CONTRACT CONFLICT ANALYSIS")
265
+ print("=" * 65)
266
+
267
+ # βœ… PRIMARY SECTION
268
+ print("\n── HIGH-CONFIDENCE CONFLICTS ─────────────────────────────")
269
+ print("[INFO] These are reliable contradictions (>= 75%)")
270
+
271
+ if strong:
272
+ for r in strong:
273
+ print(f"\n[{r['clause_type'].upper()}] {r['predicted_score']:.2%}")
274
+ print(f"Contract A: {r['clause_a']}")
275
+ print(f"Contract B: {r['clause_b']}")
276
+ else:
277
+ print(" None found.")
278
+
279
+ # ⚠️ SECONDARY SECTION
280
+ print("\n── UNCERTAIN / REVIEW NEEDED ─────────────────────────────")
281
+ print("[INFO] Lower-confidence predictions β€” require human validation")
282
+
283
+ if uncertain:
284
+ for r in uncertain:
285
+ print(f"\n[{r['clause_type'].upper()}] "
286
+ f"{r['predicted_label']} ({r['predicted_score']:.2%})")
287
+ print(f"Contract A: {r['clause_a']}")
288
+ print(f"Contract B: {r['clause_b']}")
289
+ else:
290
+ print(" None.")
291
+
292
+ print("\n" + "=" * 65)
293
+
294
+
295
+ # ── Main pipeline ─────────────────────────────────────────────────────────────
296
+
297
+ def run_pipeline(
298
+ contract_a_text: str,
299
+ contract_b_text: str,
300
+ model3_dir: str = MODEL3_DIR,
301
+ max_len: int = MAX_LEN,
302
+ conf_threshold: float = CONF_THRESHOLD,
303
+ ) -> list[dict]:
304
+ """
305
+ Full pipeline:
306
+ 1. Groq extracts + classifies clauses from both contracts
307
+ 2. Same-type clauses are paired across contracts
308
+ 3. Model 3 scores each pair for contradiction
309
+ 4. Results returned sorted by conflict severity
310
+ """
311
+ print("\n── STEP 1: Extracting clauses via Groq ────────────────────")
312
+ clauses_a = extract_clauses(contract_a_text, "Contract A")
313
+ clauses_b = extract_clauses(contract_b_text, "Contract B")
314
+
315
+ if not clauses_a or not clauses_b:
316
+ print("[ERROR] Extraction returned empty. Check GROQ_API_KEY and contract text.")
317
+ return []
318
+
319
+ print("\n── STEP 2: Generating clause pairs ────────────────────────")
320
+ pairs = generate_pairs(clauses_a, clauses_b)
321
+
322
+ if not pairs:
323
+ print("[WARN] No matching clause types between contracts.")
324
+ return []
325
+
326
+ print(f"\n── STEP 3: Scoring {len(pairs)} pairs with Model 3 ────────")
327
+ pipe, tokenizer = load_model3(model3_dir, max_len)
328
+ results = score_pairs(pairs, pipe, tokenizer, max_len, conf_threshold)
329
+
330
+ print_results(results)
331
+ return results
332
+
333
+
334
+ # ── Example usage ─────────────────────────────────────────────────────────────
335
+
336
+ if __name__ == "__main__":
337
+
338
+ CONTRACT_A = """
339
+ VENDOR AGREEMENT 2024
340
+
341
+ Termination: Either party may terminate this agreement for convenience
342
+ upon 30 days written notice to the other party.
343
+
344
+ Warranties: Seller warrants that all deliverables shall be free from
345
+ defects for a period of 24 months from the date of acceptance by Buyer.
346
+
347
+ Dispute Resolution: All disputes arising under this agreement shall be
348
+ resolved through binding arbitration in New York under AAA rules.
349
+
350
+ Intellectual Property: The Licensee is granted an exclusive, worldwide,
351
+ perpetual license to use the Software and all derivative works.
352
+
353
+ Confidentiality: Neither party shall disclose Confidential Information
354
+ to any third party without prior written consent of the disclosing party.
355
+
356
+ Governing Law: This agreement shall be governed by the laws of Delaware.
357
+ """
358
+
359
+ CONTRACT_B = """
360
+ MASTER SERVICES AGREEMENT 2024
361
+
362
+ Termination: This agreement may only be terminated for cause, specifically
363
+ material breach that remains uncured for 60 days after written notice.
364
+
365
+ Warranties: Seller disclaims all warranties, express or implied, including
366
+ any warranty of merchantability or fitness for a particular purpose.
367
+
368
+ Dispute Resolution: Either party may bring suit in any court of competent
369
+ jurisdiction to resolve disputes arising under this agreement.
370
+
371
+ Intellectual Property: The license granted herein is non-exclusive, limited
372
+ to the United States, and valid for 12 months only from the effective date.
373
+
374
+ Confidentiality: Confidential Information must not be shared with outside
375
+ parties unless the disclosing party agrees in writing beforehand.
376
+
377
+ Governing Law: This agreement is governed by the laws of California.
378
+ """
379
+
380
+ results = run_pipeline(CONTRACT_A, CONTRACT_B)
381
+
382
+ with open("conflict_results.json", "w") as f:
383
+ json.dump(results, f, indent=2)
384
+ print("\nResults saved to conflict_results.json")
main.py ADDED
@@ -0,0 +1,658 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
+
5
+ import io
6
+ import pickle
7
+ import secrets
8
+ import threading
9
+ from datetime import datetime, timezone
10
+ from functools import wraps
11
+
12
+ import httpx
13
+ import urllib.parse
14
+ import numpy as np
15
+ import pandas as pd
16
+ import pdfplumber
17
+ from bson import ObjectId
18
+ from dotenv import load_dotenv
19
+ from flask import Flask, jsonify, redirect, request, session
20
+ from flask_cors import CORS
21
+ from flask_session import Session
22
+ from pymongo import MongoClient
23
+ import certifi
24
+ from werkzeug.security import check_password_hash, generate_password_hash
25
+ from all_model_code.model_1_code.pipeline import ObligationPipeline
26
+ from scheduler_api import scheduler_bp, scheduler, BreachedObligation, ObligationType
27
+
28
+ # ── clause_extractor (for two-contract comparison) ────────────────────────────
29
+ # Adjust EXTRACTOR_DIR if clause_extractor.py lives elsewhere relative to main.py
30
+ EXTRACTOR_DIR = os.path.join(os.path.dirname(__file__), "..")
31
+ sys.path.insert(0, EXTRACTOR_DIR)
32
+ try:
33
+ from clause_extractor import extract_clauses, generate_pairs, load_model3, score_pairs
34
+ EXTRACTOR_AVAILABLE = True
35
+ except ImportError as e:
36
+ print(f"[WARN] clause_extractor not found: {e}. /api/compare will run in mock mode.")
37
+ EXTRACTOR_AVAILABLE = False
38
+
39
+ # ─── Load env ─────────────────────────────────────────────────────────────────
40
+ load_dotenv()
41
+
42
+ MONGO_URI = os.getenv("MONGO_URI")
43
+ GOOGLE_CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID")
44
+ GOOGLE_SECRET = os.getenv("GOOGLE_CLIENT_SECRET")
45
+ GOOGLE_REDIRECT = os.getenv("GOOGLE_REDIRECT_URI")
46
+ FRONTEND_URL = os.getenv("FRONTEND_URL", "http://localhost:3000")
47
+ SECRET_KEY = os.getenv("SECRET_KEY", secrets.token_hex(32))
48
+ IS_PROD = os.getenv("FLASK_ENV", "development") == "production"
49
+ CONF_THRESHOLD = float(os.getenv("CONF_THRESHOLD", "0.7"))
50
+ MAX_LEN = int(os.getenv("MAX_LEN", "512"))
51
+
52
+ # ─── MongoDB ──────────────────────────────────────────────────────────────────
53
+ try:
54
+ mongo_client = MongoClient(MONGO_URI, tlsCAFile=certifi.where())
55
+ _db = mongo_client["userinfo"]
56
+ _db.users.create_index("email", unique=True, sparse=True)
57
+ print("Connected to MongoDB")
58
+ except Exception as e:
59
+ print(f"MongoDB connection failed: {e}")
60
+ if "SSL" in str(e) or "tls" in str(e).lower():
61
+ print("\n" + "!" * 60)
62
+ print("CRITICAL: ATLAS IP WHITELIST BLOCKED!")
63
+ print("MongoDB Atlas enforces IP whitelisting by aggressively dropping")
64
+ print("the TLS/SSL handshake. This 'tls1 alert internal error' means")
65
+ print("your current network IP is not added to your Atlas allowlist.")
66
+ print("Log into MongoDB Atlas -> Security -> Network Access -> Add IP")
67
+ print("!" * 60 + "\n")
68
+ raise
69
+
70
+ def db():
71
+ return _db
72
+
73
+ def now():
74
+ return datetime.now(timezone.utc)
75
+
76
+ # ─── App setup ────────────────────────────────────────────────────────────────
77
+ app = Flask(__name__)
78
+ # app.secret_key = secrets.token_hex(16)
79
+
80
+ CORS(
81
+ app,
82
+ origins = [FRONTEND_URL, "http://localhost:3000", "http://127.0.0.1:3000"],
83
+ supports_credentials = True,
84
+ allow_headers = ["Content-Type", "Authorization"],
85
+ methods = ["GET", "POST", "PUT", "DELETE", "OPTIONS"],
86
+ )
87
+
88
+ app.register_blueprint(scheduler_bp)
89
+ app.config.update(
90
+ SECRET_KEY = os.getenv("SECRET_KEY") or secrets.token_hex(32),
91
+ SESSION_TYPE = "filesystem",
92
+ SESSION_FILE_DIR = os.path.join(os.getcwd(), "session_data"),
93
+ SESSION_COOKIE_SAMESITE = "Lax",
94
+ SESSION_COOKIE_SECURE = IS_PROD,
95
+ )
96
+ Session(app)
97
+
98
+ http = httpx.Client()
99
+
100
+ # ─── Auth helpers ─────────────────────────────────────────────────────────────
101
+ def require_auth(fn):
102
+ @wraps(fn)
103
+ def inner(*a, **kw):
104
+ if "user_id" not in session:
105
+ return jsonify({"error": "Not authenticated"}), 401
106
+ return fn(*a, **kw)
107
+ return inner
108
+
109
+ def uid():
110
+ return session.get("user_id")
111
+
112
+ def serialize_user(user):
113
+ """Return a safe dict β€” never exposes the hashed password."""
114
+ return {
115
+ "id": str(user["_id"]),
116
+ "name": user.get("name"),
117
+ "email": user.get("email"),
118
+ "picture": user.get("picture"),
119
+ "provider": user.get("provider", "email"),
120
+ "created_at": user["created_at"].isoformat() if user.get("created_at") else None,
121
+ }
122
+
123
+ # ─── Email / Password auth ────────────────────────────────────────────────────
124
+
125
+ @app.route("/auth/register", methods=["POST"])
126
+ def register():
127
+ data = request.get_json(silent=True) or {}
128
+ name = (data.get("name") or "").strip()
129
+ email = (data.get("email") or "").strip().lower()
130
+ password = data.get("password") or ""
131
+
132
+ if not name:
133
+ return jsonify({"error": "Name is required"}), 400
134
+ if not email or "@" not in email:
135
+ return jsonify({"error": "A valid email is required"}), 400
136
+ if len(password) < 8:
137
+ return jsonify({"error": "Password must be at least 8 characters"}), 400
138
+ if db().users.find_one({"email": email, "provider": "email"}):
139
+ return jsonify({"error": "An account with that email already exists"}), 409
140
+
141
+ hashed = generate_password_hash(password)
142
+ user_doc = {
143
+ "name": name, "email": email, "password": hashed,
144
+ "picture": None, "provider": "email", "provider_id": None,
145
+ "created_at": now(), "updated_at": now(),
146
+ }
147
+ result = db().users.insert_one(user_doc)
148
+ session["user_id"] = str(result.inserted_id)
149
+ session["user_name"] = name
150
+ user_doc["_id"] = result.inserted_id
151
+ return jsonify({"message": "Account created", "user": serialize_user(user_doc)}), 201
152
+
153
+
154
+ @app.route("/auth/login", methods=["POST"])
155
+ def login():
156
+ data = request.get_json(silent=True) or {}
157
+ email = (data.get("email") or "").strip().lower()
158
+ password = data.get("password") or ""
159
+
160
+ if not email or not password:
161
+ return jsonify({"error": "Email and password are required"}), 400
162
+
163
+ user = db().users.find_one({"email": email, "provider": "email"})
164
+ dummy_hash = generate_password_hash("__dummy__")
165
+ stored_hash = user["password"] if user else dummy_hash
166
+ valid = check_password_hash(stored_hash, password)
167
+
168
+ if not user or not valid:
169
+ return jsonify({"error": "Invalid email or password"}), 401
170
+
171
+ session["user_id"] = str(user["_id"])
172
+ session["user_name"] = user.get("name")
173
+ return jsonify({"message": "Logged in", "user": serialize_user(user)})
174
+
175
+ # ─── Google OAuth ─────────────────────────────────────────────────────────────
176
+
177
+ @app.route("/auth/login/google")
178
+ def google_login():
179
+ state = secrets.token_urlsafe(16)
180
+ session["oauth_state"] = state
181
+ params = {
182
+ "client_id": GOOGLE_CLIENT_ID, "redirect_uri": GOOGLE_REDIRECT,
183
+ "response_type": "code", "scope": "openid email profile",
184
+ "state": state, "access_type": "online", "prompt": "select_account",
185
+ }
186
+ return redirect(f"https://accounts.google.com/o/oauth2/v2/auth?{urllib.parse.urlencode(params)}")
187
+
188
+
189
+ @app.route("/auth/callback/google")
190
+ def google_callback():
191
+ if request.args.get("state") != session.pop("oauth_state", None):
192
+ return jsonify({"error": "Invalid state β€” possible CSRF"}), 400
193
+
194
+ code = request.args.get("code")
195
+ if not code:
196
+ return jsonify({"error": "No authorization code returned from Google"}), 400
197
+
198
+ token_res = http.post(
199
+ "https://oauth2.googleapis.com/token",
200
+ data={
201
+ "code": code, "client_id": GOOGLE_CLIENT_ID,
202
+ "client_secret": GOOGLE_SECRET, "redirect_uri": GOOGLE_REDIRECT,
203
+ "grant_type": "authorization_code",
204
+ },
205
+ ).json()
206
+
207
+ access_token = token_res.get("access_token")
208
+ if not access_token:
209
+ return jsonify({"error": "Token exchange failed", "detail": token_res}), 400
210
+
211
+ info = http.get(
212
+ "https://www.googleapis.com/oauth2/v3/userinfo",
213
+ headers={"Authorization": f"Bearer {access_token}"},
214
+ ).json()
215
+
216
+ if not info.get("sub"):
217
+ return jsonify({"error": "Could not retrieve user info from Google"}), 400
218
+
219
+ user = db().users.find_one_and_update(
220
+ {"provider_id": info["sub"], "provider": "google"},
221
+ {
222
+ "$set": {"name": info.get("name"), "email": info.get("email"),
223
+ "picture": info.get("picture"), "updated_at": now()},
224
+ "$setOnInsert": {"provider": "google", "provider_id": info["sub"], "created_at": now()},
225
+ },
226
+ upsert=True,
227
+ return_document=True,
228
+ )
229
+
230
+ session["user_id"] = str(user["_id"])
231
+ session["user_name"] = info.get("name")
232
+ return redirect(f"{FRONTEND_URL}/dashboard")
233
+
234
+ # ─── Session routes ───────────────────────────────────────────────────────────
235
+
236
+ @app.route("/auth/me")
237
+ def auth_me():
238
+ if "user_id" not in session:
239
+ return jsonify({"authenticated": False}), 200
240
+ user = db().users.find_one({"_id": ObjectId(uid())})
241
+ if not user:
242
+ session.clear()
243
+ return jsonify({"authenticated": False}), 200
244
+ return jsonify({"authenticated": True, "user": serialize_user(user)})
245
+
246
+
247
+ @app.route("/auth/logout", methods=["POST"])
248
+ def logout():
249
+ session.clear()
250
+ return jsonify({"message": "Logged out"})
251
+
252
+
253
+ @app.route("/api/profile")
254
+ @require_auth
255
+ def profile():
256
+ user = db().users.find_one({"_id": ObjectId(uid())})
257
+ if not user:
258
+ return jsonify({"error": "User not found"}), 404
259
+ return jsonify(serialize_user(user))
260
+
261
+ # ─── ML Model Registry (lazy-loaded, thread-safe) ────────────────────────────
262
+
263
+ _models: dict = {}
264
+ _model_lock = threading.Lock()
265
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
266
+ _compare_model_cache: dict = {} # separate cache for clause_extractor's model3
267
+
268
+ def _load_obligation_pipeline():
269
+ config = {
270
+ "model_name": os.path.join(BASE_DIR, "ckpt_obligation_fast"),
271
+ "device": "cpu",
272
+ "filter_min_confidence": 0.1,
273
+ "min_fields": 2,
274
+ }
275
+ return ObligationPipeline(config)
276
+
277
+ def _load_nli_model():
278
+ from transformers import pipeline as hf_pipeline
279
+ path = os.path.join(BASE_DIR, "model_3")
280
+ return hf_pipeline(
281
+ "text-classification", model=path, tokenizer=path,
282
+ device=-1, top_k=None, truncation=True, max_length=128,
283
+ )
284
+
285
+ def _load_risk_bundle():
286
+ pkl_path = os.path.join(BASE_DIR, "risk_model_v10_extended.pkl")
287
+ with open(pkl_path, "rb") as f:
288
+ return pickle.load(f)
289
+
290
+ def get_model(key: str):
291
+ if key not in _models:
292
+ with _model_lock:
293
+ if key not in _models:
294
+ print(f"[ML] Loading model: {key} …")
295
+ if key == "obligation":
296
+ _models[key] = _load_obligation_pipeline()
297
+ elif key == "nli":
298
+ _models[key] = _load_nli_model()
299
+ elif key == "risk":
300
+ _models[key] = _load_risk_bundle()
301
+ print(f"[ML] Model '{key}' ready.")
302
+ return _models[key]
303
+
304
+ def _get_compare_model():
305
+ """Lazy-load clause_extractor's model3 (used by /api/compare)."""
306
+ if not _compare_model_cache:
307
+ pipe, tokenizer = load_model3(os.path.join(BASE_DIR, "model_3"), MAX_LEN)
308
+ _compare_model_cache["pipe"] = pipe
309
+ _compare_model_cache["tokenizer"] = tokenizer
310
+ return _compare_model_cache["pipe"], _compare_model_cache["tokenizer"]
311
+
312
+ # ─── PDF / text extraction helper ────────────────────────────────────────────
313
+
314
+ def extract_text_from_request() -> str:
315
+ if "file" in request.files:
316
+ raw = request.files["file"].read()
317
+ with pdfplumber.open(io.BytesIO(raw)) as pdf:
318
+ return "\n".join(p.extract_text() or "" for p in pdf.pages)
319
+ return (request.get_json(silent=True) or {}).get("text", "")
320
+
321
+ # ─── COVENANT OBLIGATION EXTRACTION (/api/analyze) ───────────────────────────
322
+
323
+ @app.route("/api/analyze", methods=["POST"])
324
+ # @require_auth # Uncomment to lock behind auth
325
+ def analyze_contract():
326
+ text = extract_text_from_request()
327
+
328
+ print("\n" + "=" * 40)
329
+ print("--- 1. INCOMING TEXT TO AI ---")
330
+ print(text[:400].strip() if text else "WARNING: TEXT IS EMPTY!")
331
+ print("=" * 40 + "\n")
332
+
333
+ if not text.strip():
334
+ return jsonify({"error": "No contract text provided"}), 400
335
+
336
+ pipeline = get_model("obligation")
337
+ try:
338
+ raw_results = pipeline.process(
339
+ source=text, source_type="text", contract_id="api_upload", debug=True
340
+ )
341
+ print("\n" + "=" * 40)
342
+ print("--- 2. RAW PIPELINE OUTPUT ---")
343
+ print(raw_results)
344
+ print("=" * 40 + "\n")
345
+
346
+ try:
347
+ from all_model_code.model_1_code.stage1_ingestion import ingest
348
+ from all_model_code.model_1_code.stage2_cleaning import clean_text
349
+ cleaned_text = clean_text(ingest(text, "text"))
350
+ except Exception:
351
+ cleaned_text = text
352
+
353
+ obligations = []
354
+ for i, r in enumerate(raw_results):
355
+ metric_name = r.get("metric_name", "Unknown Metric")
356
+ op = r.get("operator", "must maintain")
357
+ val = r.get("threshold_value", "a specific value")
358
+ score = r.get("confidence_score", 0.5)
359
+ risk = max(5, min(95, round((1 - score) * 80 + 10)))
360
+ obligations.append({
361
+ "id": f"C{i+1}",
362
+ "clause": str(metric_name).replace("_", " ").title()[:30],
363
+ "type": "Financial Covenant",
364
+ "desc": f"The entity {op} a {metric_name} of {val}.",
365
+ "confidence": round(score * 100, 1),
366
+ "risk": risk,
367
+ "source_text": r.get("source_text", ""),
368
+ })
369
+
370
+ if not obligations:
371
+ return jsonify({"error": "No strict numerical obligations found."}), 422
372
+
373
+ return jsonify({
374
+ "obligations": obligations,
375
+ "clause_count": len(obligations),
376
+ "contract_text": cleaned_text,
377
+ })
378
+
379
+ except Exception as e:
380
+ print(f"[analyze] Pipeline error: {e}")
381
+ return jsonify({"error": "Failed to process contract through AI pipeline."}), 500
382
+
383
+ # ─── CROSS-CONTRACT CONFLICT COMPARISON (/api/compare) ───────────────────────
384
+ # Uses clause_extractor.py + Groq to extract and compare two full contracts.
385
+ # Falls back to mock data when EXTRACTOR_AVAILABLE is False.
386
+
387
+ MOCK_COMPARE_RESPONSE = {
388
+ "clauses_a": [
389
+ {"clause_type": "termination", "clause_text": "Either party may terminate this agreement for convenience upon 30 days written notice.", "contract": "Contract A"},
390
+ {"clause_type": "warranty", "clause_text": "Seller warrants all deliverables shall be free from defects for 24 months from acceptance.", "contract": "Contract A"},
391
+ {"clause_type": "dispute_resolution", "clause_text": "All disputes shall be resolved through binding arbitration in New York under AAA rules.", "contract": "Contract A"},
392
+ {"clause_type": "ip_ownership", "clause_text": "Licensee is granted an exclusive, worldwide, perpetual license to use the Software.", "contract": "Contract A"},
393
+ {"clause_type": "confidentiality", "clause_text": "Neither party shall disclose Confidential Information to any third party without prior written consent.", "contract": "Contract A"},
394
+ {"clause_type": "governing_law", "clause_text": "This agreement shall be governed by the laws of Delaware.", "contract": "Contract A"},
395
+ ],
396
+ "clauses_b": [
397
+ {"clause_type": "termination", "clause_text": "This agreement may only be terminated for cause β€” material breach uncured for 60 days after notice.", "contract": "Contract B"},
398
+ {"clause_type": "warranty", "clause_text": "Seller disclaims all warranties, express or implied, including merchantability or fitness for purpose.", "contract": "Contract B"},
399
+ {"clause_type": "dispute_resolution", "clause_text": "Either party may bring suit in any court of competent jurisdiction to resolve disputes.", "contract": "Contract B"},
400
+ {"clause_type": "ip_ownership", "clause_text": "License granted is non-exclusive, limited to the United States, valid for 12 months only.", "contract": "Contract B"},
401
+ {"clause_type": "confidentiality", "clause_text": "Confidential Information must not be shared with outside parties unless the disclosing party agrees.", "contract": "Contract B"},
402
+ {"clause_type": "governing_law", "clause_text": "This agreement is governed by the laws of California.", "contract": "Contract B"},
403
+ ],
404
+ "conflicts": [
405
+ {"clause_type": "termination", "clause_a": "Either party may terminate this agreement for convenience upon 30 days written notice.", "clause_b": "This agreement may only be terminated for cause β€” material breach uncured for 60 days after notice.", "predicted_label": "contradiction", "predicted_score": 0.9312, "contradiction_score": 0.9312, "all_scores": {"contradiction": 0.9312, "entailment": 0.0421, "neutral": 0.0267}, "token_length": 87, "uncertain": False},
406
+ {"clause_type": "warranty", "clause_a": "Seller warrants all deliverables shall be free from defects for 24 months from acceptance.", "clause_b": "Seller disclaims all warranties, express or implied, including merchantability or fitness for purpose.", "predicted_label": "contradiction", "predicted_score": 0.9741, "contradiction_score": 0.9741, "all_scores": {"contradiction": 0.9741, "entailment": 0.0159, "neutral": 0.0100}, "token_length": 72, "uncertain": False},
407
+ {"clause_type": "dispute_resolution", "clause_a": "All disputes shall be resolved through binding arbitration in New York under AAA rules.", "clause_b": "Either party may bring suit in any court of competent jurisdiction to resolve disputes.", "predicted_label": "contradiction", "predicted_score": 0.8823, "contradiction_score": 0.8823, "all_scores": {"contradiction": 0.8823, "entailment": 0.0712, "neutral": 0.0465}, "token_length": 65, "uncertain": False},
408
+ {"clause_type": "ip_ownership", "clause_a": "Licensee is granted an exclusive, worldwide, perpetual license to use the Software.", "clause_b": "License granted is non-exclusive, limited to the United States, valid for 12 months only.", "predicted_label": "contradiction", "predicted_score": 0.9567, "contradiction_score": 0.9567, "all_scores": {"contradiction": 0.9567, "entailment": 0.0281, "neutral": 0.0152}, "token_length": 68, "uncertain": False},
409
+ {"clause_type": "governing_law", "clause_a": "This agreement shall be governed by the laws of Delaware.", "clause_b": "This agreement is governed by the laws of California.", "predicted_label": "contradiction", "predicted_score": 0.7834, "contradiction_score": 0.7834, "all_scores": {"contradiction": 0.7834, "entailment": 0.1243, "neutral": 0.0923}, "token_length": 45, "uncertain": False},
410
+ {"clause_type": "confidentiality", "clause_a": "Neither party shall disclose Confidential Information to any third party without prior written consent.", "clause_b": "Confidential Information must not be shared with outside parties unless the disclosing party agrees.", "predicted_label": "neutral", "predicted_score": 0.5821, "contradiction_score": 0.2341, "all_scores": {"contradiction": 0.2341, "entailment": 0.1838, "neutral": 0.5821}, "token_length": 58, "uncertain": True},
411
+ ],
412
+ }
413
+
414
+
415
+ @app.route("/api/compare", methods=["POST"])
416
+ def compare_contracts():
417
+ """
418
+ POST /api/compare
419
+ Body: { "contract_a": "...", "contract_b": "..." }
420
+ Returns: { clauses_a, clauses_b, conflicts }
421
+
422
+ Extracts clauses from both contracts via Groq (clause_extractor.py) then
423
+ scores each matched pair with model_3 for entailment / contradiction.
424
+ Falls back to MOCK_COMPARE_RESPONSE when clause_extractor is unavailable.
425
+ """
426
+ data = request.get_json(force=True, silent=True) or {} # force=True ignores Content-Type
427
+ contract_a = (data.get("contract_a") or "").strip()
428
+ contract_b = (data.get("contract_b") or "").strip()
429
+
430
+ if not contract_a or not contract_b:
431
+ print(f"[compare] 400 β€” got keys: {list(data.keys())}, "
432
+ f"a={bool(contract_a)}, b={bool(contract_b)}")
433
+ return jsonify({"error": "Both contract_a and contract_b are required"}), 400
434
+
435
+ if not EXTRACTOR_AVAILABLE:
436
+ print("[MOCK] clause_extractor unavailable β€” returning mock compare data")
437
+ return jsonify(MOCK_COMPARE_RESPONSE)
438
+
439
+ try:
440
+ print("\n[API] Extracting clauses via Groq...")
441
+ clauses_a = extract_clauses(contract_a, "Contract A")
442
+ clauses_b = extract_clauses(contract_b, "Contract B")
443
+
444
+ if not clauses_a or not clauses_b:
445
+ return jsonify({"error": "Clause extraction returned empty. Check GROQ_API_KEY."}), 500
446
+
447
+ print("[API] Generating pairs...")
448
+ pairs = generate_pairs(clauses_a, clauses_b)
449
+
450
+ conflicts = []
451
+ if pairs:
452
+ print(f"[API] Scoring {len(pairs)} pairs with model_3...")
453
+ pipe, tokenizer = _get_compare_model()
454
+ conflicts = score_pairs(pairs, pipe, tokenizer, MAX_LEN, CONF_THRESHOLD)
455
+
456
+ return jsonify({"clauses_a": clauses_a, "clauses_b": clauses_b, "conflicts": conflicts})
457
+
458
+ except Exception as e:
459
+ print(f"[compare] Pipeline error: {e}")
460
+ return jsonify({"error": str(e)}), 500
461
+
462
+ # ─── SINGLE-CLAUSE NLI (/api/conflicts) ──────────────────────────────────────
463
+
464
+ @app.route("/api/conflicts", methods=["POST"])
465
+ def detect_conflicts():
466
+ """
467
+ POST /api/conflicts
468
+ Body: { "clause1": "...", "clause2": "..." }
469
+ Returns: { label, confidence, scores }
470
+ """
471
+ data = request.get_json(silent=True) or {}
472
+ clause1 = (data.get("clause1") or "").strip()
473
+ clause2 = (data.get("clause2") or "").strip()
474
+
475
+ if not clause1 or not clause2:
476
+ return jsonify({"error": "Both clause1 and clause2 are required"}), 400
477
+
478
+ pipe = get_model("nli")
479
+ raw = pipe(f"{clause1} [SEP] {clause2}")
480
+ if raw and isinstance(raw[0], list):
481
+ raw = raw[0]
482
+
483
+ scores = {r["label"]: round(r["score"] * 100, 2) for r in raw}
484
+ best = max(scores, key=scores.get)
485
+ return jsonify({"label": best, "confidence": scores[best], "scores": scores})
486
+
487
+ # ─── RISK FORECAST (/api/risk) ────────────────────────────────────────────────
488
+
489
+ import sys as _sys
490
+ _sys.path.insert(0, os.path.join(BASE_DIR, 'model_2'))
491
+ try:
492
+ from inference_demo import load_ticker_data, build_risk_score
493
+ except ImportError:
494
+ print("[!] Warning: Could not import inference_demo from ../model_2")
495
+ def load_ticker_data(ticker, d): raise NotImplementedError("inference_demo not found")
496
+ def build_risk_score(df): raise NotImplementedError("inference_demo not found")
497
+
498
+
499
+ @app.route("/api/risk", methods=["GET"])
500
+ def risk_forecast():
501
+ """GET /api/risk?ticker=AAPL&horizon=90"""
502
+ ticker = (request.args.get("ticker") or "AAPL").upper()
503
+ horizon = int(request.args.get("horizon", 90))
504
+
505
+ bundle = get_model("risk")
506
+ if ticker not in bundle["models"]:
507
+ return jsonify({"error": f"{ticker} not in model"}), 404
508
+
509
+ try:
510
+ df = load_ticker_data(ticker.lower(), os.path.join(BASE_DIR, 'data/Stocks'))
511
+ fe = build_risk_score(df)
512
+ except Exception as e:
513
+ return jsonify({"error": f"Failed to engineer features: {e}"}), 500
514
+
515
+ payload = bundle["models"][ticker]
516
+ model = payload["model"]
517
+ r_min = payload["r_min"]
518
+ r_max = payload["r_max"]
519
+ threshold = payload["threshold"]
520
+
521
+ current_risk_raw = fe['risk_raw'].iloc[-1]
522
+ current_risk_norm = (current_risk_raw - r_min) / (r_max - r_min + 1e-9)
523
+
524
+ future = model.make_future_dataframe(periods=horizon, freq='B')
525
+ forecast = model.predict(future)
526
+
527
+ # ── BUG FIX: always use the DatetimeIndex, never the 'Date' column.
528
+ # The 'Date' column can resolve to today on some machines, which makes
529
+ # is_future_or_current False for every row and leaves all yhat = null.
530
+ last_date = pd.to_datetime(fe.index[-1])
531
+ print(f"[DEBUG] last_date={last_date} fe.shape={fe.shape}")
532
+
533
+ future_fc = forecast[forecast['ds'] > last_date]
534
+
535
+ breach_detected = False
536
+ days_to_breach = None
537
+ confidence = "NONE"
538
+ breach_date = None
539
+
540
+ for conf, col in [('HIGH', 'yhat_lower'), ('MEDIUM', 'yhat'), ('LOW', 'yhat_upper')]:
541
+ rows = future_fc[future_fc[col] > threshold]
542
+ if not rows.empty:
543
+ breach_detected = True
544
+ confidence = conf
545
+ breach_date = str(rows.iloc[0]['ds'].date())
546
+ days_to_breach = max((rows.iloc[0]['ds'] - last_date).days, 0)
547
+ break
548
+
549
+ if breach_detected and breach_date:
550
+ try:
551
+ ob_type = ObligationType.LIQUIDITY_RATIO if ticker == 'CHK' else ObligationType.REVENUE
552
+ breach_obj = BreachedObligation(
553
+ contract_id=f"AUTO-{ticker}",
554
+ obligation_type=ob_type,
555
+ metric_name="Financial Risk Score",
556
+ threshold_value=round(float(threshold), 2),
557
+ current_value=round(float(current_risk_norm), 2),
558
+ predicted_value=None,
559
+ deadline=breach_date,
560
+ consequence="Covenant Violation Predicted by Prophet Model",
561
+ conflict_with=None,
562
+ )
563
+ scheduler.process_breach(breach_obj)
564
+ except Exception as e:
565
+ print(f"Failed to auto-schedule breach: {e}")
566
+
567
+ def norm(val):
568
+ span = r_max - r_min if r_max != r_min else 1
569
+ return round(float(np.clip((val - r_min) / span * 100, 0, 100)), 2)
570
+
571
+ # Build a DatetimeIndex-keyed lookup for fast y_norm resolution
572
+ fe_indexed = fe["risk_raw"] if fe.index.dtype == "datetime64[ns]" else fe.set_index(pd.to_datetime(fe.index))["risk_raw"]
573
+
574
+ series = []
575
+ for _, row in forecast.iterrows():
576
+ ds_ts = pd.Timestamp(row["ds"])
577
+ is_future_or_current = ds_ts >= last_date
578
+
579
+ # Historical actual value β€” look up by normalised date key
580
+ y_norm = None
581
+ try:
582
+ y_val = fe_indexed.loc[ds_ts]
583
+ y_norm = norm(float(y_val))
584
+ except KeyError:
585
+ pass
586
+
587
+ series.append({
588
+ "ds": str(ds_ts.date()),
589
+ "y": y_norm,
590
+ "yhat": round(float(np.clip(row["yhat"] * 100, 0, 100)), 2) if is_future_or_current else None,
591
+ "yhat_lower": round(float(np.clip(row["yhat_lower"] * 100, 0, 100)), 2) if is_future_or_current else None,
592
+ "yhat_upper": round(float(np.clip(row["yhat_upper"] * 100, 0, 100)), 2) if is_future_or_current else None,
593
+ "yhat_range": [
594
+ round(float(np.clip(row["yhat_lower"] * 100, 0, 100)), 2),
595
+ round(float(np.clip(row["yhat_upper"] * 100, 0, 100)), 2),
596
+ ] if is_future_or_current else None,
597
+ })
598
+
599
+ return jsonify({
600
+ "ticker": ticker,
601
+ "available_tickers": list(bundle["models"].keys()),
602
+ "last_update_date": str(last_date.date()),
603
+ "current_price": round(fe['Close'].iloc[-1], 2),
604
+ "risk_metrics": {
605
+ "current_score": round(current_risk_norm, 4),
606
+ "danger_threshold": round(threshold, 4),
607
+ "is_in_danger_zone": bool(current_risk_norm > threshold),
608
+ },
609
+ "forecast": {
610
+ "breach_predicted": breach_detected,
611
+ "estimated_days_to_breach": days_to_breach,
612
+ "confidence_level": confidence,
613
+ },
614
+ # Legacy flat keys kept for frontend backward-compat
615
+ "breach_detected": breach_detected,
616
+ "breach_date": breach_date,
617
+ "days_to_breach": days_to_breach,
618
+ "confidence_tier": confidence,
619
+ "risk_score": round(current_risk_norm * 100, 2),
620
+ "threshold": round(float(threshold * 100), 2),
621
+ "forecast_series": series,
622
+ "model_meta": {
623
+ "run_date": str(last_date.date()),
624
+ "horizon_days": horizon,
625
+ "target_threshold": round(float(threshold * 100), 2),
626
+ },
627
+ })
628
+
629
+
630
+ @app.route("/api/risk/all", methods=["GET"])
631
+ def get_all():
632
+ import json
633
+ mock_path = os.path.join(BASE_DIR, 'model_2/frontend_mock_api.json')
634
+ if os.path.exists(mock_path):
635
+ with open(mock_path) as f:
636
+ return jsonify(json.load(f))
637
+ return jsonify({"error": "frontend_mock_api.json not found"}), 404
638
+
639
+
640
+ @app.route("/api/risk/tickers", methods=["GET"])
641
+ def risk_tickers():
642
+ bundle = get_model("risk")
643
+ return jsonify({"tickers": list(bundle["models"].keys())})
644
+
645
+ # ─── Health check ─────────────────────────────────────────────────────────────
646
+
647
+ @app.route("/health")
648
+ def health():
649
+ return jsonify({"status": "ok", "extractor_available": EXTRACTOR_AVAILABLE})
650
+
651
+ # ─── Entry point ──────────────────────────────────────────────────────────────
652
+
653
+ if __name__ == "__main__":
654
+ os.makedirs("session_data", exist_ok=True)
655
+ # Disable watchdog reloader on Windows β€” causes WinError 10038 when
656
+ # ML model directories are watched. Debug logging stays active.
657
+ use_reloader = sys.platform != "win32" and not IS_PROD
658
+ app.run(host="0.0.0.0", port=7860)
model3.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ContractPulse - Flask Backend
3
+ Wraps clause_extractor.py pipeline for Next.js frontend consumption.
4
+
5
+ Run: python app.py
6
+ Requires: pip install flask flask-cors python-dotenv groq transformers torch
7
+ """
8
+
9
+ import os
10
+ import sys
11
+ import json
12
+ from flask import Flask, request, jsonify
13
+ from flask_cors import CORS
14
+ from dotenv import load_dotenv
15
+
16
+ load_dotenv()
17
+
18
+ # ── Add parent directory to path so clause_extractor.py is importable ────────
19
+ # Adjust this path to wherever clause_extractor.py lives relative to app.py
20
+ EXTRACTOR_DIR = os.path.join(os.path.dirname(__file__), "..")
21
+ sys.path.insert(0, EXTRACTOR_DIR)
22
+
23
+ try:
24
+ from clause_extractor import extract_clauses, generate_pairs, load_model3, score_pairs
25
+ EXTRACTOR_AVAILABLE = True
26
+ except ImportError as e:
27
+ print(f"[WARN] clause_extractor not found: {e}. Running in mock mode.")
28
+ EXTRACTOR_AVAILABLE = False
29
+
30
+ app = Flask(__name__)
31
+ CORS(app, origins=["http://localhost:3000", "http://127.0.0.1:3000"])
32
+
33
+ # ── Config ────────────────────────────────────────────────────────────────────
34
+ MODEL3_DIR = os.getenv("MODEL3_DIR", "../model_3")
35
+ MAX_LEN = int(os.getenv("MAX_LEN", "512"))
36
+ CONF_THRESHOLD = float(os.getenv("CONF_THRESHOLD", "0.7"))
37
+
38
+ # Cache model so it's not reloaded on every request
39
+ _model_cache = {}
40
+
41
+
42
+ def get_model():
43
+ if not _model_cache:
44
+ pipe, tokenizer = load_model3(MODEL3_DIR, MAX_LEN)
45
+ _model_cache["pipe"] = pipe
46
+ _model_cache["tokenizer"] = tokenizer
47
+ return _model_cache["pipe"], _model_cache["tokenizer"]
48
+
49
+
50
+ # ── Mock data for development (when extractor unavailable) ────────────────────
51
+ MOCK_RESPONSE = {
52
+ "clauses_a": [
53
+ {"clause_type": "termination", "clause_text": "Either party may terminate this agreement for convenience upon 30 days written notice.", "contract": "Contract A"},
54
+ {"clause_type": "warranty", "clause_text": "Seller warrants all deliverables shall be free from defects for 24 months from acceptance.", "contract": "Contract A"},
55
+ {"clause_type": "dispute_resolution", "clause_text": "All disputes shall be resolved through binding arbitration in New York under AAA rules.", "contract": "Contract A"},
56
+ {"clause_type": "ip_ownership", "clause_text": "Licensee is granted an exclusive, worldwide, perpetual license to use the Software.", "contract": "Contract A"},
57
+ {"clause_type": "confidentiality", "clause_text": "Neither party shall disclose Confidential Information to any third party without prior written consent.", "contract": "Contract A"},
58
+ {"clause_type": "governing_law", "clause_text": "This agreement shall be governed by the laws of Delaware.", "contract": "Contract A"},
59
+ ],
60
+ "clauses_b": [
61
+ {"clause_type": "termination", "clause_text": "This agreement may only be terminated for cause β€” material breach uncured for 60 days after notice.", "contract": "Contract B"},
62
+ {"clause_type": "warranty", "clause_text": "Seller disclaims all warranties, express or implied, including merchantability or fitness for purpose.", "contract": "Contract B"},
63
+ {"clause_type": "dispute_resolution", "clause_text": "Either party may bring suit in any court of competent jurisdiction to resolve disputes.", "contract": "Contract B"},
64
+ {"clause_type": "ip_ownership", "clause_text": "License granted is non-exclusive, limited to the United States, valid for 12 months only.", "contract": "Contract B"},
65
+ {"clause_type": "confidentiality", "clause_text": "Confidential Information must not be shared with outside parties unless the disclosing party agrees in writing.", "contract": "Contract B"},
66
+ {"clause_type": "governing_law", "clause_text": "This agreement is governed by the laws of California.", "contract": "Contract B"},
67
+ ],
68
+ "conflicts": [
69
+ {
70
+ "clause_type": "termination",
71
+ "clause_a": "Either party may terminate this agreement for convenience upon 30 days written notice.",
72
+ "clause_b": "This agreement may only be terminated for cause β€” material breach uncured for 60 days after notice.",
73
+ "predicted_label": "contradiction",
74
+ "predicted_score": 0.9312,
75
+ "contradiction_score": 0.9312,
76
+ "all_scores": {"contradiction": 0.9312, "entailment": 0.0421, "neutral": 0.0267},
77
+ "token_length": 87,
78
+ "uncertain": False,
79
+ },
80
+ {
81
+ "clause_type": "warranty",
82
+ "clause_a": "Seller warrants all deliverables shall be free from defects for 24 months from acceptance.",
83
+ "clause_b": "Seller disclaims all warranties, express or implied, including merchantability or fitness for purpose.",
84
+ "predicted_label": "contradiction",
85
+ "predicted_score": 0.9741,
86
+ "contradiction_score": 0.9741,
87
+ "all_scores": {"contradiction": 0.9741, "entailment": 0.0159, "neutral": 0.0100},
88
+ "token_length": 72,
89
+ "uncertain": False,
90
+ },
91
+ {
92
+ "clause_type": "dispute_resolution",
93
+ "clause_a": "All disputes shall be resolved through binding arbitration in New York under AAA rules.",
94
+ "clause_b": "Either party may bring suit in any court of competent jurisdiction to resolve disputes.",
95
+ "predicted_label": "contradiction",
96
+ "predicted_score": 0.8823,
97
+ "contradiction_score": 0.8823,
98
+ "all_scores": {"contradiction": 0.8823, "entailment": 0.0712, "neutral": 0.0465},
99
+ "token_length": 65,
100
+ "uncertain": False,
101
+ },
102
+ {
103
+ "clause_type": "ip_ownership",
104
+ "clause_a": "Licensee is granted an exclusive, worldwide, perpetual license to use the Software.",
105
+ "clause_b": "License granted is non-exclusive, limited to the United States, valid for 12 months only.",
106
+ "predicted_label": "contradiction",
107
+ "predicted_score": 0.9567,
108
+ "contradiction_score": 0.9567,
109
+ "all_scores": {"contradiction": 0.9567, "entailment": 0.0281, "neutral": 0.0152},
110
+ "token_length": 68,
111
+ "uncertain": False,
112
+ },
113
+ {
114
+ "clause_type": "governing_law",
115
+ "clause_a": "This agreement shall be governed by the laws of Delaware.",
116
+ "clause_b": "This agreement is governed by the laws of California.",
117
+ "predicted_label": "contradiction",
118
+ "predicted_score": 0.7834,
119
+ "contradiction_score": 0.7834,
120
+ "all_scores": {"contradiction": 0.7834, "entailment": 0.1243, "neutral": 0.0923},
121
+ "token_length": 45,
122
+ "uncertain": False,
123
+ },
124
+ {
125
+ "clause_type": "confidentiality",
126
+ "clause_a": "Neither party shall disclose Confidential Information to any third party without prior written consent.",
127
+ "clause_b": "Confidential Information must not be shared with outside parties unless the disclosing party agrees in writing.",
128
+ "predicted_label": "neutral",
129
+ "predicted_score": 0.5821,
130
+ "contradiction_score": 0.2341,
131
+ "all_scores": {"contradiction": 0.2341, "entailment": 0.1838, "neutral": 0.5821},
132
+ "token_length": 58,
133
+ "uncertain": True,
134
+ },
135
+ ]
136
+ }
137
+
138
+
139
+ # ── Routes ────────────────────────────────────────────────────────────────────
140
+
141
+ @app.route("/health", methods=["GET"])
142
+ def health():
143
+ return jsonify({"status": "ok", "extractor_available": EXTRACTOR_AVAILABLE})
144
+
145
+
146
+ @app.route("/analyze", methods=["POST"])
147
+ def analyze():
148
+ """
149
+ POST /analyze
150
+ Body: { "contract_a": "...", "contract_b": "..." }
151
+ Returns: { clauses_a, clauses_b, conflicts }
152
+ """
153
+ data = request.get_json()
154
+ if not data:
155
+ return jsonify({"error": "No JSON body provided"}), 400
156
+
157
+ contract_a = data.get("contract_a", "").strip()
158
+ contract_b = data.get("contract_b", "").strip()
159
+
160
+ if not contract_a or not contract_b:
161
+ return jsonify({"error": "Both contract_a and contract_b are required"}), 400
162
+
163
+ # ── Dev mode: return mock data ────────────────────────────────────────────
164
+ if not EXTRACTOR_AVAILABLE:
165
+ print("[MOCK] Returning mock analysis data")
166
+ return jsonify(MOCK_RESPONSE)
167
+
168
+ # ── Production: run the real pipeline ────────────────────────────────────
169
+ try:
170
+ print("\n[API] Extracting clauses via Groq...")
171
+ clauses_a = extract_clauses(contract_a, "Contract A")
172
+ clauses_b = extract_clauses(contract_b, "Contract B")
173
+
174
+ if not clauses_a or not clauses_b:
175
+ return jsonify({"error": "Clause extraction returned empty. Check GROQ_API_KEY."}), 500
176
+
177
+ print("[API] Generating pairs...")
178
+ pairs = generate_pairs(clauses_a, clauses_b)
179
+
180
+ conflicts = []
181
+ if pairs:
182
+ print(f"[API] Scoring {len(pairs)} pairs with Model 3...")
183
+ pipe, tokenizer = get_model()
184
+ conflicts = score_pairs(pairs, pipe, tokenizer, MAX_LEN, CONF_THRESHOLD)
185
+
186
+ return jsonify({
187
+ "clauses_a": clauses_a,
188
+ "clauses_b": clauses_b,
189
+ "conflicts": conflicts,
190
+ })
191
+
192
+ except Exception as e:
193
+ print(f"[ERROR] Pipeline failed: {e}")
194
+ return jsonify({"error": str(e)}), 500
195
+
196
+
197
+ if __name__ == "__main__":
198
+ print("\n[ContractPulse Backend] Starting on http://localhost:5000")
199
+ print(f"[ContractPulse Backend] Extractor available: {EXTRACTOR_AVAILABLE}")
200
+ app.run(debug=True, port=5000)
scheduler_api.py ADDED
@@ -0,0 +1,548 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ContractPulse Scheduler β€” Flask REST API
3
+ =========================================
4
+ Wraps the TaskScheduler + MeetingRoomScheduler into a clean HTTP API.
5
+
6
+ Endpoints:
7
+ POST /api/process_breach β€” process a single breach
8
+ POST /api/process_batch β€” process multiple breaches
9
+ GET /api/tasks β€” list all tasks (optional ?severity=CRITICAL)
10
+ GET /api/meetings β€” list all booked meetings
11
+ GET /api/departments β€” department workload summary
12
+ POST /api/reset β€” reset the scheduler state
13
+ GET /api/health β€” health check
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import os
19
+ import smtplib
20
+ from email.message import EmailMessage
21
+ import uuid
22
+ from dataclasses import dataclass, field
23
+ from datetime import datetime, timedelta
24
+ from enum import Enum
25
+ from typing import Optional
26
+
27
+ from flask import Blueprint, jsonify, request
28
+
29
+ scheduler_bp = Blueprint("scheduler", __name__)
30
+
31
+
32
+ # ─────────────────────────────────────────────────────────────
33
+ # SCHEDULER CORE
34
+ # ─────────────────────────────────────────────────────────────
35
+
36
+ class Department(Enum):
37
+ FINANCE = "Finance"
38
+ LEGAL = "Legal"
39
+ TECH = "Tech"
40
+ OPERATIONS = "Operations"
41
+ COMPLIANCE = "Compliance"
42
+ EXECUTIVE = "Executive"
43
+
44
+
45
+ class Severity(Enum):
46
+ LOW = 1
47
+ MEDIUM = 2
48
+ HIGH = 3
49
+ CRITICAL = 4
50
+
51
+
52
+ class ObligationType(Enum):
53
+ REVENUE = "revenue"
54
+ DEBT_TO_EQUITY = "debt_to_equity_ratio"
55
+ CURRENT_RATIO = "current_ratio"
56
+ REPORT_SUBMISSION = "report_submission"
57
+ WORKFORCE_SIZE = "workforce_size"
58
+ CAPEX_LIMIT = "capex_limit"
59
+ LIQUIDITY_RATIO = "liquidity_ratio"
60
+ INSURANCE_COVERAGE = "insurance_coverage"
61
+ UNKNOWN = "unknown"
62
+
63
+
64
+ OBLIGATION_OWNERS: dict[ObligationType, list[Department]] = {
65
+ ObligationType.REVENUE: [Department.FINANCE, Department.EXECUTIVE],
66
+ ObligationType.DEBT_TO_EQUITY: [Department.FINANCE, Department.LEGAL],
67
+ ObligationType.CURRENT_RATIO: [Department.FINANCE],
68
+ ObligationType.REPORT_SUBMISSION: [Department.COMPLIANCE, Department.LEGAL],
69
+ ObligationType.WORKFORCE_SIZE: [Department.OPERATIONS, Department.LEGAL],
70
+ ObligationType.CAPEX_LIMIT: [Department.FINANCE, Department.OPERATIONS],
71
+ ObligationType.LIQUIDITY_RATIO: [Department.FINANCE, Department.EXECUTIVE],
72
+ ObligationType.INSURANCE_COVERAGE: [Department.COMPLIANCE, Department.LEGAL],
73
+ ObligationType.UNKNOWN: [Department.LEGAL],
74
+ }
75
+
76
+ SEVERITY_ESCALATION: dict[Severity, list[Department]] = {
77
+ Severity.LOW: [],
78
+ Severity.MEDIUM: [],
79
+ Severity.HIGH: [Department.LEGAL],
80
+ Severity.CRITICAL: [Department.LEGAL, Department.EXECUTIVE],
81
+ }
82
+
83
+ SEVERITY_DUE_HOURS: dict[Severity, int] = {
84
+ Severity.LOW: 72,
85
+ Severity.MEDIUM: 48,
86
+ Severity.HIGH: 24,
87
+ Severity.CRITICAL: 4,
88
+ }
89
+
90
+
91
+ @dataclass
92
+ class Room:
93
+ room_id: str
94
+ name: str
95
+ capacity: int
96
+ has_av: bool = True
97
+
98
+
99
+ @dataclass(order=True)
100
+ class MeetingSlot:
101
+ start: datetime
102
+ end: datetime
103
+ meeting_id: str = field(compare=False)
104
+ room_id: str = field(compare=False)
105
+ title: str = field(compare=False)
106
+ attendees: int = field(compare=False, default=4)
107
+
108
+ def overlaps(self, start: datetime, end: datetime) -> bool:
109
+ return self.start < end and self.end > start
110
+
111
+ def to_dict(self) -> dict:
112
+ return {
113
+ "meeting_id": self.meeting_id,
114
+ "title": self.title,
115
+ "room": self.room_id,
116
+ "start": self.start.isoformat(),
117
+ "end": self.end.isoformat(),
118
+ "attendees": self.attendees,
119
+ }
120
+
121
+
122
+ @dataclass
123
+ class Meeting:
124
+ title: str
125
+ start: datetime
126
+ end: datetime
127
+ attendees: int
128
+ needs_av: bool = False
129
+ meeting_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])
130
+
131
+
132
+ class MeetingRoomScheduler:
133
+ def __init__(self, rooms: list[Room]) -> None:
134
+ self.rooms = sorted(rooms, key=lambda r: r.capacity)
135
+ self._bookings: dict[str, list[MeetingSlot]] = {r.room_id: [] for r in rooms}
136
+
137
+ def reset(self) -> None:
138
+ self._bookings = {r.room_id: [] for r in self.rooms}
139
+
140
+ def schedule(self, meeting: Meeting) -> Optional[MeetingSlot]:
141
+ if meeting.end <= meeting.start:
142
+ return None
143
+ for room in self._eligible_rooms(meeting):
144
+ if self._is_free(room, meeting.start, meeting.end):
145
+ slot = MeetingSlot(
146
+ start=meeting.start, end=meeting.end,
147
+ meeting_id=meeting.meeting_id, room_id=room.room_id,
148
+ title=meeting.title, attendees=meeting.attendees,
149
+ )
150
+ self._bookings[room.room_id].append(slot)
151
+ self._bookings[room.room_id].sort()
152
+ return slot
153
+ return None
154
+
155
+ def free_slots(self, room_id: str, window_start: datetime, window_end: datetime, min_duration_minutes: int = 30) -> list[tuple]:
156
+ slots = self._bookings.get(room_id, [])
157
+ free, cursor = [], window_start
158
+ for slot in slots:
159
+ if slot.start > cursor:
160
+ if (slot.start - cursor).total_seconds() / 60 >= min_duration_minutes:
161
+ free.append((cursor, slot.start))
162
+ cursor = max(cursor, slot.end)
163
+ if cursor < window_end and (window_end - cursor).total_seconds() / 60 >= min_duration_minutes:
164
+ free.append((cursor, window_end))
165
+ return free
166
+
167
+ def all_bookings(self) -> list[MeetingSlot]:
168
+ result = []
169
+ for slots in self._bookings.values():
170
+ result.extend(slots)
171
+ return sorted(result)
172
+
173
+ def _eligible_rooms(self, meeting: Meeting) -> list[Room]:
174
+ return [r for r in self.rooms if r.capacity >= meeting.attendees and (not meeting.needs_av or r.has_av)]
175
+
176
+ def _is_free(self, room: Room, start: datetime, end: datetime) -> bool:
177
+ return not any(s.overlaps(start, end) for s in self._bookings[room.room_id])
178
+
179
+
180
+ @dataclass
181
+ class BreachedObligation:
182
+ contract_id: str
183
+ obligation_type: ObligationType
184
+ metric_name: str
185
+ threshold_value: float
186
+ current_value: float
187
+ predicted_value: Optional[float]
188
+ deadline: str
189
+ consequence: str
190
+ conflict_with: Optional[str] = None
191
+
192
+ @property
193
+ def breach_gap(self) -> float:
194
+ return self.current_value - self.threshold_value
195
+
196
+ def auto_severity(self) -> Severity:
197
+ critical_kw = {"termination", "default", "liquidation", "penalty"}
198
+ high_kw = {"notice", "cure period", "acceleration"}
199
+ cl = self.consequence.lower()
200
+ if any(k in cl for k in critical_kw) or self.conflict_with:
201
+ return Severity.CRITICAL
202
+ if any(k in cl for k in high_kw):
203
+ return Severity.HIGH
204
+ gap_pct = abs(self.breach_gap) / (abs(self.threshold_value) + 1e-9)
205
+ if gap_pct > 0.30:
206
+ return Severity.HIGH
207
+ if gap_pct > 0.10:
208
+ return Severity.MEDIUM
209
+ return Severity.LOW
210
+
211
+
212
+ @dataclass
213
+ class Task:
214
+ task_id: str
215
+ title: str
216
+ description: str
217
+ assigned_to: list[Department]
218
+ severity: Severity
219
+ due_by: datetime
220
+ contract_id: str
221
+ obligation_type: ObligationType
222
+ created_at: datetime = field(default_factory=datetime.now)
223
+
224
+ def to_dict(self) -> dict:
225
+ return {
226
+ "task_id": self.task_id,
227
+ "title": self.title,
228
+ "description": self.description,
229
+ "assigned_to": [d.value for d in self.assigned_to],
230
+ "severity": self.severity.name,
231
+ "due_by": self.due_by.isoformat(),
232
+ "contract_id": self.contract_id,
233
+ "obligation_type": self.obligation_type.value,
234
+ "created_at": self.created_at.isoformat(),
235
+ }
236
+
237
+
238
+ class TaskScheduler:
239
+ def __init__(self, room_scheduler: Optional[MeetingRoomScheduler] = None) -> None:
240
+ self._tasks: list[Task] = []
241
+ self._room_scheduler = room_scheduler
242
+ self._task_counter = 0
243
+ self._auto_meetings: list[MeetingSlot] = []
244
+
245
+ def reset(self) -> None:
246
+ self._tasks = []
247
+ self._task_counter = 0
248
+ self._auto_meetings = []
249
+ if self._room_scheduler:
250
+ self._room_scheduler.reset()
251
+
252
+ def process_breach(self, breach: BreachedObligation) -> tuple[Task, Optional[MeetingSlot]]:
253
+ severity = breach.auto_severity()
254
+ departments = self._resolve_departments(breach.obligation_type, severity)
255
+ due_by = datetime.now() + timedelta(hours=SEVERITY_DUE_HOURS[severity])
256
+ self._task_counter += 1
257
+ task = Task(
258
+ task_id=f"TASK-{self._task_counter:04d}",
259
+ title=f"[{severity.name}] Breach: {breach.metric_name} in {breach.contract_id}",
260
+ description=self._build_description(breach, severity),
261
+ assigned_to=departments,
262
+ severity=severity,
263
+ due_by=due_by,
264
+ contract_id=breach.contract_id,
265
+ obligation_type=breach.obligation_type,
266
+ )
267
+ self._tasks.append(task)
268
+
269
+ meeting_slot = None
270
+ if breach.conflict_with and self._room_scheduler:
271
+ meeting_slot = self._book_conflict_meeting(task, breach)
272
+
273
+ self._alert_team_via_email(task)
274
+
275
+ return task, meeting_slot
276
+
277
+ def _alert_team_via_email(self, task: Task) -> None:
278
+ if task.severity not in [Severity.CRITICAL, Severity.HIGH]:
279
+ return
280
+ email_body = f"""
281
+ [URGENT] Covenant Breach Alert: {task.title}
282
+
283
+ A {task.severity.name} severity breach has been processed by the ContractPulse Response Engine.
284
+
285
+ Task ID: {task.task_id}
286
+ Assigned To: {[d.value for d in task.assigned_to]}
287
+ Due By: {task.due_by.strftime('%Y-%m-%d %H:%M:%S')}
288
+
289
+ Details:
290
+ {task.description}
291
+
292
+ Please take immediate action.
293
+ """
294
+
295
+ # Try to send a real email if credentials are set
296
+ sender_email = os.getenv("SMTP_EMAIL")
297
+ sender_password = os.getenv("SMTP_PASSWORD")
298
+ receiver_email = os.getenv("ALERT_RECEIVER_EMAIL", sender_email) # Send to self if no specific receiver
299
+
300
+ if sender_email and sender_password and receiver_email:
301
+ try:
302
+ msg = EmailMessage()
303
+ msg.set_content(email_body)
304
+ msg['Subject'] = f"[URGENT] Covenant Breach Alert: {task.title}"
305
+ msg['From'] = sender_email
306
+ msg['To'] = receiver_email
307
+
308
+ # Assuming Gmail SMTP for this example
309
+ with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
310
+ server.login(sender_email, sender_password)
311
+ server.send_message(msg)
312
+
313
+ print(f"πŸ“§ REAL EMAIL SUCCESSFULLY SENT TO {receiver_email}")
314
+ return
315
+ except Exception as e:
316
+ print(f"Failed to send real email: {e}. Falling back to terminal log.")
317
+
318
+ # Fallback to terminal if no credentials or sending fails
319
+ print("\n" + "="*60)
320
+ print("πŸ“§ AUTOMATED EMAIL ALERT DISPATCHED (Terminal Mock)")
321
+ print("="*60)
322
+ print(email_body)
323
+ print("="*60 + "\n")
324
+
325
+ def process_batch(self, breaches: list[BreachedObligation]) -> tuple[list[Task], list[MeetingSlot]]:
326
+ results = [self.process_breach(b) for b in breaches]
327
+ tasks = [r[0] for r in results]
328
+ meetings = [r[1] for r in results if r[1]]
329
+ return sorted(tasks, key=lambda t: t.severity.value, reverse=True), meetings
330
+
331
+ def all_tasks(self) -> list[Task]:
332
+ return sorted(self._tasks, key=lambda t: t.severity.value, reverse=True)
333
+
334
+ def department_summary(self) -> dict:
335
+ summary = {}
336
+ for task in self._tasks:
337
+ for dept in task.assigned_to:
338
+ key = dept.value
339
+ if key not in summary:
340
+ summary[key] = {s.name: 0 for s in Severity}
341
+ summary[key][task.severity.name] += 1
342
+ return summary
343
+
344
+ def _resolve_departments(self, ob_type: ObligationType, severity: Severity) -> list[Department]:
345
+ primary = OBLIGATION_OWNERS.get(ob_type, [Department.LEGAL])
346
+ escalated = SEVERITY_ESCALATION.get(severity, [])
347
+ return list(dict.fromkeys(primary + escalated))
348
+
349
+ def _build_description(self, breach: BreachedObligation, severity: Severity) -> str:
350
+ lines = [
351
+ f"Contract : {breach.contract_id}",
352
+ f"Metric : {breach.metric_name}",
353
+ f"Threshold : {breach.threshold_value}",
354
+ f"Current : {breach.current_value} (gap={breach.breach_gap:+.2f})",
355
+ ]
356
+ if breach.predicted_value is not None:
357
+ lines.append(f"Predicted : {breach.predicted_value} (next period)")
358
+ lines += [f"Deadline : {breach.deadline}", f"Consequence: {breach.consequence}"]
359
+ if breach.conflict_with:
360
+ lines.append(f"⚠ Conflicts with: {breach.conflict_with}")
361
+ return "\n ".join(lines)
362
+
363
+ def _book_conflict_meeting(self, task: Task, breach: BreachedObligation) -> Optional[MeetingSlot]:
364
+ now = datetime.now().replace(second=0, microsecond=0)
365
+ window_end = now + timedelta(hours=8)
366
+ for room in self._room_scheduler.rooms:
367
+ free = self._room_scheduler.free_slots(room.room_id, now, window_end, min_duration_minutes=60)
368
+ if free:
369
+ start, _ = free[0]
370
+ meeting = Meeting(
371
+ title=f"Conflict Review: {breach.contract_id} vs {breach.conflict_with}",
372
+ start=start, end=start + timedelta(minutes=60),
373
+ attendees=max(3, len(task.assigned_to) * 2),
374
+ needs_av=True,
375
+ )
376
+ slot = self._room_scheduler.schedule(meeting)
377
+ if slot:
378
+ self._auto_meetings.append(slot)
379
+ return slot
380
+ return None
381
+
382
+
383
+ # ─────────────────────────────────────────────────────────────
384
+ # GLOBAL SCHEDULER INSTANCE
385
+ # ─────────────────────────────────────────────────────────────
386
+
387
+ def _build_scheduler() -> TaskScheduler:
388
+ rooms = [
389
+ Room("R1", "Boardroom", capacity=20, has_av=True),
390
+ Room("R2", "Conference A", capacity=10, has_av=True),
391
+ Room("R3", "Conference B", capacity=8, has_av=False),
392
+ Room("R4", "Huddle Room 1", capacity=4, has_av=False),
393
+ Room("R5", "Huddle Room 2", capacity=4, has_av=True),
394
+ ]
395
+ return TaskScheduler(room_scheduler=MeetingRoomScheduler(rooms))
396
+
397
+
398
+ # Use a mutable container so blueprint routes can rebind it reliably
399
+ _state = {"scheduler": _build_scheduler()}
400
+
401
+
402
+ def get_scheduler() -> TaskScheduler:
403
+ return _state["scheduler"]
404
+
405
+
406
+ # ─────────────────────────────────────────────────────────────
407
+ # HELPERS
408
+ # ─────────────────────────────────────────────────────────────
409
+
410
+ def _parse_obligation_type(raw: str) -> ObligationType:
411
+ for ot in ObligationType:
412
+ if ot.value == raw:
413
+ return ot
414
+ return ObligationType.UNKNOWN
415
+
416
+
417
+ def _breach_from_dict(d: dict) -> BreachedObligation:
418
+ return BreachedObligation(
419
+ contract_id=d["contract_id"],
420
+ obligation_type=_parse_obligation_type(d.get("obligation_type", "unknown")),
421
+ metric_name=d.get("metric_name", d.get("obligation_type", "unknown")),
422
+ threshold_value=float(d["threshold_value"]),
423
+ current_value=float(d["current_value"]),
424
+ predicted_value=float(d["predicted_value"]) if d.get("predicted_value") is not None else None,
425
+ deadline=d.get("deadline", "unknown"),
426
+ consequence=d.get("consequence", "notice"),
427
+ # BUG FIX: treat empty string as None so conflict meetings are not
428
+ # accidentally triggered when the frontend sends conflict_with: ""
429
+ conflict_with=d.get("conflict_with") or None,
430
+ )
431
+
432
+
433
+ # ─────────────────────────────────────────────────────────────
434
+ # ROUTES
435
+ # ─────────────────────────────────────────────────────────────
436
+
437
+ @scheduler_bp.get("/api/health")
438
+ def health():
439
+ return jsonify({"status": "ok", "timestamp": datetime.now().isoformat()})
440
+
441
+
442
+ @scheduler_bp.post("/api/process_breach")
443
+ def process_breach():
444
+ """
445
+ Body (JSON):
446
+ {
447
+ "contract_id": "CTR-001",
448
+ "obligation_type": "revenue",
449
+ "metric_name": "revenue",
450
+ "threshold_value": 5000000,
451
+ "current_value": 3800000,
452
+ "predicted_value": 3500000, // optional
453
+ "deadline": "annually",
454
+ "consequence": "termination",
455
+ "conflict_with": null // optional contract_id
456
+ }
457
+ """
458
+ body = request.get_json(force=True)
459
+ try:
460
+ breach = _breach_from_dict(body)
461
+ except (KeyError, ValueError) as e:
462
+ return jsonify({"error": f"Invalid payload: {e}"}), 400
463
+
464
+ task, meeting_slot = get_scheduler().process_breach(breach)
465
+
466
+ return jsonify({
467
+ "task": task.to_dict(),
468
+ "meeting": meeting_slot.to_dict() if meeting_slot else None,
469
+ }), 201
470
+
471
+
472
+ @scheduler_bp.post("/api/process_batch")
473
+ def process_batch():
474
+ """
475
+ Body: { "breaches": [ <breach>, ... ] }
476
+ Returns tasks sorted by severity desc + any auto-booked meetings.
477
+ """
478
+ body = request.get_json(force=True)
479
+ raw_breaches = body.get("breaches", [])
480
+ if not raw_breaches:
481
+ return jsonify({"error": "breaches list is empty"}), 400
482
+
483
+ try:
484
+ breaches = [_breach_from_dict(b) for b in raw_breaches]
485
+ except (KeyError, ValueError) as e:
486
+ return jsonify({"error": f"Invalid breach in batch: {e}"}), 400
487
+
488
+ tasks, meetings = get_scheduler().process_batch(breaches)
489
+
490
+ return jsonify({
491
+ "tasks": [t.to_dict() for t in tasks],
492
+ "meetings": [m.to_dict() for m in meetings],
493
+ "count": len(tasks),
494
+ }), 201
495
+
496
+
497
+ @scheduler_bp.get("/api/tasks")
498
+ def get_tasks():
499
+ """
500
+ Query params:
501
+ ?severity=CRITICAL (optional filter)
502
+ ?department=Finance (optional filter)
503
+ """
504
+ sev_filter = request.args.get("severity", "").upper()
505
+ dept_filter = request.args.get("department", "")
506
+ tasks = get_scheduler().all_tasks()
507
+
508
+ if sev_filter and sev_filter in Severity.__members__:
509
+ target = Severity[sev_filter]
510
+ tasks = [t for t in tasks if t.severity == target]
511
+
512
+ if dept_filter:
513
+ dept_vals = [d.value for d in Department]
514
+ if dept_filter in dept_vals:
515
+ target_dept = Department(dept_filter)
516
+ tasks = [t for t in tasks if target_dept in t.assigned_to]
517
+
518
+ return jsonify({"tasks": [t.to_dict() for t in tasks], "count": len(tasks)})
519
+
520
+
521
+ # BUG FIX: There were TWO @scheduler_bp.get("/api/meetings") routes in the
522
+ # original file. Flask registers only the FIRST one it sees, which called a
523
+ # non-existent method `_task_scheduler_meetings()` and crashed on every
524
+ # request. The second (correct) route was silently ignored.
525
+ # Fixed: one route that correctly reads _auto_meetings from the scheduler.
526
+ @scheduler_bp.get("/api/meetings")
527
+ def get_meetings():
528
+ """Return all auto-booked conflict-resolution meeting slots."""
529
+ auto = get_scheduler()._auto_meetings or []
530
+ return jsonify({"meetings": [m.to_dict() for m in auto], "count": len(auto)})
531
+
532
+
533
+ @scheduler_bp.get("/api/departments")
534
+ def get_departments():
535
+ return jsonify({"summary": get_scheduler().department_summary()})
536
+
537
+
538
+ @scheduler_bp.post("/api/reset")
539
+ def reset():
540
+ # BUG FIX: rebinding a module-level `scheduler` variable inside a
541
+ # blueprint function is unreliable β€” the local name rebinds but callers
542
+ # that already imported the old object keep the stale reference.
543
+ # Fixed: mutate the shared _state dict so all routes see the new instance.
544
+ _state["scheduler"] = _build_scheduler()
545
+ return jsonify({"status": "reset", "timestamp": datetime.now().isoformat()})
546
+
547
+ # Add this at the very bottom of scheduler_api.py
548
+ scheduler = _state["scheduler"]
test_pipeline.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import sys
4
+
5
+ # Add project root to path so 'src' package is found
6
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
7
+
8
+ from all_model_code.model_1_code.pipeline import ObligationPipeline
9
+
10
+ def run_test():
11
+ # ─── Sample contract (you can replace this later) ───
12
+ sample_contract = """
13
+ The Borrower shall maintain at all times a Debt-to-Equity Ratio of not more than 2.5 to 1.0, tested quarterly.
14
+ Failure to maintain this ratio shall constitute an event of default.
15
+
16
+ The Company shall maintain minimum annual gross revenue of at least $5,000,000 during each contract year.
17
+
18
+ The Vendor shall obtain and maintain commercial general liability insurance with a minimum coverage amount of $2,000,000.
19
+ """
20
+
21
+ # ─── Initialize pipeline ───
22
+ # Ensure the model path points to the actual folder where your weights live
23
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
24
+ model_path = os.path.join(BASE_DIR, "ckpt_obligation_fast")
25
+
26
+ config = {
27
+ "model_name": model_path,
28
+ "device": "cpu",
29
+ "filter_min_confidence": 0.4,
30
+ "min_fields": 2
31
+ }
32
+
33
+ pipeline = ObligationPipeline(config)
34
+
35
+ print("\n" + "=" * 60)
36
+ print("RUNNING OBLIGATION EXTRACTION TEST")
37
+ print("=" * 60)
38
+
39
+ # ─── Run pipeline ───
40
+ results = pipeline.process(
41
+ source=sample_contract,
42
+ source_type="text",
43
+ contract_id="demo_contract",
44
+ debug=False
45
+ )
46
+
47
+ # ─── Clean display ───
48
+ cleaned_results = []
49
+
50
+ for r in results:
51
+ cleaned = {
52
+ "metric": r.get("metric_name"),
53
+ "operator": r.get("operator"),
54
+ "value": r.get("threshold_value"),
55
+ "deadline": r.get("deadline"),
56
+ "consequence": r.get("consequence"),
57
+ "confidence": round(r.get("confidence_score", 0), 3)
58
+ }
59
+ cleaned = {k: v for k, v in cleaned.items() if v is not None}
60
+ cleaned_results.append(cleaned)
61
+
62
+ # ─── Deduplicate: same metric + same value β†’ keep highest confidence ───
63
+ seen = {}
64
+ for obligation in cleaned_results:
65
+ key = (obligation.get("metric"), obligation.get("value"))
66
+ if key not in seen:
67
+ seen[key] = obligation
68
+ else:
69
+ if obligation.get("confidence", 0) > seen[key].get("confidence", 0):
70
+ seen[key] = obligation
71
+
72
+ deduplicated_results = list(seen.values())
73
+
74
+ print("\nExtracted Obligations:\n")
75
+ print(json.dumps(deduplicated_results, indent=2))
76
+
77
+ print("\nTotal Obligations Found:", len(deduplicated_results))
78
+
79
+
80
+ if __name__ == "__main__":
81
+ run_test()
test_routes.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import email.message
3
+
4
+ class _CGI:
5
+ @staticmethod
6
+ def parse_header(line):
7
+ m = email.message.Message()
8
+ m['content-type'] = line
9
+ return m.get_content_type(), m.get_params() or {}
10
+
11
+ sys.modules['cgi'] = _CGI()
12
+
13
+ from main import app
14
+ print([r.rule for r in app.url_map.iter_rules()])