Spaces:
Running
Running
Upload 7 files
Browse files- Dockerfile +22 -0
- clause_extractor.py +384 -0
- main.py +658 -0
- model3.py +200 -0
- scheduler_api.py +548 -0
- test_pipeline.py +81 -0
- test_routes.py +14 -0
Dockerfile
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Install system dependencies (needed for pandas, prophet, etc.)
|
| 6 |
+
RUN apt-get update && apt-get install -y \
|
| 7 |
+
build-essential \
|
| 8 |
+
gcc \
|
| 9 |
+
g++ \
|
| 10 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
+
|
| 12 |
+
# Copy backend code
|
| 13 |
+
COPY . /app
|
| 14 |
+
|
| 15 |
+
# Install Python dependencies
|
| 16 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 17 |
+
|
| 18 |
+
# Expose HF required port
|
| 19 |
+
EXPOSE 7860
|
| 20 |
+
|
| 21 |
+
# Start app
|
| 22 |
+
CMD ["python", "main.py"]
|
clause_extractor.py
ADDED
|
@@ -0,0 +1,384 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Cross-Contract Clause Extractor and Pair Generator
|
| 3 |
+
Uses Groq API (groq.com) for clause extraction
|
| 4 |
+
Feeds pairs into Model 3 (NLI conflict detection)
|
| 5 |
+
|
| 6 |
+
Install: pip install groq
|
| 7 |
+
API key: https://console.groq.com
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
import json
|
| 12 |
+
import torch
|
| 13 |
+
from groq import Groq
|
| 14 |
+
from transformers import pipeline as hf_pipeline, AutoTokenizer
|
| 15 |
+
from dotenv import load_dotenv
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
# ββ Config ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 19 |
+
|
| 20 |
+
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 21 |
+
print("API KEY:", GROQ_API_KEY)
|
| 22 |
+
MODEL3_DIR = "../model_3" # path to your saved Model 3
|
| 23 |
+
GROQ_MODEL = "openai/gpt-oss-120b" # same model you're already using
|
| 24 |
+
MAX_LEN = 512 # must match Model 3 training config
|
| 25 |
+
CONF_THRESHOLD = 0.7 # flag pairs below this as uncertain
|
| 26 |
+
|
| 27 |
+
CLAUSE_TYPES = [
|
| 28 |
+
"termination",
|
| 29 |
+
"warranty",
|
| 30 |
+
"indemnification",
|
| 31 |
+
"ip_ownership",
|
| 32 |
+
"dispute_resolution",
|
| 33 |
+
"confidentiality",
|
| 34 |
+
"liability_cap",
|
| 35 |
+
"governing_law",
|
| 36 |
+
"payment",
|
| 37 |
+
"non_compete",
|
| 38 |
+
"force_majeure",
|
| 39 |
+
"assignment",
|
| 40 |
+
]
|
| 41 |
+
|
| 42 |
+
# ββ Groq client βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 43 |
+
|
| 44 |
+
groq_client = Groq(api_key=GROQ_API_KEY)
|
| 45 |
+
|
| 46 |
+
# ββ Step 1: Extract clauses from a single contract ββββββββββββββββββββββββββββ
|
| 47 |
+
|
| 48 |
+
EXTRACTION_SYSTEM_PROMPT = """You are a legal clause extraction engine.
|
| 49 |
+
Your job is to extract distinct legal clauses from contract text.
|
| 50 |
+
You must return ONLY a valid JSON array β no explanation, no markdown fences, no preamble.
|
| 51 |
+
Never extract financial covenant clauses with numeric thresholds β those are handled separately."""
|
| 52 |
+
|
| 53 |
+
EXTRACTION_USER_PROMPT = """Extract all distinct legal clauses from this contract.
|
| 54 |
+
|
| 55 |
+
For each clause return:
|
| 56 |
+
- clause_type: one of [{clause_types}]
|
| 57 |
+
- clause_text: the core legal obligation rewritten concisely in 1-2 sentences. Max 60 words. Do NOT copy verbatim.
|
| 58 |
+
|
| 59 |
+
Rules:
|
| 60 |
+
- One entry per clause_type maximum. If duplicates exist, keep the most restrictive.
|
| 61 |
+
- Skip purely numeric clauses like "maintain debt ratio >= 2.5" β financial covenants only.
|
| 62 |
+
- Skip any clause that does not fit the listed types.
|
| 63 |
+
|
| 64 |
+
Return format β JSON array only, nothing else:
|
| 65 |
+
[
|
| 66 |
+
{{"clause_type": "termination", "clause_text": "Either party may terminate with 30 days written notice."}},
|
| 67 |
+
{{"clause_type": "dispute_resolution", "clause_text": "All disputes resolved through binding arbitration in New York."}}
|
| 68 |
+
]
|
| 69 |
+
|
| 70 |
+
Contract text:
|
| 71 |
+
{contract_text}"""
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def extract_clauses(contract_text: str, contract_label: str) -> list[dict]:
|
| 75 |
+
"""
|
| 76 |
+
Call Groq to extract and classify clauses from one contract.
|
| 77 |
+
Returns list of {clause_type, clause_text, contract} dicts.
|
| 78 |
+
"""
|
| 79 |
+
prompt = EXTRACTION_USER_PROMPT.format(
|
| 80 |
+
clause_types=", ".join(CLAUSE_TYPES),
|
| 81 |
+
contract_text=contract_text.strip()
|
| 82 |
+
)
|
| 83 |
+
|
| 84 |
+
# Non-streaming β we need the full response before JSON parsing
|
| 85 |
+
completion = groq_client.chat.completions.create(
|
| 86 |
+
model=GROQ_MODEL,
|
| 87 |
+
messages=[
|
| 88 |
+
{"role": "system", "content": EXTRACTION_SYSTEM_PROMPT},
|
| 89 |
+
{"role": "user", "content": prompt},
|
| 90 |
+
],
|
| 91 |
+
temperature=0, # deterministic extraction
|
| 92 |
+
max_completion_tokens=2048,
|
| 93 |
+
top_p=1,
|
| 94 |
+
reasoning_effort="medium",
|
| 95 |
+
stream=False, # must be False β need full JSON before parsing
|
| 96 |
+
stop=None,
|
| 97 |
+
)
|
| 98 |
+
|
| 99 |
+
raw = completion.choices[0].message.content.strip()
|
| 100 |
+
|
| 101 |
+
# Strip markdown fences if model adds them anyway
|
| 102 |
+
if raw.startswith("```"):
|
| 103 |
+
raw = raw.split("```")[1]
|
| 104 |
+
if raw.startswith("json"):
|
| 105 |
+
raw = raw[4:]
|
| 106 |
+
raw = raw.strip()
|
| 107 |
+
|
| 108 |
+
try:
|
| 109 |
+
clauses = json.loads(raw)
|
| 110 |
+
# Handle if model wraps array in a dict
|
| 111 |
+
if isinstance(clauses, dict):
|
| 112 |
+
clauses = next(iter(clauses.values()))
|
| 113 |
+
except json.JSONDecodeError as e:
|
| 114 |
+
print(f"[ERROR] JSON parse failed for {contract_label}: {e}")
|
| 115 |
+
print(f"Raw response was:\n{raw[:400]}")
|
| 116 |
+
return []
|
| 117 |
+
|
| 118 |
+
for c in clauses:
|
| 119 |
+
c["contract"] = contract_label
|
| 120 |
+
|
| 121 |
+
print(f"\n[{contract_label}] Extracted {len(clauses)} clauses:")
|
| 122 |
+
for c in clauses:
|
| 123 |
+
print(f" [{c['clause_type']}] {c['clause_text'][:80]}...")
|
| 124 |
+
|
| 125 |
+
return clauses
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
# ββ Step 2: Pair same-type clauses across contracts βββββββββββββββββββββββββββ
|
| 129 |
+
|
| 130 |
+
def generate_pairs(
|
| 131 |
+
clauses_a: list[dict],
|
| 132 |
+
clauses_b: list[dict],
|
| 133 |
+
) -> list[dict]:
|
| 134 |
+
"""
|
| 135 |
+
Match clauses of the same type across Contract A and Contract B.
|
| 136 |
+
Returns list of {clause_type, clause_a, clause_b} dicts.
|
| 137 |
+
"""
|
| 138 |
+
index_a = {c["clause_type"]: c["clause_text"] for c in clauses_a}
|
| 139 |
+
index_b = {c["clause_type"]: c["clause_text"] for c in clauses_b}
|
| 140 |
+
|
| 141 |
+
matched_types = set(index_a.keys()) & set(index_b.keys())
|
| 142 |
+
unmatched_types = set(index_a.keys()).symmetric_difference(set(index_b.keys()))
|
| 143 |
+
|
| 144 |
+
pairs = [
|
| 145 |
+
{
|
| 146 |
+
"clause_type": clause_type,
|
| 147 |
+
"clause_a": index_a[clause_type],
|
| 148 |
+
"clause_b": index_b[clause_type],
|
| 149 |
+
}
|
| 150 |
+
for clause_type in matched_types
|
| 151 |
+
]
|
| 152 |
+
|
| 153 |
+
print(f"\n[PAIRING] {len(pairs)} matching types: {sorted(matched_types)}")
|
| 154 |
+
if unmatched_types:
|
| 155 |
+
print(f"[PAIRING] Only in one contract (skipped): {sorted(unmatched_types)}")
|
| 156 |
+
|
| 157 |
+
return pairs
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
# ββ Step 3: Validate token lengths before inference βββββββββββββββββββββββββββ
|
| 161 |
+
|
| 162 |
+
def check_token_length(tokenizer, clause_a: str, clause_b: str, max_len: int) -> int:
|
| 163 |
+
"""Returns token count. Warns if truncation will occur."""
|
| 164 |
+
tokens = tokenizer(
|
| 165 |
+
f"{clause_a} [SEP] {clause_b}",
|
| 166 |
+
return_tensors="pt",
|
| 167 |
+
truncation=False,
|
| 168 |
+
)
|
| 169 |
+
length = tokens["input_ids"].shape[1]
|
| 170 |
+
if length > max_len:
|
| 171 |
+
print(f" [WARN] {length} tokens > MAX_LEN {max_len} β will be truncated")
|
| 172 |
+
elif length > int(max_len * 0.85):
|
| 173 |
+
print(f" [WARN] {length} tokens is close to limit ({max_len})")
|
| 174 |
+
return length
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
# ββ Step 4: Load and run Model 3 βββββββββββββββββββββββββββββββββββββββββββββ
|
| 178 |
+
|
| 179 |
+
def load_model3(model_dir: str, max_len: int):
|
| 180 |
+
tokenizer = AutoTokenizer.from_pretrained(model_dir)
|
| 181 |
+
device = 0 if torch.cuda.is_available() else -1
|
| 182 |
+
pipe = hf_pipeline(
|
| 183 |
+
"text-classification",
|
| 184 |
+
model=model_dir,
|
| 185 |
+
tokenizer=tokenizer,
|
| 186 |
+
device=device,
|
| 187 |
+
top_k=None,
|
| 188 |
+
truncation=True,
|
| 189 |
+
max_length=max_len,
|
| 190 |
+
return_token_type_ids=False # π₯ ADD THIS
|
| 191 |
+
)
|
| 192 |
+
print(f"\n[MODEL3] Loaded from '{model_dir}' on {'GPU' if device == 0 else 'CPU'}")
|
| 193 |
+
return pipe, tokenizer
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def score_pairs(
|
| 197 |
+
pairs: list[dict],
|
| 198 |
+
pipe,
|
| 199 |
+
tokenizer,
|
| 200 |
+
max_len: int,
|
| 201 |
+
conf_threshold: float,
|
| 202 |
+
) -> list[dict]:
|
| 203 |
+
"""
|
| 204 |
+
Run Model 3 on each clause pair.
|
| 205 |
+
Returns results sorted: contradictions first, then by confidence descending.
|
| 206 |
+
"""
|
| 207 |
+
results = []
|
| 208 |
+
|
| 209 |
+
for pair in pairs:
|
| 210 |
+
clause_a = pair["clause_a"]
|
| 211 |
+
clause_b = pair["clause_b"]
|
| 212 |
+
clause_type = pair["clause_type"]
|
| 213 |
+
|
| 214 |
+
token_len = check_token_length(tokenizer, clause_a, clause_b, max_len)
|
| 215 |
+
raw_result = pipe(f"{clause_a} [SEP] {clause_b}")
|
| 216 |
+
|
| 217 |
+
if raw_result and isinstance(raw_result[0], list):
|
| 218 |
+
raw_result = raw_result[0]
|
| 219 |
+
|
| 220 |
+
scores = {r["label"]: r["score"] for r in raw_result}
|
| 221 |
+
predicted_label = max(scores, key=scores.get)
|
| 222 |
+
predicted_score = scores[predicted_label]
|
| 223 |
+
contradiction_score = scores.get("contradiction", 0.0)
|
| 224 |
+
|
| 225 |
+
results.append({
|
| 226 |
+
"clause_type": clause_type,
|
| 227 |
+
"clause_a": clause_a,
|
| 228 |
+
"clause_b": clause_b,
|
| 229 |
+
"predicted_label": predicted_label,
|
| 230 |
+
"predicted_score": round(predicted_score, 4),
|
| 231 |
+
"contradiction_score": round(contradiction_score, 4),
|
| 232 |
+
"all_scores": {k: round(v, 4) for k, v in scores.items()},
|
| 233 |
+
"token_length": token_len,
|
| 234 |
+
"uncertain": predicted_score < conf_threshold,
|
| 235 |
+
})
|
| 236 |
+
|
| 237 |
+
# Contradictions first, then sorted by confidence descending
|
| 238 |
+
results.sort(key=lambda x: (
|
| 239 |
+
x["predicted_label"] != "contradiction",
|
| 240 |
+
-x["predicted_score"],
|
| 241 |
+
))
|
| 242 |
+
# β
Keep only strong, reliable contradictions
|
| 243 |
+
|
| 244 |
+
return results
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
# ββ Step 5: Print results βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 248 |
+
|
| 249 |
+
def print_results(results: list[dict]):
|
| 250 |
+
# π₯ Split results
|
| 251 |
+
strong = [
|
| 252 |
+
r for r in results
|
| 253 |
+
if r["predicted_label"] == "contradiction"
|
| 254 |
+
and not r["uncertain"]
|
| 255 |
+
and r["predicted_score"] >= 0.75
|
| 256 |
+
]
|
| 257 |
+
|
| 258 |
+
uncertain = [
|
| 259 |
+
r for r in results
|
| 260 |
+
if r["uncertain"]
|
| 261 |
+
]
|
| 262 |
+
|
| 263 |
+
print("\n" + "=" * 65)
|
| 264 |
+
print("CONTRACT CONFLICT ANALYSIS")
|
| 265 |
+
print("=" * 65)
|
| 266 |
+
|
| 267 |
+
# β
PRIMARY SECTION
|
| 268 |
+
print("\nββ HIGH-CONFIDENCE CONFLICTS βββββββββββββββββββββββββββββ")
|
| 269 |
+
print("[INFO] These are reliable contradictions (>= 75%)")
|
| 270 |
+
|
| 271 |
+
if strong:
|
| 272 |
+
for r in strong:
|
| 273 |
+
print(f"\n[{r['clause_type'].upper()}] {r['predicted_score']:.2%}")
|
| 274 |
+
print(f"Contract A: {r['clause_a']}")
|
| 275 |
+
print(f"Contract B: {r['clause_b']}")
|
| 276 |
+
else:
|
| 277 |
+
print(" None found.")
|
| 278 |
+
|
| 279 |
+
# β οΈ SECONDARY SECTION
|
| 280 |
+
print("\nββ UNCERTAIN / REVIEW NEEDED βββββββββββββββββββββββββββββ")
|
| 281 |
+
print("[INFO] Lower-confidence predictions β require human validation")
|
| 282 |
+
|
| 283 |
+
if uncertain:
|
| 284 |
+
for r in uncertain:
|
| 285 |
+
print(f"\n[{r['clause_type'].upper()}] "
|
| 286 |
+
f"{r['predicted_label']} ({r['predicted_score']:.2%})")
|
| 287 |
+
print(f"Contract A: {r['clause_a']}")
|
| 288 |
+
print(f"Contract B: {r['clause_b']}")
|
| 289 |
+
else:
|
| 290 |
+
print(" None.")
|
| 291 |
+
|
| 292 |
+
print("\n" + "=" * 65)
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
# ββ Main pipeline βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 296 |
+
|
| 297 |
+
def run_pipeline(
|
| 298 |
+
contract_a_text: str,
|
| 299 |
+
contract_b_text: str,
|
| 300 |
+
model3_dir: str = MODEL3_DIR,
|
| 301 |
+
max_len: int = MAX_LEN,
|
| 302 |
+
conf_threshold: float = CONF_THRESHOLD,
|
| 303 |
+
) -> list[dict]:
|
| 304 |
+
"""
|
| 305 |
+
Full pipeline:
|
| 306 |
+
1. Groq extracts + classifies clauses from both contracts
|
| 307 |
+
2. Same-type clauses are paired across contracts
|
| 308 |
+
3. Model 3 scores each pair for contradiction
|
| 309 |
+
4. Results returned sorted by conflict severity
|
| 310 |
+
"""
|
| 311 |
+
print("\nββ STEP 1: Extracting clauses via Groq ββββββββββββββββββββ")
|
| 312 |
+
clauses_a = extract_clauses(contract_a_text, "Contract A")
|
| 313 |
+
clauses_b = extract_clauses(contract_b_text, "Contract B")
|
| 314 |
+
|
| 315 |
+
if not clauses_a or not clauses_b:
|
| 316 |
+
print("[ERROR] Extraction returned empty. Check GROQ_API_KEY and contract text.")
|
| 317 |
+
return []
|
| 318 |
+
|
| 319 |
+
print("\nββ STEP 2: Generating clause pairs ββββββββββββββββββββββββ")
|
| 320 |
+
pairs = generate_pairs(clauses_a, clauses_b)
|
| 321 |
+
|
| 322 |
+
if not pairs:
|
| 323 |
+
print("[WARN] No matching clause types between contracts.")
|
| 324 |
+
return []
|
| 325 |
+
|
| 326 |
+
print(f"\nββ STEP 3: Scoring {len(pairs)} pairs with Model 3 ββββββββ")
|
| 327 |
+
pipe, tokenizer = load_model3(model3_dir, max_len)
|
| 328 |
+
results = score_pairs(pairs, pipe, tokenizer, max_len, conf_threshold)
|
| 329 |
+
|
| 330 |
+
print_results(results)
|
| 331 |
+
return results
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
# ββ Example usage βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 335 |
+
|
| 336 |
+
if __name__ == "__main__":
|
| 337 |
+
|
| 338 |
+
CONTRACT_A = """
|
| 339 |
+
VENDOR AGREEMENT 2024
|
| 340 |
+
|
| 341 |
+
Termination: Either party may terminate this agreement for convenience
|
| 342 |
+
upon 30 days written notice to the other party.
|
| 343 |
+
|
| 344 |
+
Warranties: Seller warrants that all deliverables shall be free from
|
| 345 |
+
defects for a period of 24 months from the date of acceptance by Buyer.
|
| 346 |
+
|
| 347 |
+
Dispute Resolution: All disputes arising under this agreement shall be
|
| 348 |
+
resolved through binding arbitration in New York under AAA rules.
|
| 349 |
+
|
| 350 |
+
Intellectual Property: The Licensee is granted an exclusive, worldwide,
|
| 351 |
+
perpetual license to use the Software and all derivative works.
|
| 352 |
+
|
| 353 |
+
Confidentiality: Neither party shall disclose Confidential Information
|
| 354 |
+
to any third party without prior written consent of the disclosing party.
|
| 355 |
+
|
| 356 |
+
Governing Law: This agreement shall be governed by the laws of Delaware.
|
| 357 |
+
"""
|
| 358 |
+
|
| 359 |
+
CONTRACT_B = """
|
| 360 |
+
MASTER SERVICES AGREEMENT 2024
|
| 361 |
+
|
| 362 |
+
Termination: This agreement may only be terminated for cause, specifically
|
| 363 |
+
material breach that remains uncured for 60 days after written notice.
|
| 364 |
+
|
| 365 |
+
Warranties: Seller disclaims all warranties, express or implied, including
|
| 366 |
+
any warranty of merchantability or fitness for a particular purpose.
|
| 367 |
+
|
| 368 |
+
Dispute Resolution: Either party may bring suit in any court of competent
|
| 369 |
+
jurisdiction to resolve disputes arising under this agreement.
|
| 370 |
+
|
| 371 |
+
Intellectual Property: The license granted herein is non-exclusive, limited
|
| 372 |
+
to the United States, and valid for 12 months only from the effective date.
|
| 373 |
+
|
| 374 |
+
Confidentiality: Confidential Information must not be shared with outside
|
| 375 |
+
parties unless the disclosing party agrees in writing beforehand.
|
| 376 |
+
|
| 377 |
+
Governing Law: This agreement is governed by the laws of California.
|
| 378 |
+
"""
|
| 379 |
+
|
| 380 |
+
results = run_pipeline(CONTRACT_A, CONTRACT_B)
|
| 381 |
+
|
| 382 |
+
with open("conflict_results.json", "w") as f:
|
| 383 |
+
json.dump(results, f, indent=2)
|
| 384 |
+
print("\nResults saved to conflict_results.json")
|
main.py
ADDED
|
@@ -0,0 +1,658 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
import io
|
| 6 |
+
import pickle
|
| 7 |
+
import secrets
|
| 8 |
+
import threading
|
| 9 |
+
from datetime import datetime, timezone
|
| 10 |
+
from functools import wraps
|
| 11 |
+
|
| 12 |
+
import httpx
|
| 13 |
+
import urllib.parse
|
| 14 |
+
import numpy as np
|
| 15 |
+
import pandas as pd
|
| 16 |
+
import pdfplumber
|
| 17 |
+
from bson import ObjectId
|
| 18 |
+
from dotenv import load_dotenv
|
| 19 |
+
from flask import Flask, jsonify, redirect, request, session
|
| 20 |
+
from flask_cors import CORS
|
| 21 |
+
from flask_session import Session
|
| 22 |
+
from pymongo import MongoClient
|
| 23 |
+
import certifi
|
| 24 |
+
from werkzeug.security import check_password_hash, generate_password_hash
|
| 25 |
+
from all_model_code.model_1_code.pipeline import ObligationPipeline
|
| 26 |
+
from scheduler_api import scheduler_bp, scheduler, BreachedObligation, ObligationType
|
| 27 |
+
|
| 28 |
+
# ββ clause_extractor (for two-contract comparison) ββββββββββββββββββββββββββββ
|
| 29 |
+
# Adjust EXTRACTOR_DIR if clause_extractor.py lives elsewhere relative to main.py
|
| 30 |
+
EXTRACTOR_DIR = os.path.join(os.path.dirname(__file__), "..")
|
| 31 |
+
sys.path.insert(0, EXTRACTOR_DIR)
|
| 32 |
+
try:
|
| 33 |
+
from clause_extractor import extract_clauses, generate_pairs, load_model3, score_pairs
|
| 34 |
+
EXTRACTOR_AVAILABLE = True
|
| 35 |
+
except ImportError as e:
|
| 36 |
+
print(f"[WARN] clause_extractor not found: {e}. /api/compare will run in mock mode.")
|
| 37 |
+
EXTRACTOR_AVAILABLE = False
|
| 38 |
+
|
| 39 |
+
# βββ Load env βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 40 |
+
load_dotenv()
|
| 41 |
+
|
| 42 |
+
MONGO_URI = os.getenv("MONGO_URI")
|
| 43 |
+
GOOGLE_CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID")
|
| 44 |
+
GOOGLE_SECRET = os.getenv("GOOGLE_CLIENT_SECRET")
|
| 45 |
+
GOOGLE_REDIRECT = os.getenv("GOOGLE_REDIRECT_URI")
|
| 46 |
+
FRONTEND_URL = os.getenv("FRONTEND_URL", "http://localhost:3000")
|
| 47 |
+
SECRET_KEY = os.getenv("SECRET_KEY", secrets.token_hex(32))
|
| 48 |
+
IS_PROD = os.getenv("FLASK_ENV", "development") == "production"
|
| 49 |
+
CONF_THRESHOLD = float(os.getenv("CONF_THRESHOLD", "0.7"))
|
| 50 |
+
MAX_LEN = int(os.getenv("MAX_LEN", "512"))
|
| 51 |
+
|
| 52 |
+
# βββ MongoDB ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 53 |
+
try:
|
| 54 |
+
mongo_client = MongoClient(MONGO_URI, tlsCAFile=certifi.where())
|
| 55 |
+
_db = mongo_client["userinfo"]
|
| 56 |
+
_db.users.create_index("email", unique=True, sparse=True)
|
| 57 |
+
print("Connected to MongoDB")
|
| 58 |
+
except Exception as e:
|
| 59 |
+
print(f"MongoDB connection failed: {e}")
|
| 60 |
+
if "SSL" in str(e) or "tls" in str(e).lower():
|
| 61 |
+
print("\n" + "!" * 60)
|
| 62 |
+
print("CRITICAL: ATLAS IP WHITELIST BLOCKED!")
|
| 63 |
+
print("MongoDB Atlas enforces IP whitelisting by aggressively dropping")
|
| 64 |
+
print("the TLS/SSL handshake. This 'tls1 alert internal error' means")
|
| 65 |
+
print("your current network IP is not added to your Atlas allowlist.")
|
| 66 |
+
print("Log into MongoDB Atlas -> Security -> Network Access -> Add IP")
|
| 67 |
+
print("!" * 60 + "\n")
|
| 68 |
+
raise
|
| 69 |
+
|
| 70 |
+
def db():
|
| 71 |
+
return _db
|
| 72 |
+
|
| 73 |
+
def now():
|
| 74 |
+
return datetime.now(timezone.utc)
|
| 75 |
+
|
| 76 |
+
# βββ App setup ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 77 |
+
app = Flask(__name__)
|
| 78 |
+
# app.secret_key = secrets.token_hex(16)
|
| 79 |
+
|
| 80 |
+
CORS(
|
| 81 |
+
app,
|
| 82 |
+
origins = [FRONTEND_URL, "http://localhost:3000", "http://127.0.0.1:3000"],
|
| 83 |
+
supports_credentials = True,
|
| 84 |
+
allow_headers = ["Content-Type", "Authorization"],
|
| 85 |
+
methods = ["GET", "POST", "PUT", "DELETE", "OPTIONS"],
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
app.register_blueprint(scheduler_bp)
|
| 89 |
+
app.config.update(
|
| 90 |
+
SECRET_KEY = os.getenv("SECRET_KEY") or secrets.token_hex(32),
|
| 91 |
+
SESSION_TYPE = "filesystem",
|
| 92 |
+
SESSION_FILE_DIR = os.path.join(os.getcwd(), "session_data"),
|
| 93 |
+
SESSION_COOKIE_SAMESITE = "Lax",
|
| 94 |
+
SESSION_COOKIE_SECURE = IS_PROD,
|
| 95 |
+
)
|
| 96 |
+
Session(app)
|
| 97 |
+
|
| 98 |
+
http = httpx.Client()
|
| 99 |
+
|
| 100 |
+
# βββ Auth helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 101 |
+
def require_auth(fn):
|
| 102 |
+
@wraps(fn)
|
| 103 |
+
def inner(*a, **kw):
|
| 104 |
+
if "user_id" not in session:
|
| 105 |
+
return jsonify({"error": "Not authenticated"}), 401
|
| 106 |
+
return fn(*a, **kw)
|
| 107 |
+
return inner
|
| 108 |
+
|
| 109 |
+
def uid():
|
| 110 |
+
return session.get("user_id")
|
| 111 |
+
|
| 112 |
+
def serialize_user(user):
|
| 113 |
+
"""Return a safe dict β never exposes the hashed password."""
|
| 114 |
+
return {
|
| 115 |
+
"id": str(user["_id"]),
|
| 116 |
+
"name": user.get("name"),
|
| 117 |
+
"email": user.get("email"),
|
| 118 |
+
"picture": user.get("picture"),
|
| 119 |
+
"provider": user.get("provider", "email"),
|
| 120 |
+
"created_at": user["created_at"].isoformat() if user.get("created_at") else None,
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
# βββ Email / Password auth ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 124 |
+
|
| 125 |
+
@app.route("/auth/register", methods=["POST"])
|
| 126 |
+
def register():
|
| 127 |
+
data = request.get_json(silent=True) or {}
|
| 128 |
+
name = (data.get("name") or "").strip()
|
| 129 |
+
email = (data.get("email") or "").strip().lower()
|
| 130 |
+
password = data.get("password") or ""
|
| 131 |
+
|
| 132 |
+
if not name:
|
| 133 |
+
return jsonify({"error": "Name is required"}), 400
|
| 134 |
+
if not email or "@" not in email:
|
| 135 |
+
return jsonify({"error": "A valid email is required"}), 400
|
| 136 |
+
if len(password) < 8:
|
| 137 |
+
return jsonify({"error": "Password must be at least 8 characters"}), 400
|
| 138 |
+
if db().users.find_one({"email": email, "provider": "email"}):
|
| 139 |
+
return jsonify({"error": "An account with that email already exists"}), 409
|
| 140 |
+
|
| 141 |
+
hashed = generate_password_hash(password)
|
| 142 |
+
user_doc = {
|
| 143 |
+
"name": name, "email": email, "password": hashed,
|
| 144 |
+
"picture": None, "provider": "email", "provider_id": None,
|
| 145 |
+
"created_at": now(), "updated_at": now(),
|
| 146 |
+
}
|
| 147 |
+
result = db().users.insert_one(user_doc)
|
| 148 |
+
session["user_id"] = str(result.inserted_id)
|
| 149 |
+
session["user_name"] = name
|
| 150 |
+
user_doc["_id"] = result.inserted_id
|
| 151 |
+
return jsonify({"message": "Account created", "user": serialize_user(user_doc)}), 201
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
@app.route("/auth/login", methods=["POST"])
|
| 155 |
+
def login():
|
| 156 |
+
data = request.get_json(silent=True) or {}
|
| 157 |
+
email = (data.get("email") or "").strip().lower()
|
| 158 |
+
password = data.get("password") or ""
|
| 159 |
+
|
| 160 |
+
if not email or not password:
|
| 161 |
+
return jsonify({"error": "Email and password are required"}), 400
|
| 162 |
+
|
| 163 |
+
user = db().users.find_one({"email": email, "provider": "email"})
|
| 164 |
+
dummy_hash = generate_password_hash("__dummy__")
|
| 165 |
+
stored_hash = user["password"] if user else dummy_hash
|
| 166 |
+
valid = check_password_hash(stored_hash, password)
|
| 167 |
+
|
| 168 |
+
if not user or not valid:
|
| 169 |
+
return jsonify({"error": "Invalid email or password"}), 401
|
| 170 |
+
|
| 171 |
+
session["user_id"] = str(user["_id"])
|
| 172 |
+
session["user_name"] = user.get("name")
|
| 173 |
+
return jsonify({"message": "Logged in", "user": serialize_user(user)})
|
| 174 |
+
|
| 175 |
+
# βββ Google OAuth βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 176 |
+
|
| 177 |
+
@app.route("/auth/login/google")
|
| 178 |
+
def google_login():
|
| 179 |
+
state = secrets.token_urlsafe(16)
|
| 180 |
+
session["oauth_state"] = state
|
| 181 |
+
params = {
|
| 182 |
+
"client_id": GOOGLE_CLIENT_ID, "redirect_uri": GOOGLE_REDIRECT,
|
| 183 |
+
"response_type": "code", "scope": "openid email profile",
|
| 184 |
+
"state": state, "access_type": "online", "prompt": "select_account",
|
| 185 |
+
}
|
| 186 |
+
return redirect(f"https://accounts.google.com/o/oauth2/v2/auth?{urllib.parse.urlencode(params)}")
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
@app.route("/auth/callback/google")
|
| 190 |
+
def google_callback():
|
| 191 |
+
if request.args.get("state") != session.pop("oauth_state", None):
|
| 192 |
+
return jsonify({"error": "Invalid state β possible CSRF"}), 400
|
| 193 |
+
|
| 194 |
+
code = request.args.get("code")
|
| 195 |
+
if not code:
|
| 196 |
+
return jsonify({"error": "No authorization code returned from Google"}), 400
|
| 197 |
+
|
| 198 |
+
token_res = http.post(
|
| 199 |
+
"https://oauth2.googleapis.com/token",
|
| 200 |
+
data={
|
| 201 |
+
"code": code, "client_id": GOOGLE_CLIENT_ID,
|
| 202 |
+
"client_secret": GOOGLE_SECRET, "redirect_uri": GOOGLE_REDIRECT,
|
| 203 |
+
"grant_type": "authorization_code",
|
| 204 |
+
},
|
| 205 |
+
).json()
|
| 206 |
+
|
| 207 |
+
access_token = token_res.get("access_token")
|
| 208 |
+
if not access_token:
|
| 209 |
+
return jsonify({"error": "Token exchange failed", "detail": token_res}), 400
|
| 210 |
+
|
| 211 |
+
info = http.get(
|
| 212 |
+
"https://www.googleapis.com/oauth2/v3/userinfo",
|
| 213 |
+
headers={"Authorization": f"Bearer {access_token}"},
|
| 214 |
+
).json()
|
| 215 |
+
|
| 216 |
+
if not info.get("sub"):
|
| 217 |
+
return jsonify({"error": "Could not retrieve user info from Google"}), 400
|
| 218 |
+
|
| 219 |
+
user = db().users.find_one_and_update(
|
| 220 |
+
{"provider_id": info["sub"], "provider": "google"},
|
| 221 |
+
{
|
| 222 |
+
"$set": {"name": info.get("name"), "email": info.get("email"),
|
| 223 |
+
"picture": info.get("picture"), "updated_at": now()},
|
| 224 |
+
"$setOnInsert": {"provider": "google", "provider_id": info["sub"], "created_at": now()},
|
| 225 |
+
},
|
| 226 |
+
upsert=True,
|
| 227 |
+
return_document=True,
|
| 228 |
+
)
|
| 229 |
+
|
| 230 |
+
session["user_id"] = str(user["_id"])
|
| 231 |
+
session["user_name"] = info.get("name")
|
| 232 |
+
return redirect(f"{FRONTEND_URL}/dashboard")
|
| 233 |
+
|
| 234 |
+
# βββ Session routes βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 235 |
+
|
| 236 |
+
@app.route("/auth/me")
|
| 237 |
+
def auth_me():
|
| 238 |
+
if "user_id" not in session:
|
| 239 |
+
return jsonify({"authenticated": False}), 200
|
| 240 |
+
user = db().users.find_one({"_id": ObjectId(uid())})
|
| 241 |
+
if not user:
|
| 242 |
+
session.clear()
|
| 243 |
+
return jsonify({"authenticated": False}), 200
|
| 244 |
+
return jsonify({"authenticated": True, "user": serialize_user(user)})
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
@app.route("/auth/logout", methods=["POST"])
|
| 248 |
+
def logout():
|
| 249 |
+
session.clear()
|
| 250 |
+
return jsonify({"message": "Logged out"})
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
@app.route("/api/profile")
|
| 254 |
+
@require_auth
|
| 255 |
+
def profile():
|
| 256 |
+
user = db().users.find_one({"_id": ObjectId(uid())})
|
| 257 |
+
if not user:
|
| 258 |
+
return jsonify({"error": "User not found"}), 404
|
| 259 |
+
return jsonify(serialize_user(user))
|
| 260 |
+
|
| 261 |
+
# βββ ML Model Registry (lazy-loaded, thread-safe) ββββββββββββββββββββββββββββ
|
| 262 |
+
|
| 263 |
+
_models: dict = {}
|
| 264 |
+
_model_lock = threading.Lock()
|
| 265 |
+
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 266 |
+
_compare_model_cache: dict = {} # separate cache for clause_extractor's model3
|
| 267 |
+
|
| 268 |
+
def _load_obligation_pipeline():
|
| 269 |
+
config = {
|
| 270 |
+
"model_name": os.path.join(BASE_DIR, "ckpt_obligation_fast"),
|
| 271 |
+
"device": "cpu",
|
| 272 |
+
"filter_min_confidence": 0.1,
|
| 273 |
+
"min_fields": 2,
|
| 274 |
+
}
|
| 275 |
+
return ObligationPipeline(config)
|
| 276 |
+
|
| 277 |
+
def _load_nli_model():
|
| 278 |
+
from transformers import pipeline as hf_pipeline
|
| 279 |
+
path = os.path.join(BASE_DIR, "model_3")
|
| 280 |
+
return hf_pipeline(
|
| 281 |
+
"text-classification", model=path, tokenizer=path,
|
| 282 |
+
device=-1, top_k=None, truncation=True, max_length=128,
|
| 283 |
+
)
|
| 284 |
+
|
| 285 |
+
def _load_risk_bundle():
|
| 286 |
+
pkl_path = os.path.join(BASE_DIR, "risk_model_v10_extended.pkl")
|
| 287 |
+
with open(pkl_path, "rb") as f:
|
| 288 |
+
return pickle.load(f)
|
| 289 |
+
|
| 290 |
+
def get_model(key: str):
|
| 291 |
+
if key not in _models:
|
| 292 |
+
with _model_lock:
|
| 293 |
+
if key not in _models:
|
| 294 |
+
print(f"[ML] Loading model: {key} β¦")
|
| 295 |
+
if key == "obligation":
|
| 296 |
+
_models[key] = _load_obligation_pipeline()
|
| 297 |
+
elif key == "nli":
|
| 298 |
+
_models[key] = _load_nli_model()
|
| 299 |
+
elif key == "risk":
|
| 300 |
+
_models[key] = _load_risk_bundle()
|
| 301 |
+
print(f"[ML] Model '{key}' ready.")
|
| 302 |
+
return _models[key]
|
| 303 |
+
|
| 304 |
+
def _get_compare_model():
|
| 305 |
+
"""Lazy-load clause_extractor's model3 (used by /api/compare)."""
|
| 306 |
+
if not _compare_model_cache:
|
| 307 |
+
pipe, tokenizer = load_model3(os.path.join(BASE_DIR, "model_3"), MAX_LEN)
|
| 308 |
+
_compare_model_cache["pipe"] = pipe
|
| 309 |
+
_compare_model_cache["tokenizer"] = tokenizer
|
| 310 |
+
return _compare_model_cache["pipe"], _compare_model_cache["tokenizer"]
|
| 311 |
+
|
| 312 |
+
# βββ PDF / text extraction helper ββββββββββββββββββββββββββββββββββββββββββββ
|
| 313 |
+
|
| 314 |
+
def extract_text_from_request() -> str:
|
| 315 |
+
if "file" in request.files:
|
| 316 |
+
raw = request.files["file"].read()
|
| 317 |
+
with pdfplumber.open(io.BytesIO(raw)) as pdf:
|
| 318 |
+
return "\n".join(p.extract_text() or "" for p in pdf.pages)
|
| 319 |
+
return (request.get_json(silent=True) or {}).get("text", "")
|
| 320 |
+
|
| 321 |
+
# βββ COVENANT OBLIGATION EXTRACTION (/api/analyze) βββββββββββββββββββββββββββ
|
| 322 |
+
|
| 323 |
+
@app.route("/api/analyze", methods=["POST"])
|
| 324 |
+
# @require_auth # Uncomment to lock behind auth
|
| 325 |
+
def analyze_contract():
|
| 326 |
+
text = extract_text_from_request()
|
| 327 |
+
|
| 328 |
+
print("\n" + "=" * 40)
|
| 329 |
+
print("--- 1. INCOMING TEXT TO AI ---")
|
| 330 |
+
print(text[:400].strip() if text else "WARNING: TEXT IS EMPTY!")
|
| 331 |
+
print("=" * 40 + "\n")
|
| 332 |
+
|
| 333 |
+
if not text.strip():
|
| 334 |
+
return jsonify({"error": "No contract text provided"}), 400
|
| 335 |
+
|
| 336 |
+
pipeline = get_model("obligation")
|
| 337 |
+
try:
|
| 338 |
+
raw_results = pipeline.process(
|
| 339 |
+
source=text, source_type="text", contract_id="api_upload", debug=True
|
| 340 |
+
)
|
| 341 |
+
print("\n" + "=" * 40)
|
| 342 |
+
print("--- 2. RAW PIPELINE OUTPUT ---")
|
| 343 |
+
print(raw_results)
|
| 344 |
+
print("=" * 40 + "\n")
|
| 345 |
+
|
| 346 |
+
try:
|
| 347 |
+
from all_model_code.model_1_code.stage1_ingestion import ingest
|
| 348 |
+
from all_model_code.model_1_code.stage2_cleaning import clean_text
|
| 349 |
+
cleaned_text = clean_text(ingest(text, "text"))
|
| 350 |
+
except Exception:
|
| 351 |
+
cleaned_text = text
|
| 352 |
+
|
| 353 |
+
obligations = []
|
| 354 |
+
for i, r in enumerate(raw_results):
|
| 355 |
+
metric_name = r.get("metric_name", "Unknown Metric")
|
| 356 |
+
op = r.get("operator", "must maintain")
|
| 357 |
+
val = r.get("threshold_value", "a specific value")
|
| 358 |
+
score = r.get("confidence_score", 0.5)
|
| 359 |
+
risk = max(5, min(95, round((1 - score) * 80 + 10)))
|
| 360 |
+
obligations.append({
|
| 361 |
+
"id": f"C{i+1}",
|
| 362 |
+
"clause": str(metric_name).replace("_", " ").title()[:30],
|
| 363 |
+
"type": "Financial Covenant",
|
| 364 |
+
"desc": f"The entity {op} a {metric_name} of {val}.",
|
| 365 |
+
"confidence": round(score * 100, 1),
|
| 366 |
+
"risk": risk,
|
| 367 |
+
"source_text": r.get("source_text", ""),
|
| 368 |
+
})
|
| 369 |
+
|
| 370 |
+
if not obligations:
|
| 371 |
+
return jsonify({"error": "No strict numerical obligations found."}), 422
|
| 372 |
+
|
| 373 |
+
return jsonify({
|
| 374 |
+
"obligations": obligations,
|
| 375 |
+
"clause_count": len(obligations),
|
| 376 |
+
"contract_text": cleaned_text,
|
| 377 |
+
})
|
| 378 |
+
|
| 379 |
+
except Exception as e:
|
| 380 |
+
print(f"[analyze] Pipeline error: {e}")
|
| 381 |
+
return jsonify({"error": "Failed to process contract through AI pipeline."}), 500
|
| 382 |
+
|
| 383 |
+
# βββ CROSS-CONTRACT CONFLICT COMPARISON (/api/compare) βββββββββββββββββββββββ
|
| 384 |
+
# Uses clause_extractor.py + Groq to extract and compare two full contracts.
|
| 385 |
+
# Falls back to mock data when EXTRACTOR_AVAILABLE is False.
|
| 386 |
+
|
| 387 |
+
MOCK_COMPARE_RESPONSE = {
|
| 388 |
+
"clauses_a": [
|
| 389 |
+
{"clause_type": "termination", "clause_text": "Either party may terminate this agreement for convenience upon 30 days written notice.", "contract": "Contract A"},
|
| 390 |
+
{"clause_type": "warranty", "clause_text": "Seller warrants all deliverables shall be free from defects for 24 months from acceptance.", "contract": "Contract A"},
|
| 391 |
+
{"clause_type": "dispute_resolution", "clause_text": "All disputes shall be resolved through binding arbitration in New York under AAA rules.", "contract": "Contract A"},
|
| 392 |
+
{"clause_type": "ip_ownership", "clause_text": "Licensee is granted an exclusive, worldwide, perpetual license to use the Software.", "contract": "Contract A"},
|
| 393 |
+
{"clause_type": "confidentiality", "clause_text": "Neither party shall disclose Confidential Information to any third party without prior written consent.", "contract": "Contract A"},
|
| 394 |
+
{"clause_type": "governing_law", "clause_text": "This agreement shall be governed by the laws of Delaware.", "contract": "Contract A"},
|
| 395 |
+
],
|
| 396 |
+
"clauses_b": [
|
| 397 |
+
{"clause_type": "termination", "clause_text": "This agreement may only be terminated for cause β material breach uncured for 60 days after notice.", "contract": "Contract B"},
|
| 398 |
+
{"clause_type": "warranty", "clause_text": "Seller disclaims all warranties, express or implied, including merchantability or fitness for purpose.", "contract": "Contract B"},
|
| 399 |
+
{"clause_type": "dispute_resolution", "clause_text": "Either party may bring suit in any court of competent jurisdiction to resolve disputes.", "contract": "Contract B"},
|
| 400 |
+
{"clause_type": "ip_ownership", "clause_text": "License granted is non-exclusive, limited to the United States, valid for 12 months only.", "contract": "Contract B"},
|
| 401 |
+
{"clause_type": "confidentiality", "clause_text": "Confidential Information must not be shared with outside parties unless the disclosing party agrees.", "contract": "Contract B"},
|
| 402 |
+
{"clause_type": "governing_law", "clause_text": "This agreement is governed by the laws of California.", "contract": "Contract B"},
|
| 403 |
+
],
|
| 404 |
+
"conflicts": [
|
| 405 |
+
{"clause_type": "termination", "clause_a": "Either party may terminate this agreement for convenience upon 30 days written notice.", "clause_b": "This agreement may only be terminated for cause β material breach uncured for 60 days after notice.", "predicted_label": "contradiction", "predicted_score": 0.9312, "contradiction_score": 0.9312, "all_scores": {"contradiction": 0.9312, "entailment": 0.0421, "neutral": 0.0267}, "token_length": 87, "uncertain": False},
|
| 406 |
+
{"clause_type": "warranty", "clause_a": "Seller warrants all deliverables shall be free from defects for 24 months from acceptance.", "clause_b": "Seller disclaims all warranties, express or implied, including merchantability or fitness for purpose.", "predicted_label": "contradiction", "predicted_score": 0.9741, "contradiction_score": 0.9741, "all_scores": {"contradiction": 0.9741, "entailment": 0.0159, "neutral": 0.0100}, "token_length": 72, "uncertain": False},
|
| 407 |
+
{"clause_type": "dispute_resolution", "clause_a": "All disputes shall be resolved through binding arbitration in New York under AAA rules.", "clause_b": "Either party may bring suit in any court of competent jurisdiction to resolve disputes.", "predicted_label": "contradiction", "predicted_score": 0.8823, "contradiction_score": 0.8823, "all_scores": {"contradiction": 0.8823, "entailment": 0.0712, "neutral": 0.0465}, "token_length": 65, "uncertain": False},
|
| 408 |
+
{"clause_type": "ip_ownership", "clause_a": "Licensee is granted an exclusive, worldwide, perpetual license to use the Software.", "clause_b": "License granted is non-exclusive, limited to the United States, valid for 12 months only.", "predicted_label": "contradiction", "predicted_score": 0.9567, "contradiction_score": 0.9567, "all_scores": {"contradiction": 0.9567, "entailment": 0.0281, "neutral": 0.0152}, "token_length": 68, "uncertain": False},
|
| 409 |
+
{"clause_type": "governing_law", "clause_a": "This agreement shall be governed by the laws of Delaware.", "clause_b": "This agreement is governed by the laws of California.", "predicted_label": "contradiction", "predicted_score": 0.7834, "contradiction_score": 0.7834, "all_scores": {"contradiction": 0.7834, "entailment": 0.1243, "neutral": 0.0923}, "token_length": 45, "uncertain": False},
|
| 410 |
+
{"clause_type": "confidentiality", "clause_a": "Neither party shall disclose Confidential Information to any third party without prior written consent.", "clause_b": "Confidential Information must not be shared with outside parties unless the disclosing party agrees.", "predicted_label": "neutral", "predicted_score": 0.5821, "contradiction_score": 0.2341, "all_scores": {"contradiction": 0.2341, "entailment": 0.1838, "neutral": 0.5821}, "token_length": 58, "uncertain": True},
|
| 411 |
+
],
|
| 412 |
+
}
|
| 413 |
+
|
| 414 |
+
|
| 415 |
+
@app.route("/api/compare", methods=["POST"])
|
| 416 |
+
def compare_contracts():
|
| 417 |
+
"""
|
| 418 |
+
POST /api/compare
|
| 419 |
+
Body: { "contract_a": "...", "contract_b": "..." }
|
| 420 |
+
Returns: { clauses_a, clauses_b, conflicts }
|
| 421 |
+
|
| 422 |
+
Extracts clauses from both contracts via Groq (clause_extractor.py) then
|
| 423 |
+
scores each matched pair with model_3 for entailment / contradiction.
|
| 424 |
+
Falls back to MOCK_COMPARE_RESPONSE when clause_extractor is unavailable.
|
| 425 |
+
"""
|
| 426 |
+
data = request.get_json(force=True, silent=True) or {} # force=True ignores Content-Type
|
| 427 |
+
contract_a = (data.get("contract_a") or "").strip()
|
| 428 |
+
contract_b = (data.get("contract_b") or "").strip()
|
| 429 |
+
|
| 430 |
+
if not contract_a or not contract_b:
|
| 431 |
+
print(f"[compare] 400 β got keys: {list(data.keys())}, "
|
| 432 |
+
f"a={bool(contract_a)}, b={bool(contract_b)}")
|
| 433 |
+
return jsonify({"error": "Both contract_a and contract_b are required"}), 400
|
| 434 |
+
|
| 435 |
+
if not EXTRACTOR_AVAILABLE:
|
| 436 |
+
print("[MOCK] clause_extractor unavailable β returning mock compare data")
|
| 437 |
+
return jsonify(MOCK_COMPARE_RESPONSE)
|
| 438 |
+
|
| 439 |
+
try:
|
| 440 |
+
print("\n[API] Extracting clauses via Groq...")
|
| 441 |
+
clauses_a = extract_clauses(contract_a, "Contract A")
|
| 442 |
+
clauses_b = extract_clauses(contract_b, "Contract B")
|
| 443 |
+
|
| 444 |
+
if not clauses_a or not clauses_b:
|
| 445 |
+
return jsonify({"error": "Clause extraction returned empty. Check GROQ_API_KEY."}), 500
|
| 446 |
+
|
| 447 |
+
print("[API] Generating pairs...")
|
| 448 |
+
pairs = generate_pairs(clauses_a, clauses_b)
|
| 449 |
+
|
| 450 |
+
conflicts = []
|
| 451 |
+
if pairs:
|
| 452 |
+
print(f"[API] Scoring {len(pairs)} pairs with model_3...")
|
| 453 |
+
pipe, tokenizer = _get_compare_model()
|
| 454 |
+
conflicts = score_pairs(pairs, pipe, tokenizer, MAX_LEN, CONF_THRESHOLD)
|
| 455 |
+
|
| 456 |
+
return jsonify({"clauses_a": clauses_a, "clauses_b": clauses_b, "conflicts": conflicts})
|
| 457 |
+
|
| 458 |
+
except Exception as e:
|
| 459 |
+
print(f"[compare] Pipeline error: {e}")
|
| 460 |
+
return jsonify({"error": str(e)}), 500
|
| 461 |
+
|
| 462 |
+
# βββ SINGLE-CLAUSE NLI (/api/conflicts) ββββββββββββββββββββββββββββββββββββββ
|
| 463 |
+
|
| 464 |
+
@app.route("/api/conflicts", methods=["POST"])
|
| 465 |
+
def detect_conflicts():
|
| 466 |
+
"""
|
| 467 |
+
POST /api/conflicts
|
| 468 |
+
Body: { "clause1": "...", "clause2": "..." }
|
| 469 |
+
Returns: { label, confidence, scores }
|
| 470 |
+
"""
|
| 471 |
+
data = request.get_json(silent=True) or {}
|
| 472 |
+
clause1 = (data.get("clause1") or "").strip()
|
| 473 |
+
clause2 = (data.get("clause2") or "").strip()
|
| 474 |
+
|
| 475 |
+
if not clause1 or not clause2:
|
| 476 |
+
return jsonify({"error": "Both clause1 and clause2 are required"}), 400
|
| 477 |
+
|
| 478 |
+
pipe = get_model("nli")
|
| 479 |
+
raw = pipe(f"{clause1} [SEP] {clause2}")
|
| 480 |
+
if raw and isinstance(raw[0], list):
|
| 481 |
+
raw = raw[0]
|
| 482 |
+
|
| 483 |
+
scores = {r["label"]: round(r["score"] * 100, 2) for r in raw}
|
| 484 |
+
best = max(scores, key=scores.get)
|
| 485 |
+
return jsonify({"label": best, "confidence": scores[best], "scores": scores})
|
| 486 |
+
|
| 487 |
+
# βββ RISK FORECAST (/api/risk) ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 488 |
+
|
| 489 |
+
import sys as _sys
|
| 490 |
+
_sys.path.insert(0, os.path.join(BASE_DIR, 'model_2'))
|
| 491 |
+
try:
|
| 492 |
+
from inference_demo import load_ticker_data, build_risk_score
|
| 493 |
+
except ImportError:
|
| 494 |
+
print("[!] Warning: Could not import inference_demo from ../model_2")
|
| 495 |
+
def load_ticker_data(ticker, d): raise NotImplementedError("inference_demo not found")
|
| 496 |
+
def build_risk_score(df): raise NotImplementedError("inference_demo not found")
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
@app.route("/api/risk", methods=["GET"])
|
| 500 |
+
def risk_forecast():
|
| 501 |
+
"""GET /api/risk?ticker=AAPL&horizon=90"""
|
| 502 |
+
ticker = (request.args.get("ticker") or "AAPL").upper()
|
| 503 |
+
horizon = int(request.args.get("horizon", 90))
|
| 504 |
+
|
| 505 |
+
bundle = get_model("risk")
|
| 506 |
+
if ticker not in bundle["models"]:
|
| 507 |
+
return jsonify({"error": f"{ticker} not in model"}), 404
|
| 508 |
+
|
| 509 |
+
try:
|
| 510 |
+
df = load_ticker_data(ticker.lower(), os.path.join(BASE_DIR, 'data/Stocks'))
|
| 511 |
+
fe = build_risk_score(df)
|
| 512 |
+
except Exception as e:
|
| 513 |
+
return jsonify({"error": f"Failed to engineer features: {e}"}), 500
|
| 514 |
+
|
| 515 |
+
payload = bundle["models"][ticker]
|
| 516 |
+
model = payload["model"]
|
| 517 |
+
r_min = payload["r_min"]
|
| 518 |
+
r_max = payload["r_max"]
|
| 519 |
+
threshold = payload["threshold"]
|
| 520 |
+
|
| 521 |
+
current_risk_raw = fe['risk_raw'].iloc[-1]
|
| 522 |
+
current_risk_norm = (current_risk_raw - r_min) / (r_max - r_min + 1e-9)
|
| 523 |
+
|
| 524 |
+
future = model.make_future_dataframe(periods=horizon, freq='B')
|
| 525 |
+
forecast = model.predict(future)
|
| 526 |
+
|
| 527 |
+
# ββ BUG FIX: always use the DatetimeIndex, never the 'Date' column.
|
| 528 |
+
# The 'Date' column can resolve to today on some machines, which makes
|
| 529 |
+
# is_future_or_current False for every row and leaves all yhat = null.
|
| 530 |
+
last_date = pd.to_datetime(fe.index[-1])
|
| 531 |
+
print(f"[DEBUG] last_date={last_date} fe.shape={fe.shape}")
|
| 532 |
+
|
| 533 |
+
future_fc = forecast[forecast['ds'] > last_date]
|
| 534 |
+
|
| 535 |
+
breach_detected = False
|
| 536 |
+
days_to_breach = None
|
| 537 |
+
confidence = "NONE"
|
| 538 |
+
breach_date = None
|
| 539 |
+
|
| 540 |
+
for conf, col in [('HIGH', 'yhat_lower'), ('MEDIUM', 'yhat'), ('LOW', 'yhat_upper')]:
|
| 541 |
+
rows = future_fc[future_fc[col] > threshold]
|
| 542 |
+
if not rows.empty:
|
| 543 |
+
breach_detected = True
|
| 544 |
+
confidence = conf
|
| 545 |
+
breach_date = str(rows.iloc[0]['ds'].date())
|
| 546 |
+
days_to_breach = max((rows.iloc[0]['ds'] - last_date).days, 0)
|
| 547 |
+
break
|
| 548 |
+
|
| 549 |
+
if breach_detected and breach_date:
|
| 550 |
+
try:
|
| 551 |
+
ob_type = ObligationType.LIQUIDITY_RATIO if ticker == 'CHK' else ObligationType.REVENUE
|
| 552 |
+
breach_obj = BreachedObligation(
|
| 553 |
+
contract_id=f"AUTO-{ticker}",
|
| 554 |
+
obligation_type=ob_type,
|
| 555 |
+
metric_name="Financial Risk Score",
|
| 556 |
+
threshold_value=round(float(threshold), 2),
|
| 557 |
+
current_value=round(float(current_risk_norm), 2),
|
| 558 |
+
predicted_value=None,
|
| 559 |
+
deadline=breach_date,
|
| 560 |
+
consequence="Covenant Violation Predicted by Prophet Model",
|
| 561 |
+
conflict_with=None,
|
| 562 |
+
)
|
| 563 |
+
scheduler.process_breach(breach_obj)
|
| 564 |
+
except Exception as e:
|
| 565 |
+
print(f"Failed to auto-schedule breach: {e}")
|
| 566 |
+
|
| 567 |
+
def norm(val):
|
| 568 |
+
span = r_max - r_min if r_max != r_min else 1
|
| 569 |
+
return round(float(np.clip((val - r_min) / span * 100, 0, 100)), 2)
|
| 570 |
+
|
| 571 |
+
# Build a DatetimeIndex-keyed lookup for fast y_norm resolution
|
| 572 |
+
fe_indexed = fe["risk_raw"] if fe.index.dtype == "datetime64[ns]" else fe.set_index(pd.to_datetime(fe.index))["risk_raw"]
|
| 573 |
+
|
| 574 |
+
series = []
|
| 575 |
+
for _, row in forecast.iterrows():
|
| 576 |
+
ds_ts = pd.Timestamp(row["ds"])
|
| 577 |
+
is_future_or_current = ds_ts >= last_date
|
| 578 |
+
|
| 579 |
+
# Historical actual value β look up by normalised date key
|
| 580 |
+
y_norm = None
|
| 581 |
+
try:
|
| 582 |
+
y_val = fe_indexed.loc[ds_ts]
|
| 583 |
+
y_norm = norm(float(y_val))
|
| 584 |
+
except KeyError:
|
| 585 |
+
pass
|
| 586 |
+
|
| 587 |
+
series.append({
|
| 588 |
+
"ds": str(ds_ts.date()),
|
| 589 |
+
"y": y_norm,
|
| 590 |
+
"yhat": round(float(np.clip(row["yhat"] * 100, 0, 100)), 2) if is_future_or_current else None,
|
| 591 |
+
"yhat_lower": round(float(np.clip(row["yhat_lower"] * 100, 0, 100)), 2) if is_future_or_current else None,
|
| 592 |
+
"yhat_upper": round(float(np.clip(row["yhat_upper"] * 100, 0, 100)), 2) if is_future_or_current else None,
|
| 593 |
+
"yhat_range": [
|
| 594 |
+
round(float(np.clip(row["yhat_lower"] * 100, 0, 100)), 2),
|
| 595 |
+
round(float(np.clip(row["yhat_upper"] * 100, 0, 100)), 2),
|
| 596 |
+
] if is_future_or_current else None,
|
| 597 |
+
})
|
| 598 |
+
|
| 599 |
+
return jsonify({
|
| 600 |
+
"ticker": ticker,
|
| 601 |
+
"available_tickers": list(bundle["models"].keys()),
|
| 602 |
+
"last_update_date": str(last_date.date()),
|
| 603 |
+
"current_price": round(fe['Close'].iloc[-1], 2),
|
| 604 |
+
"risk_metrics": {
|
| 605 |
+
"current_score": round(current_risk_norm, 4),
|
| 606 |
+
"danger_threshold": round(threshold, 4),
|
| 607 |
+
"is_in_danger_zone": bool(current_risk_norm > threshold),
|
| 608 |
+
},
|
| 609 |
+
"forecast": {
|
| 610 |
+
"breach_predicted": breach_detected,
|
| 611 |
+
"estimated_days_to_breach": days_to_breach,
|
| 612 |
+
"confidence_level": confidence,
|
| 613 |
+
},
|
| 614 |
+
# Legacy flat keys kept for frontend backward-compat
|
| 615 |
+
"breach_detected": breach_detected,
|
| 616 |
+
"breach_date": breach_date,
|
| 617 |
+
"days_to_breach": days_to_breach,
|
| 618 |
+
"confidence_tier": confidence,
|
| 619 |
+
"risk_score": round(current_risk_norm * 100, 2),
|
| 620 |
+
"threshold": round(float(threshold * 100), 2),
|
| 621 |
+
"forecast_series": series,
|
| 622 |
+
"model_meta": {
|
| 623 |
+
"run_date": str(last_date.date()),
|
| 624 |
+
"horizon_days": horizon,
|
| 625 |
+
"target_threshold": round(float(threshold * 100), 2),
|
| 626 |
+
},
|
| 627 |
+
})
|
| 628 |
+
|
| 629 |
+
|
| 630 |
+
@app.route("/api/risk/all", methods=["GET"])
|
| 631 |
+
def get_all():
|
| 632 |
+
import json
|
| 633 |
+
mock_path = os.path.join(BASE_DIR, 'model_2/frontend_mock_api.json')
|
| 634 |
+
if os.path.exists(mock_path):
|
| 635 |
+
with open(mock_path) as f:
|
| 636 |
+
return jsonify(json.load(f))
|
| 637 |
+
return jsonify({"error": "frontend_mock_api.json not found"}), 404
|
| 638 |
+
|
| 639 |
+
|
| 640 |
+
@app.route("/api/risk/tickers", methods=["GET"])
|
| 641 |
+
def risk_tickers():
|
| 642 |
+
bundle = get_model("risk")
|
| 643 |
+
return jsonify({"tickers": list(bundle["models"].keys())})
|
| 644 |
+
|
| 645 |
+
# βββ Health check βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 646 |
+
|
| 647 |
+
@app.route("/health")
|
| 648 |
+
def health():
|
| 649 |
+
return jsonify({"status": "ok", "extractor_available": EXTRACTOR_AVAILABLE})
|
| 650 |
+
|
| 651 |
+
# βββ Entry point ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 652 |
+
|
| 653 |
+
if __name__ == "__main__":
|
| 654 |
+
os.makedirs("session_data", exist_ok=True)
|
| 655 |
+
# Disable watchdog reloader on Windows β causes WinError 10038 when
|
| 656 |
+
# ML model directories are watched. Debug logging stays active.
|
| 657 |
+
use_reloader = sys.platform != "win32" and not IS_PROD
|
| 658 |
+
app.run(host="0.0.0.0", port=7860)
|
model3.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ContractPulse - Flask Backend
|
| 3 |
+
Wraps clause_extractor.py pipeline for Next.js frontend consumption.
|
| 4 |
+
|
| 5 |
+
Run: python app.py
|
| 6 |
+
Requires: pip install flask flask-cors python-dotenv groq transformers torch
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import os
|
| 10 |
+
import sys
|
| 11 |
+
import json
|
| 12 |
+
from flask import Flask, request, jsonify
|
| 13 |
+
from flask_cors import CORS
|
| 14 |
+
from dotenv import load_dotenv
|
| 15 |
+
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
# ββ Add parent directory to path so clause_extractor.py is importable ββββββββ
|
| 19 |
+
# Adjust this path to wherever clause_extractor.py lives relative to app.py
|
| 20 |
+
EXTRACTOR_DIR = os.path.join(os.path.dirname(__file__), "..")
|
| 21 |
+
sys.path.insert(0, EXTRACTOR_DIR)
|
| 22 |
+
|
| 23 |
+
try:
|
| 24 |
+
from clause_extractor import extract_clauses, generate_pairs, load_model3, score_pairs
|
| 25 |
+
EXTRACTOR_AVAILABLE = True
|
| 26 |
+
except ImportError as e:
|
| 27 |
+
print(f"[WARN] clause_extractor not found: {e}. Running in mock mode.")
|
| 28 |
+
EXTRACTOR_AVAILABLE = False
|
| 29 |
+
|
| 30 |
+
app = Flask(__name__)
|
| 31 |
+
CORS(app, origins=["http://localhost:3000", "http://127.0.0.1:3000"])
|
| 32 |
+
|
| 33 |
+
# ββ Config ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 34 |
+
MODEL3_DIR = os.getenv("MODEL3_DIR", "../model_3")
|
| 35 |
+
MAX_LEN = int(os.getenv("MAX_LEN", "512"))
|
| 36 |
+
CONF_THRESHOLD = float(os.getenv("CONF_THRESHOLD", "0.7"))
|
| 37 |
+
|
| 38 |
+
# Cache model so it's not reloaded on every request
|
| 39 |
+
_model_cache = {}
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def get_model():
|
| 43 |
+
if not _model_cache:
|
| 44 |
+
pipe, tokenizer = load_model3(MODEL3_DIR, MAX_LEN)
|
| 45 |
+
_model_cache["pipe"] = pipe
|
| 46 |
+
_model_cache["tokenizer"] = tokenizer
|
| 47 |
+
return _model_cache["pipe"], _model_cache["tokenizer"]
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
# ββ Mock data for development (when extractor unavailable) ββββββββββββββββββββ
|
| 51 |
+
MOCK_RESPONSE = {
|
| 52 |
+
"clauses_a": [
|
| 53 |
+
{"clause_type": "termination", "clause_text": "Either party may terminate this agreement for convenience upon 30 days written notice.", "contract": "Contract A"},
|
| 54 |
+
{"clause_type": "warranty", "clause_text": "Seller warrants all deliverables shall be free from defects for 24 months from acceptance.", "contract": "Contract A"},
|
| 55 |
+
{"clause_type": "dispute_resolution", "clause_text": "All disputes shall be resolved through binding arbitration in New York under AAA rules.", "contract": "Contract A"},
|
| 56 |
+
{"clause_type": "ip_ownership", "clause_text": "Licensee is granted an exclusive, worldwide, perpetual license to use the Software.", "contract": "Contract A"},
|
| 57 |
+
{"clause_type": "confidentiality", "clause_text": "Neither party shall disclose Confidential Information to any third party without prior written consent.", "contract": "Contract A"},
|
| 58 |
+
{"clause_type": "governing_law", "clause_text": "This agreement shall be governed by the laws of Delaware.", "contract": "Contract A"},
|
| 59 |
+
],
|
| 60 |
+
"clauses_b": [
|
| 61 |
+
{"clause_type": "termination", "clause_text": "This agreement may only be terminated for cause β material breach uncured for 60 days after notice.", "contract": "Contract B"},
|
| 62 |
+
{"clause_type": "warranty", "clause_text": "Seller disclaims all warranties, express or implied, including merchantability or fitness for purpose.", "contract": "Contract B"},
|
| 63 |
+
{"clause_type": "dispute_resolution", "clause_text": "Either party may bring suit in any court of competent jurisdiction to resolve disputes.", "contract": "Contract B"},
|
| 64 |
+
{"clause_type": "ip_ownership", "clause_text": "License granted is non-exclusive, limited to the United States, valid for 12 months only.", "contract": "Contract B"},
|
| 65 |
+
{"clause_type": "confidentiality", "clause_text": "Confidential Information must not be shared with outside parties unless the disclosing party agrees in writing.", "contract": "Contract B"},
|
| 66 |
+
{"clause_type": "governing_law", "clause_text": "This agreement is governed by the laws of California.", "contract": "Contract B"},
|
| 67 |
+
],
|
| 68 |
+
"conflicts": [
|
| 69 |
+
{
|
| 70 |
+
"clause_type": "termination",
|
| 71 |
+
"clause_a": "Either party may terminate this agreement for convenience upon 30 days written notice.",
|
| 72 |
+
"clause_b": "This agreement may only be terminated for cause β material breach uncured for 60 days after notice.",
|
| 73 |
+
"predicted_label": "contradiction",
|
| 74 |
+
"predicted_score": 0.9312,
|
| 75 |
+
"contradiction_score": 0.9312,
|
| 76 |
+
"all_scores": {"contradiction": 0.9312, "entailment": 0.0421, "neutral": 0.0267},
|
| 77 |
+
"token_length": 87,
|
| 78 |
+
"uncertain": False,
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"clause_type": "warranty",
|
| 82 |
+
"clause_a": "Seller warrants all deliverables shall be free from defects for 24 months from acceptance.",
|
| 83 |
+
"clause_b": "Seller disclaims all warranties, express or implied, including merchantability or fitness for purpose.",
|
| 84 |
+
"predicted_label": "contradiction",
|
| 85 |
+
"predicted_score": 0.9741,
|
| 86 |
+
"contradiction_score": 0.9741,
|
| 87 |
+
"all_scores": {"contradiction": 0.9741, "entailment": 0.0159, "neutral": 0.0100},
|
| 88 |
+
"token_length": 72,
|
| 89 |
+
"uncertain": False,
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"clause_type": "dispute_resolution",
|
| 93 |
+
"clause_a": "All disputes shall be resolved through binding arbitration in New York under AAA rules.",
|
| 94 |
+
"clause_b": "Either party may bring suit in any court of competent jurisdiction to resolve disputes.",
|
| 95 |
+
"predicted_label": "contradiction",
|
| 96 |
+
"predicted_score": 0.8823,
|
| 97 |
+
"contradiction_score": 0.8823,
|
| 98 |
+
"all_scores": {"contradiction": 0.8823, "entailment": 0.0712, "neutral": 0.0465},
|
| 99 |
+
"token_length": 65,
|
| 100 |
+
"uncertain": False,
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"clause_type": "ip_ownership",
|
| 104 |
+
"clause_a": "Licensee is granted an exclusive, worldwide, perpetual license to use the Software.",
|
| 105 |
+
"clause_b": "License granted is non-exclusive, limited to the United States, valid for 12 months only.",
|
| 106 |
+
"predicted_label": "contradiction",
|
| 107 |
+
"predicted_score": 0.9567,
|
| 108 |
+
"contradiction_score": 0.9567,
|
| 109 |
+
"all_scores": {"contradiction": 0.9567, "entailment": 0.0281, "neutral": 0.0152},
|
| 110 |
+
"token_length": 68,
|
| 111 |
+
"uncertain": False,
|
| 112 |
+
},
|
| 113 |
+
{
|
| 114 |
+
"clause_type": "governing_law",
|
| 115 |
+
"clause_a": "This agreement shall be governed by the laws of Delaware.",
|
| 116 |
+
"clause_b": "This agreement is governed by the laws of California.",
|
| 117 |
+
"predicted_label": "contradiction",
|
| 118 |
+
"predicted_score": 0.7834,
|
| 119 |
+
"contradiction_score": 0.7834,
|
| 120 |
+
"all_scores": {"contradiction": 0.7834, "entailment": 0.1243, "neutral": 0.0923},
|
| 121 |
+
"token_length": 45,
|
| 122 |
+
"uncertain": False,
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"clause_type": "confidentiality",
|
| 126 |
+
"clause_a": "Neither party shall disclose Confidential Information to any third party without prior written consent.",
|
| 127 |
+
"clause_b": "Confidential Information must not be shared with outside parties unless the disclosing party agrees in writing.",
|
| 128 |
+
"predicted_label": "neutral",
|
| 129 |
+
"predicted_score": 0.5821,
|
| 130 |
+
"contradiction_score": 0.2341,
|
| 131 |
+
"all_scores": {"contradiction": 0.2341, "entailment": 0.1838, "neutral": 0.5821},
|
| 132 |
+
"token_length": 58,
|
| 133 |
+
"uncertain": True,
|
| 134 |
+
},
|
| 135 |
+
]
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
# ββ Routes ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 140 |
+
|
| 141 |
+
@app.route("/health", methods=["GET"])
|
| 142 |
+
def health():
|
| 143 |
+
return jsonify({"status": "ok", "extractor_available": EXTRACTOR_AVAILABLE})
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
@app.route("/analyze", methods=["POST"])
|
| 147 |
+
def analyze():
|
| 148 |
+
"""
|
| 149 |
+
POST /analyze
|
| 150 |
+
Body: { "contract_a": "...", "contract_b": "..." }
|
| 151 |
+
Returns: { clauses_a, clauses_b, conflicts }
|
| 152 |
+
"""
|
| 153 |
+
data = request.get_json()
|
| 154 |
+
if not data:
|
| 155 |
+
return jsonify({"error": "No JSON body provided"}), 400
|
| 156 |
+
|
| 157 |
+
contract_a = data.get("contract_a", "").strip()
|
| 158 |
+
contract_b = data.get("contract_b", "").strip()
|
| 159 |
+
|
| 160 |
+
if not contract_a or not contract_b:
|
| 161 |
+
return jsonify({"error": "Both contract_a and contract_b are required"}), 400
|
| 162 |
+
|
| 163 |
+
# ββ Dev mode: return mock data ββββββββββββββββββββββββββββββββββββββββββββ
|
| 164 |
+
if not EXTRACTOR_AVAILABLE:
|
| 165 |
+
print("[MOCK] Returning mock analysis data")
|
| 166 |
+
return jsonify(MOCK_RESPONSE)
|
| 167 |
+
|
| 168 |
+
# ββ Production: run the real pipeline ββββββββββββββββββββββββββββββββββββ
|
| 169 |
+
try:
|
| 170 |
+
print("\n[API] Extracting clauses via Groq...")
|
| 171 |
+
clauses_a = extract_clauses(contract_a, "Contract A")
|
| 172 |
+
clauses_b = extract_clauses(contract_b, "Contract B")
|
| 173 |
+
|
| 174 |
+
if not clauses_a or not clauses_b:
|
| 175 |
+
return jsonify({"error": "Clause extraction returned empty. Check GROQ_API_KEY."}), 500
|
| 176 |
+
|
| 177 |
+
print("[API] Generating pairs...")
|
| 178 |
+
pairs = generate_pairs(clauses_a, clauses_b)
|
| 179 |
+
|
| 180 |
+
conflicts = []
|
| 181 |
+
if pairs:
|
| 182 |
+
print(f"[API] Scoring {len(pairs)} pairs with Model 3...")
|
| 183 |
+
pipe, tokenizer = get_model()
|
| 184 |
+
conflicts = score_pairs(pairs, pipe, tokenizer, MAX_LEN, CONF_THRESHOLD)
|
| 185 |
+
|
| 186 |
+
return jsonify({
|
| 187 |
+
"clauses_a": clauses_a,
|
| 188 |
+
"clauses_b": clauses_b,
|
| 189 |
+
"conflicts": conflicts,
|
| 190 |
+
})
|
| 191 |
+
|
| 192 |
+
except Exception as e:
|
| 193 |
+
print(f"[ERROR] Pipeline failed: {e}")
|
| 194 |
+
return jsonify({"error": str(e)}), 500
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
if __name__ == "__main__":
|
| 198 |
+
print("\n[ContractPulse Backend] Starting on http://localhost:5000")
|
| 199 |
+
print(f"[ContractPulse Backend] Extractor available: {EXTRACTOR_AVAILABLE}")
|
| 200 |
+
app.run(debug=True, port=5000)
|
scheduler_api.py
ADDED
|
@@ -0,0 +1,548 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ContractPulse Scheduler β Flask REST API
|
| 3 |
+
=========================================
|
| 4 |
+
Wraps the TaskScheduler + MeetingRoomScheduler into a clean HTTP API.
|
| 5 |
+
|
| 6 |
+
Endpoints:
|
| 7 |
+
POST /api/process_breach β process a single breach
|
| 8 |
+
POST /api/process_batch β process multiple breaches
|
| 9 |
+
GET /api/tasks β list all tasks (optional ?severity=CRITICAL)
|
| 10 |
+
GET /api/meetings β list all booked meetings
|
| 11 |
+
GET /api/departments β department workload summary
|
| 12 |
+
POST /api/reset β reset the scheduler state
|
| 13 |
+
GET /api/health β health check
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from __future__ import annotations
|
| 17 |
+
|
| 18 |
+
import os
|
| 19 |
+
import smtplib
|
| 20 |
+
from email.message import EmailMessage
|
| 21 |
+
import uuid
|
| 22 |
+
from dataclasses import dataclass, field
|
| 23 |
+
from datetime import datetime, timedelta
|
| 24 |
+
from enum import Enum
|
| 25 |
+
from typing import Optional
|
| 26 |
+
|
| 27 |
+
from flask import Blueprint, jsonify, request
|
| 28 |
+
|
| 29 |
+
scheduler_bp = Blueprint("scheduler", __name__)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 33 |
+
# SCHEDULER CORE
|
| 34 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 35 |
+
|
| 36 |
+
class Department(Enum):
|
| 37 |
+
FINANCE = "Finance"
|
| 38 |
+
LEGAL = "Legal"
|
| 39 |
+
TECH = "Tech"
|
| 40 |
+
OPERATIONS = "Operations"
|
| 41 |
+
COMPLIANCE = "Compliance"
|
| 42 |
+
EXECUTIVE = "Executive"
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class Severity(Enum):
|
| 46 |
+
LOW = 1
|
| 47 |
+
MEDIUM = 2
|
| 48 |
+
HIGH = 3
|
| 49 |
+
CRITICAL = 4
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class ObligationType(Enum):
|
| 53 |
+
REVENUE = "revenue"
|
| 54 |
+
DEBT_TO_EQUITY = "debt_to_equity_ratio"
|
| 55 |
+
CURRENT_RATIO = "current_ratio"
|
| 56 |
+
REPORT_SUBMISSION = "report_submission"
|
| 57 |
+
WORKFORCE_SIZE = "workforce_size"
|
| 58 |
+
CAPEX_LIMIT = "capex_limit"
|
| 59 |
+
LIQUIDITY_RATIO = "liquidity_ratio"
|
| 60 |
+
INSURANCE_COVERAGE = "insurance_coverage"
|
| 61 |
+
UNKNOWN = "unknown"
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
OBLIGATION_OWNERS: dict[ObligationType, list[Department]] = {
|
| 65 |
+
ObligationType.REVENUE: [Department.FINANCE, Department.EXECUTIVE],
|
| 66 |
+
ObligationType.DEBT_TO_EQUITY: [Department.FINANCE, Department.LEGAL],
|
| 67 |
+
ObligationType.CURRENT_RATIO: [Department.FINANCE],
|
| 68 |
+
ObligationType.REPORT_SUBMISSION: [Department.COMPLIANCE, Department.LEGAL],
|
| 69 |
+
ObligationType.WORKFORCE_SIZE: [Department.OPERATIONS, Department.LEGAL],
|
| 70 |
+
ObligationType.CAPEX_LIMIT: [Department.FINANCE, Department.OPERATIONS],
|
| 71 |
+
ObligationType.LIQUIDITY_RATIO: [Department.FINANCE, Department.EXECUTIVE],
|
| 72 |
+
ObligationType.INSURANCE_COVERAGE: [Department.COMPLIANCE, Department.LEGAL],
|
| 73 |
+
ObligationType.UNKNOWN: [Department.LEGAL],
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
SEVERITY_ESCALATION: dict[Severity, list[Department]] = {
|
| 77 |
+
Severity.LOW: [],
|
| 78 |
+
Severity.MEDIUM: [],
|
| 79 |
+
Severity.HIGH: [Department.LEGAL],
|
| 80 |
+
Severity.CRITICAL: [Department.LEGAL, Department.EXECUTIVE],
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
SEVERITY_DUE_HOURS: dict[Severity, int] = {
|
| 84 |
+
Severity.LOW: 72,
|
| 85 |
+
Severity.MEDIUM: 48,
|
| 86 |
+
Severity.HIGH: 24,
|
| 87 |
+
Severity.CRITICAL: 4,
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
@dataclass
|
| 92 |
+
class Room:
|
| 93 |
+
room_id: str
|
| 94 |
+
name: str
|
| 95 |
+
capacity: int
|
| 96 |
+
has_av: bool = True
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
@dataclass(order=True)
|
| 100 |
+
class MeetingSlot:
|
| 101 |
+
start: datetime
|
| 102 |
+
end: datetime
|
| 103 |
+
meeting_id: str = field(compare=False)
|
| 104 |
+
room_id: str = field(compare=False)
|
| 105 |
+
title: str = field(compare=False)
|
| 106 |
+
attendees: int = field(compare=False, default=4)
|
| 107 |
+
|
| 108 |
+
def overlaps(self, start: datetime, end: datetime) -> bool:
|
| 109 |
+
return self.start < end and self.end > start
|
| 110 |
+
|
| 111 |
+
def to_dict(self) -> dict:
|
| 112 |
+
return {
|
| 113 |
+
"meeting_id": self.meeting_id,
|
| 114 |
+
"title": self.title,
|
| 115 |
+
"room": self.room_id,
|
| 116 |
+
"start": self.start.isoformat(),
|
| 117 |
+
"end": self.end.isoformat(),
|
| 118 |
+
"attendees": self.attendees,
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
@dataclass
|
| 123 |
+
class Meeting:
|
| 124 |
+
title: str
|
| 125 |
+
start: datetime
|
| 126 |
+
end: datetime
|
| 127 |
+
attendees: int
|
| 128 |
+
needs_av: bool = False
|
| 129 |
+
meeting_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
class MeetingRoomScheduler:
|
| 133 |
+
def __init__(self, rooms: list[Room]) -> None:
|
| 134 |
+
self.rooms = sorted(rooms, key=lambda r: r.capacity)
|
| 135 |
+
self._bookings: dict[str, list[MeetingSlot]] = {r.room_id: [] for r in rooms}
|
| 136 |
+
|
| 137 |
+
def reset(self) -> None:
|
| 138 |
+
self._bookings = {r.room_id: [] for r in self.rooms}
|
| 139 |
+
|
| 140 |
+
def schedule(self, meeting: Meeting) -> Optional[MeetingSlot]:
|
| 141 |
+
if meeting.end <= meeting.start:
|
| 142 |
+
return None
|
| 143 |
+
for room in self._eligible_rooms(meeting):
|
| 144 |
+
if self._is_free(room, meeting.start, meeting.end):
|
| 145 |
+
slot = MeetingSlot(
|
| 146 |
+
start=meeting.start, end=meeting.end,
|
| 147 |
+
meeting_id=meeting.meeting_id, room_id=room.room_id,
|
| 148 |
+
title=meeting.title, attendees=meeting.attendees,
|
| 149 |
+
)
|
| 150 |
+
self._bookings[room.room_id].append(slot)
|
| 151 |
+
self._bookings[room.room_id].sort()
|
| 152 |
+
return slot
|
| 153 |
+
return None
|
| 154 |
+
|
| 155 |
+
def free_slots(self, room_id: str, window_start: datetime, window_end: datetime, min_duration_minutes: int = 30) -> list[tuple]:
|
| 156 |
+
slots = self._bookings.get(room_id, [])
|
| 157 |
+
free, cursor = [], window_start
|
| 158 |
+
for slot in slots:
|
| 159 |
+
if slot.start > cursor:
|
| 160 |
+
if (slot.start - cursor).total_seconds() / 60 >= min_duration_minutes:
|
| 161 |
+
free.append((cursor, slot.start))
|
| 162 |
+
cursor = max(cursor, slot.end)
|
| 163 |
+
if cursor < window_end and (window_end - cursor).total_seconds() / 60 >= min_duration_minutes:
|
| 164 |
+
free.append((cursor, window_end))
|
| 165 |
+
return free
|
| 166 |
+
|
| 167 |
+
def all_bookings(self) -> list[MeetingSlot]:
|
| 168 |
+
result = []
|
| 169 |
+
for slots in self._bookings.values():
|
| 170 |
+
result.extend(slots)
|
| 171 |
+
return sorted(result)
|
| 172 |
+
|
| 173 |
+
def _eligible_rooms(self, meeting: Meeting) -> list[Room]:
|
| 174 |
+
return [r for r in self.rooms if r.capacity >= meeting.attendees and (not meeting.needs_av or r.has_av)]
|
| 175 |
+
|
| 176 |
+
def _is_free(self, room: Room, start: datetime, end: datetime) -> bool:
|
| 177 |
+
return not any(s.overlaps(start, end) for s in self._bookings[room.room_id])
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
@dataclass
|
| 181 |
+
class BreachedObligation:
|
| 182 |
+
contract_id: str
|
| 183 |
+
obligation_type: ObligationType
|
| 184 |
+
metric_name: str
|
| 185 |
+
threshold_value: float
|
| 186 |
+
current_value: float
|
| 187 |
+
predicted_value: Optional[float]
|
| 188 |
+
deadline: str
|
| 189 |
+
consequence: str
|
| 190 |
+
conflict_with: Optional[str] = None
|
| 191 |
+
|
| 192 |
+
@property
|
| 193 |
+
def breach_gap(self) -> float:
|
| 194 |
+
return self.current_value - self.threshold_value
|
| 195 |
+
|
| 196 |
+
def auto_severity(self) -> Severity:
|
| 197 |
+
critical_kw = {"termination", "default", "liquidation", "penalty"}
|
| 198 |
+
high_kw = {"notice", "cure period", "acceleration"}
|
| 199 |
+
cl = self.consequence.lower()
|
| 200 |
+
if any(k in cl for k in critical_kw) or self.conflict_with:
|
| 201 |
+
return Severity.CRITICAL
|
| 202 |
+
if any(k in cl for k in high_kw):
|
| 203 |
+
return Severity.HIGH
|
| 204 |
+
gap_pct = abs(self.breach_gap) / (abs(self.threshold_value) + 1e-9)
|
| 205 |
+
if gap_pct > 0.30:
|
| 206 |
+
return Severity.HIGH
|
| 207 |
+
if gap_pct > 0.10:
|
| 208 |
+
return Severity.MEDIUM
|
| 209 |
+
return Severity.LOW
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
@dataclass
|
| 213 |
+
class Task:
|
| 214 |
+
task_id: str
|
| 215 |
+
title: str
|
| 216 |
+
description: str
|
| 217 |
+
assigned_to: list[Department]
|
| 218 |
+
severity: Severity
|
| 219 |
+
due_by: datetime
|
| 220 |
+
contract_id: str
|
| 221 |
+
obligation_type: ObligationType
|
| 222 |
+
created_at: datetime = field(default_factory=datetime.now)
|
| 223 |
+
|
| 224 |
+
def to_dict(self) -> dict:
|
| 225 |
+
return {
|
| 226 |
+
"task_id": self.task_id,
|
| 227 |
+
"title": self.title,
|
| 228 |
+
"description": self.description,
|
| 229 |
+
"assigned_to": [d.value for d in self.assigned_to],
|
| 230 |
+
"severity": self.severity.name,
|
| 231 |
+
"due_by": self.due_by.isoformat(),
|
| 232 |
+
"contract_id": self.contract_id,
|
| 233 |
+
"obligation_type": self.obligation_type.value,
|
| 234 |
+
"created_at": self.created_at.isoformat(),
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
class TaskScheduler:
|
| 239 |
+
def __init__(self, room_scheduler: Optional[MeetingRoomScheduler] = None) -> None:
|
| 240 |
+
self._tasks: list[Task] = []
|
| 241 |
+
self._room_scheduler = room_scheduler
|
| 242 |
+
self._task_counter = 0
|
| 243 |
+
self._auto_meetings: list[MeetingSlot] = []
|
| 244 |
+
|
| 245 |
+
def reset(self) -> None:
|
| 246 |
+
self._tasks = []
|
| 247 |
+
self._task_counter = 0
|
| 248 |
+
self._auto_meetings = []
|
| 249 |
+
if self._room_scheduler:
|
| 250 |
+
self._room_scheduler.reset()
|
| 251 |
+
|
| 252 |
+
def process_breach(self, breach: BreachedObligation) -> tuple[Task, Optional[MeetingSlot]]:
|
| 253 |
+
severity = breach.auto_severity()
|
| 254 |
+
departments = self._resolve_departments(breach.obligation_type, severity)
|
| 255 |
+
due_by = datetime.now() + timedelta(hours=SEVERITY_DUE_HOURS[severity])
|
| 256 |
+
self._task_counter += 1
|
| 257 |
+
task = Task(
|
| 258 |
+
task_id=f"TASK-{self._task_counter:04d}",
|
| 259 |
+
title=f"[{severity.name}] Breach: {breach.metric_name} in {breach.contract_id}",
|
| 260 |
+
description=self._build_description(breach, severity),
|
| 261 |
+
assigned_to=departments,
|
| 262 |
+
severity=severity,
|
| 263 |
+
due_by=due_by,
|
| 264 |
+
contract_id=breach.contract_id,
|
| 265 |
+
obligation_type=breach.obligation_type,
|
| 266 |
+
)
|
| 267 |
+
self._tasks.append(task)
|
| 268 |
+
|
| 269 |
+
meeting_slot = None
|
| 270 |
+
if breach.conflict_with and self._room_scheduler:
|
| 271 |
+
meeting_slot = self._book_conflict_meeting(task, breach)
|
| 272 |
+
|
| 273 |
+
self._alert_team_via_email(task)
|
| 274 |
+
|
| 275 |
+
return task, meeting_slot
|
| 276 |
+
|
| 277 |
+
def _alert_team_via_email(self, task: Task) -> None:
|
| 278 |
+
if task.severity not in [Severity.CRITICAL, Severity.HIGH]:
|
| 279 |
+
return
|
| 280 |
+
email_body = f"""
|
| 281 |
+
[URGENT] Covenant Breach Alert: {task.title}
|
| 282 |
+
|
| 283 |
+
A {task.severity.name} severity breach has been processed by the ContractPulse Response Engine.
|
| 284 |
+
|
| 285 |
+
Task ID: {task.task_id}
|
| 286 |
+
Assigned To: {[d.value for d in task.assigned_to]}
|
| 287 |
+
Due By: {task.due_by.strftime('%Y-%m-%d %H:%M:%S')}
|
| 288 |
+
|
| 289 |
+
Details:
|
| 290 |
+
{task.description}
|
| 291 |
+
|
| 292 |
+
Please take immediate action.
|
| 293 |
+
"""
|
| 294 |
+
|
| 295 |
+
# Try to send a real email if credentials are set
|
| 296 |
+
sender_email = os.getenv("SMTP_EMAIL")
|
| 297 |
+
sender_password = os.getenv("SMTP_PASSWORD")
|
| 298 |
+
receiver_email = os.getenv("ALERT_RECEIVER_EMAIL", sender_email) # Send to self if no specific receiver
|
| 299 |
+
|
| 300 |
+
if sender_email and sender_password and receiver_email:
|
| 301 |
+
try:
|
| 302 |
+
msg = EmailMessage()
|
| 303 |
+
msg.set_content(email_body)
|
| 304 |
+
msg['Subject'] = f"[URGENT] Covenant Breach Alert: {task.title}"
|
| 305 |
+
msg['From'] = sender_email
|
| 306 |
+
msg['To'] = receiver_email
|
| 307 |
+
|
| 308 |
+
# Assuming Gmail SMTP for this example
|
| 309 |
+
with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
|
| 310 |
+
server.login(sender_email, sender_password)
|
| 311 |
+
server.send_message(msg)
|
| 312 |
+
|
| 313 |
+
print(f"π§ REAL EMAIL SUCCESSFULLY SENT TO {receiver_email}")
|
| 314 |
+
return
|
| 315 |
+
except Exception as e:
|
| 316 |
+
print(f"Failed to send real email: {e}. Falling back to terminal log.")
|
| 317 |
+
|
| 318 |
+
# Fallback to terminal if no credentials or sending fails
|
| 319 |
+
print("\n" + "="*60)
|
| 320 |
+
print("π§ AUTOMATED EMAIL ALERT DISPATCHED (Terminal Mock)")
|
| 321 |
+
print("="*60)
|
| 322 |
+
print(email_body)
|
| 323 |
+
print("="*60 + "\n")
|
| 324 |
+
|
| 325 |
+
def process_batch(self, breaches: list[BreachedObligation]) -> tuple[list[Task], list[MeetingSlot]]:
|
| 326 |
+
results = [self.process_breach(b) for b in breaches]
|
| 327 |
+
tasks = [r[0] for r in results]
|
| 328 |
+
meetings = [r[1] for r in results if r[1]]
|
| 329 |
+
return sorted(tasks, key=lambda t: t.severity.value, reverse=True), meetings
|
| 330 |
+
|
| 331 |
+
def all_tasks(self) -> list[Task]:
|
| 332 |
+
return sorted(self._tasks, key=lambda t: t.severity.value, reverse=True)
|
| 333 |
+
|
| 334 |
+
def department_summary(self) -> dict:
|
| 335 |
+
summary = {}
|
| 336 |
+
for task in self._tasks:
|
| 337 |
+
for dept in task.assigned_to:
|
| 338 |
+
key = dept.value
|
| 339 |
+
if key not in summary:
|
| 340 |
+
summary[key] = {s.name: 0 for s in Severity}
|
| 341 |
+
summary[key][task.severity.name] += 1
|
| 342 |
+
return summary
|
| 343 |
+
|
| 344 |
+
def _resolve_departments(self, ob_type: ObligationType, severity: Severity) -> list[Department]:
|
| 345 |
+
primary = OBLIGATION_OWNERS.get(ob_type, [Department.LEGAL])
|
| 346 |
+
escalated = SEVERITY_ESCALATION.get(severity, [])
|
| 347 |
+
return list(dict.fromkeys(primary + escalated))
|
| 348 |
+
|
| 349 |
+
def _build_description(self, breach: BreachedObligation, severity: Severity) -> str:
|
| 350 |
+
lines = [
|
| 351 |
+
f"Contract : {breach.contract_id}",
|
| 352 |
+
f"Metric : {breach.metric_name}",
|
| 353 |
+
f"Threshold : {breach.threshold_value}",
|
| 354 |
+
f"Current : {breach.current_value} (gap={breach.breach_gap:+.2f})",
|
| 355 |
+
]
|
| 356 |
+
if breach.predicted_value is not None:
|
| 357 |
+
lines.append(f"Predicted : {breach.predicted_value} (next period)")
|
| 358 |
+
lines += [f"Deadline : {breach.deadline}", f"Consequence: {breach.consequence}"]
|
| 359 |
+
if breach.conflict_with:
|
| 360 |
+
lines.append(f"β Conflicts with: {breach.conflict_with}")
|
| 361 |
+
return "\n ".join(lines)
|
| 362 |
+
|
| 363 |
+
def _book_conflict_meeting(self, task: Task, breach: BreachedObligation) -> Optional[MeetingSlot]:
|
| 364 |
+
now = datetime.now().replace(second=0, microsecond=0)
|
| 365 |
+
window_end = now + timedelta(hours=8)
|
| 366 |
+
for room in self._room_scheduler.rooms:
|
| 367 |
+
free = self._room_scheduler.free_slots(room.room_id, now, window_end, min_duration_minutes=60)
|
| 368 |
+
if free:
|
| 369 |
+
start, _ = free[0]
|
| 370 |
+
meeting = Meeting(
|
| 371 |
+
title=f"Conflict Review: {breach.contract_id} vs {breach.conflict_with}",
|
| 372 |
+
start=start, end=start + timedelta(minutes=60),
|
| 373 |
+
attendees=max(3, len(task.assigned_to) * 2),
|
| 374 |
+
needs_av=True,
|
| 375 |
+
)
|
| 376 |
+
slot = self._room_scheduler.schedule(meeting)
|
| 377 |
+
if slot:
|
| 378 |
+
self._auto_meetings.append(slot)
|
| 379 |
+
return slot
|
| 380 |
+
return None
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 384 |
+
# GLOBAL SCHEDULER INSTANCE
|
| 385 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 386 |
+
|
| 387 |
+
def _build_scheduler() -> TaskScheduler:
|
| 388 |
+
rooms = [
|
| 389 |
+
Room("R1", "Boardroom", capacity=20, has_av=True),
|
| 390 |
+
Room("R2", "Conference A", capacity=10, has_av=True),
|
| 391 |
+
Room("R3", "Conference B", capacity=8, has_av=False),
|
| 392 |
+
Room("R4", "Huddle Room 1", capacity=4, has_av=False),
|
| 393 |
+
Room("R5", "Huddle Room 2", capacity=4, has_av=True),
|
| 394 |
+
]
|
| 395 |
+
return TaskScheduler(room_scheduler=MeetingRoomScheduler(rooms))
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
# Use a mutable container so blueprint routes can rebind it reliably
|
| 399 |
+
_state = {"scheduler": _build_scheduler()}
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
def get_scheduler() -> TaskScheduler:
|
| 403 |
+
return _state["scheduler"]
|
| 404 |
+
|
| 405 |
+
|
| 406 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 407 |
+
# HELPERS
|
| 408 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 409 |
+
|
| 410 |
+
def _parse_obligation_type(raw: str) -> ObligationType:
|
| 411 |
+
for ot in ObligationType:
|
| 412 |
+
if ot.value == raw:
|
| 413 |
+
return ot
|
| 414 |
+
return ObligationType.UNKNOWN
|
| 415 |
+
|
| 416 |
+
|
| 417 |
+
def _breach_from_dict(d: dict) -> BreachedObligation:
|
| 418 |
+
return BreachedObligation(
|
| 419 |
+
contract_id=d["contract_id"],
|
| 420 |
+
obligation_type=_parse_obligation_type(d.get("obligation_type", "unknown")),
|
| 421 |
+
metric_name=d.get("metric_name", d.get("obligation_type", "unknown")),
|
| 422 |
+
threshold_value=float(d["threshold_value"]),
|
| 423 |
+
current_value=float(d["current_value"]),
|
| 424 |
+
predicted_value=float(d["predicted_value"]) if d.get("predicted_value") is not None else None,
|
| 425 |
+
deadline=d.get("deadline", "unknown"),
|
| 426 |
+
consequence=d.get("consequence", "notice"),
|
| 427 |
+
# BUG FIX: treat empty string as None so conflict meetings are not
|
| 428 |
+
# accidentally triggered when the frontend sends conflict_with: ""
|
| 429 |
+
conflict_with=d.get("conflict_with") or None,
|
| 430 |
+
)
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 434 |
+
# ROUTES
|
| 435 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 436 |
+
|
| 437 |
+
@scheduler_bp.get("/api/health")
|
| 438 |
+
def health():
|
| 439 |
+
return jsonify({"status": "ok", "timestamp": datetime.now().isoformat()})
|
| 440 |
+
|
| 441 |
+
|
| 442 |
+
@scheduler_bp.post("/api/process_breach")
|
| 443 |
+
def process_breach():
|
| 444 |
+
"""
|
| 445 |
+
Body (JSON):
|
| 446 |
+
{
|
| 447 |
+
"contract_id": "CTR-001",
|
| 448 |
+
"obligation_type": "revenue",
|
| 449 |
+
"metric_name": "revenue",
|
| 450 |
+
"threshold_value": 5000000,
|
| 451 |
+
"current_value": 3800000,
|
| 452 |
+
"predicted_value": 3500000, // optional
|
| 453 |
+
"deadline": "annually",
|
| 454 |
+
"consequence": "termination",
|
| 455 |
+
"conflict_with": null // optional contract_id
|
| 456 |
+
}
|
| 457 |
+
"""
|
| 458 |
+
body = request.get_json(force=True)
|
| 459 |
+
try:
|
| 460 |
+
breach = _breach_from_dict(body)
|
| 461 |
+
except (KeyError, ValueError) as e:
|
| 462 |
+
return jsonify({"error": f"Invalid payload: {e}"}), 400
|
| 463 |
+
|
| 464 |
+
task, meeting_slot = get_scheduler().process_breach(breach)
|
| 465 |
+
|
| 466 |
+
return jsonify({
|
| 467 |
+
"task": task.to_dict(),
|
| 468 |
+
"meeting": meeting_slot.to_dict() if meeting_slot else None,
|
| 469 |
+
}), 201
|
| 470 |
+
|
| 471 |
+
|
| 472 |
+
@scheduler_bp.post("/api/process_batch")
|
| 473 |
+
def process_batch():
|
| 474 |
+
"""
|
| 475 |
+
Body: { "breaches": [ <breach>, ... ] }
|
| 476 |
+
Returns tasks sorted by severity desc + any auto-booked meetings.
|
| 477 |
+
"""
|
| 478 |
+
body = request.get_json(force=True)
|
| 479 |
+
raw_breaches = body.get("breaches", [])
|
| 480 |
+
if not raw_breaches:
|
| 481 |
+
return jsonify({"error": "breaches list is empty"}), 400
|
| 482 |
+
|
| 483 |
+
try:
|
| 484 |
+
breaches = [_breach_from_dict(b) for b in raw_breaches]
|
| 485 |
+
except (KeyError, ValueError) as e:
|
| 486 |
+
return jsonify({"error": f"Invalid breach in batch: {e}"}), 400
|
| 487 |
+
|
| 488 |
+
tasks, meetings = get_scheduler().process_batch(breaches)
|
| 489 |
+
|
| 490 |
+
return jsonify({
|
| 491 |
+
"tasks": [t.to_dict() for t in tasks],
|
| 492 |
+
"meetings": [m.to_dict() for m in meetings],
|
| 493 |
+
"count": len(tasks),
|
| 494 |
+
}), 201
|
| 495 |
+
|
| 496 |
+
|
| 497 |
+
@scheduler_bp.get("/api/tasks")
|
| 498 |
+
def get_tasks():
|
| 499 |
+
"""
|
| 500 |
+
Query params:
|
| 501 |
+
?severity=CRITICAL (optional filter)
|
| 502 |
+
?department=Finance (optional filter)
|
| 503 |
+
"""
|
| 504 |
+
sev_filter = request.args.get("severity", "").upper()
|
| 505 |
+
dept_filter = request.args.get("department", "")
|
| 506 |
+
tasks = get_scheduler().all_tasks()
|
| 507 |
+
|
| 508 |
+
if sev_filter and sev_filter in Severity.__members__:
|
| 509 |
+
target = Severity[sev_filter]
|
| 510 |
+
tasks = [t for t in tasks if t.severity == target]
|
| 511 |
+
|
| 512 |
+
if dept_filter:
|
| 513 |
+
dept_vals = [d.value for d in Department]
|
| 514 |
+
if dept_filter in dept_vals:
|
| 515 |
+
target_dept = Department(dept_filter)
|
| 516 |
+
tasks = [t for t in tasks if target_dept in t.assigned_to]
|
| 517 |
+
|
| 518 |
+
return jsonify({"tasks": [t.to_dict() for t in tasks], "count": len(tasks)})
|
| 519 |
+
|
| 520 |
+
|
| 521 |
+
# BUG FIX: There were TWO @scheduler_bp.get("/api/meetings") routes in the
|
| 522 |
+
# original file. Flask registers only the FIRST one it sees, which called a
|
| 523 |
+
# non-existent method `_task_scheduler_meetings()` and crashed on every
|
| 524 |
+
# request. The second (correct) route was silently ignored.
|
| 525 |
+
# Fixed: one route that correctly reads _auto_meetings from the scheduler.
|
| 526 |
+
@scheduler_bp.get("/api/meetings")
|
| 527 |
+
def get_meetings():
|
| 528 |
+
"""Return all auto-booked conflict-resolution meeting slots."""
|
| 529 |
+
auto = get_scheduler()._auto_meetings or []
|
| 530 |
+
return jsonify({"meetings": [m.to_dict() for m in auto], "count": len(auto)})
|
| 531 |
+
|
| 532 |
+
|
| 533 |
+
@scheduler_bp.get("/api/departments")
|
| 534 |
+
def get_departments():
|
| 535 |
+
return jsonify({"summary": get_scheduler().department_summary()})
|
| 536 |
+
|
| 537 |
+
|
| 538 |
+
@scheduler_bp.post("/api/reset")
|
| 539 |
+
def reset():
|
| 540 |
+
# BUG FIX: rebinding a module-level `scheduler` variable inside a
|
| 541 |
+
# blueprint function is unreliable β the local name rebinds but callers
|
| 542 |
+
# that already imported the old object keep the stale reference.
|
| 543 |
+
# Fixed: mutate the shared _state dict so all routes see the new instance.
|
| 544 |
+
_state["scheduler"] = _build_scheduler()
|
| 545 |
+
return jsonify({"status": "reset", "timestamp": datetime.now().isoformat()})
|
| 546 |
+
|
| 547 |
+
# Add this at the very bottom of scheduler_api.py
|
| 548 |
+
scheduler = _state["scheduler"]
|
test_pipeline.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
|
| 5 |
+
# Add project root to path so 'src' package is found
|
| 6 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 7 |
+
|
| 8 |
+
from all_model_code.model_1_code.pipeline import ObligationPipeline
|
| 9 |
+
|
| 10 |
+
def run_test():
|
| 11 |
+
# βββ Sample contract (you can replace this later) βββ
|
| 12 |
+
sample_contract = """
|
| 13 |
+
The Borrower shall maintain at all times a Debt-to-Equity Ratio of not more than 2.5 to 1.0, tested quarterly.
|
| 14 |
+
Failure to maintain this ratio shall constitute an event of default.
|
| 15 |
+
|
| 16 |
+
The Company shall maintain minimum annual gross revenue of at least $5,000,000 during each contract year.
|
| 17 |
+
|
| 18 |
+
The Vendor shall obtain and maintain commercial general liability insurance with a minimum coverage amount of $2,000,000.
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
# βββ Initialize pipeline βββ
|
| 22 |
+
# Ensure the model path points to the actual folder where your weights live
|
| 23 |
+
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 24 |
+
model_path = os.path.join(BASE_DIR, "ckpt_obligation_fast")
|
| 25 |
+
|
| 26 |
+
config = {
|
| 27 |
+
"model_name": model_path,
|
| 28 |
+
"device": "cpu",
|
| 29 |
+
"filter_min_confidence": 0.4,
|
| 30 |
+
"min_fields": 2
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
pipeline = ObligationPipeline(config)
|
| 34 |
+
|
| 35 |
+
print("\n" + "=" * 60)
|
| 36 |
+
print("RUNNING OBLIGATION EXTRACTION TEST")
|
| 37 |
+
print("=" * 60)
|
| 38 |
+
|
| 39 |
+
# βββ Run pipeline βββ
|
| 40 |
+
results = pipeline.process(
|
| 41 |
+
source=sample_contract,
|
| 42 |
+
source_type="text",
|
| 43 |
+
contract_id="demo_contract",
|
| 44 |
+
debug=False
|
| 45 |
+
)
|
| 46 |
+
|
| 47 |
+
# βββ Clean display βββ
|
| 48 |
+
cleaned_results = []
|
| 49 |
+
|
| 50 |
+
for r in results:
|
| 51 |
+
cleaned = {
|
| 52 |
+
"metric": r.get("metric_name"),
|
| 53 |
+
"operator": r.get("operator"),
|
| 54 |
+
"value": r.get("threshold_value"),
|
| 55 |
+
"deadline": r.get("deadline"),
|
| 56 |
+
"consequence": r.get("consequence"),
|
| 57 |
+
"confidence": round(r.get("confidence_score", 0), 3)
|
| 58 |
+
}
|
| 59 |
+
cleaned = {k: v for k, v in cleaned.items() if v is not None}
|
| 60 |
+
cleaned_results.append(cleaned)
|
| 61 |
+
|
| 62 |
+
# βββ Deduplicate: same metric + same value β keep highest confidence βββ
|
| 63 |
+
seen = {}
|
| 64 |
+
for obligation in cleaned_results:
|
| 65 |
+
key = (obligation.get("metric"), obligation.get("value"))
|
| 66 |
+
if key not in seen:
|
| 67 |
+
seen[key] = obligation
|
| 68 |
+
else:
|
| 69 |
+
if obligation.get("confidence", 0) > seen[key].get("confidence", 0):
|
| 70 |
+
seen[key] = obligation
|
| 71 |
+
|
| 72 |
+
deduplicated_results = list(seen.values())
|
| 73 |
+
|
| 74 |
+
print("\nExtracted Obligations:\n")
|
| 75 |
+
print(json.dumps(deduplicated_results, indent=2))
|
| 76 |
+
|
| 77 |
+
print("\nTotal Obligations Found:", len(deduplicated_results))
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
if __name__ == "__main__":
|
| 81 |
+
run_test()
|
test_routes.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import email.message
|
| 3 |
+
|
| 4 |
+
class _CGI:
|
| 5 |
+
@staticmethod
|
| 6 |
+
def parse_header(line):
|
| 7 |
+
m = email.message.Message()
|
| 8 |
+
m['content-type'] = line
|
| 9 |
+
return m.get_content_type(), m.get_params() or {}
|
| 10 |
+
|
| 11 |
+
sys.modules['cgi'] = _CGI()
|
| 12 |
+
|
| 13 |
+
from main import app
|
| 14 |
+
print([r.rule for r in app.url_map.iter_rules()])
|