kp267 commited on
Commit
ed5fc2d
·
1 Parent(s): 4a610f0

init agentic demo test

Browse files
README.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: DemoC Gradio Test
3
+ emoji: 🤖
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: gradio
7
+ app_file: demo_c.py
8
+ pinned: false
9
+ ---
10
+
11
+ # DemoC Gradio Test
12
+
13
+ ## Run locally
14
+ ```bash
15
+ pip install -r requirements.txt
16
+ python demoC.py
__pycache__/framework_demo_b.cpython-313.pyc ADDED
Binary file (58.6 kB). View file
 
app_backed.py ADDED
@@ -0,0 +1,1012 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ """
4
+ FINAL DEMO (v10.2): Stable, runs locally + HF Spaces.
5
+
6
+ Design goals (per your 5-step analyst workflow)
7
+ - Output must always be non-empty and human-readable.
8
+ - Trace tab must record: run_id / model_id / version_id / policy_id + 5 steps + tool_used per step.
9
+ - NO LangGraph. NO LLM tool-calling.
10
+ - FinTech:
11
+ - Python computes PD + confidence + HITL urgency (and optional model choice via LLM selector).
12
+ - Output includes explicit prediction + decision bullet.
13
+ - TE Pricing:
14
+ - Primary: Python "pricing_heuristic" (value/segment/channel aware) so it won't stick to 42/52.
15
+ - Optional: LLM proposes a price range and adjustments (no browsing). If LLM fails, we still output a valid result.
16
+ - Hard constraints enforced in Python: presale >= max(7*COGS, mult*COGS), retail > presale.
17
+
18
+ HF Spaces:
19
+ - Rename to app.py
20
+ - Set OPENAI_API_KEY in Space Secrets (optional for TE/FinTech explanation add-on, NOT required for base output)
21
+ """
22
+
23
+ import json
24
+ import math
25
+ import os
26
+ import sys
27
+ import time
28
+ import uuid
29
+ import traceback
30
+ from dataclasses import dataclass, asdict, field
31
+ from datetime import datetime, timezone
32
+ from pathlib import Path
33
+ from typing import Any, Dict, List, Optional, Tuple
34
+
35
+ import numpy as np
36
+ import pandas as pd
37
+
38
+ # OpenAI is OPTIONAL (we never allow "no content" if API fails)
39
+ try:
40
+ from openai import OpenAI
41
+ except Exception:
42
+ OpenAI = None
43
+
44
+ from sklearn.model_selection import train_test_split
45
+ from sklearn.metrics import roc_auc_score
46
+ from sklearn.linear_model import LogisticRegression
47
+ from sklearn.tree import DecisionTreeClassifier
48
+
49
+
50
# ---- Boot banner: makes the running environment obvious in HF Space logs ----
# NOTE(review): these prints run at import time (module side effect); kept
# because Spaces relies on them for startup diagnostics.
print("\n========== FINAL_APP BOOT (v10.4) ==========")
print("RUNNING_FILE =", __file__)
print("CWD =", os.getcwd())
print("PYTHON =", sys.executable)
print("OPENAI_MODEL =", os.getenv("OPENAI_MODEL", "gpt-4o-mini"))
print("OPENAI_API_KEY_SET =", "YES" if bool(os.getenv("OPENAI_API_KEY", "")) else "NO")
print("===========================================\n")


# =========================
# Config (every value overridable via environment variables)
# =========================
APP_TITLE = "Demo C"
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")

# Identity stamped into every run's trace metadata (see run_metadata).
AGENT_ID = os.getenv("AGENT_ID", "nexdatawork_demo_agent")
MODEL_ID = os.getenv("MODEL_ID", "python_5step_traceable")
VERSION_ID = os.getenv("VERSION_ID", "10.4.0")

POLICY_ID = os.getenv("POLICY_ID", "5step_traceable_policy")
POLICY_VERSION = os.getenv("POLICY_VERSION", "10.4")

# JSONL run-log location; the data directory is created eagerly at import.
DATA_DIR = Path(os.getenv("DATA_DIR", "./data"))
DATA_DIR.mkdir(parents=True, exist_ok=True)
RUN_LOG_PATH = DATA_DIR / os.getenv("RUN_LOG_PATH", "run_logs.jsonl")

DEFAULT_SYNTHETIC_SEED = 42
# Requested amounts near/above this scale bump HITL urgency (fintech_recommend).
HIGH_IMPACT_AMOUNT = float(os.getenv("HIGH_IMPACT_AMOUNT", "1000000"))
79
+
80
+ # =========================
81
+ # Helpers
82
+ # =========================
83
def utc_now() -> str:
    """Return the current UTC time as an ISO-8601 string (second precision)."""
    now = datetime.now(timezone.utc)
    return now.replace(microsecond=0).isoformat()
85
+
86
def new_id(prefix: str) -> str:
    """Build a unique id of the form `<prefix>_<UTC timestamp>_<8 hex chars>`."""
    stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    suffix = uuid.uuid4().hex[:8]
    return "_".join([prefix, stamp, suffix])
89
+
90
def clamp(x: float, lo: float, hi: float) -> float:
    """Clamp *x* into [lo, hi] (upper bound applied first, matching max/min order)."""
    bounded_above = min(hi, x)
    return max(lo, bounded_above)
92
+
93
def sigmoid(x: float) -> float:
    """Numerically stable logistic function 1 / (1 + e^-x).

    Branches on the sign of x so exp() is only ever called with a
    non-positive argument, avoiding overflow for large |x|.
    """
    if x < 0:
        e = math.exp(x)
        return e / (1.0 + e)
    e = math.exp(-x)
    return 1.0 / (1.0 + e)
99
+
100
def as_99(x: float) -> float:
    """Charm-round: nearest integer minus one cent (110 -> 109.99), floor 0.99.

    NOTE(review): rounding to the *nearest* integer first means the result
    can be up to ~0.51 below the input; callers that enforce a price floor
    before calling this may end up slightly under the floor. Confirm whether
    ceil-based rounding was intended.
    """
    whole = max(round(float(x)), 1)
    return float(f"{whole - 0.01:.2f}")
103
+
104
def run_metadata(task_type: str, thread_id: str) -> Dict[str, Any]:
    """Build the per-run governance metadata (run/agent/model/policy ids).

    Attached to every workflow payload so the Trace tab can show
    run_id / model_id / version_id / policy_id.
    """
    meta: Dict[str, Any] = {
        "run_id": new_id("run"),
        "agent_id": AGENT_ID,
        "model_id": MODEL_ID,
        "version_id": VERSION_ID,
        "policy_id": POLICY_ID,
        "policy_version": POLICY_VERSION,
        "llm_model": OPENAI_MODEL,
        "task_type": task_type,
        "thread_id": thread_id,
        "timestamps": {"created_at": utc_now()},
    }
    return meta
117
+
118
+
119
+ # =========================
120
+ # JSONL Logger
121
+ # =========================
122
class JSONLLogger:
    """Append-only JSON-Lines logger used to persist run traces."""

    def __init__(self, path: Path):
        # Create the parent directory up front so append() never fails on it.
        self.path = path
        self.path.parent.mkdir(parents=True, exist_ok=True)

    def append(self, payload: Dict[str, Any]) -> None:
        """Serialize *payload* and append it as a single line."""
        line = json.dumps(payload, ensure_ascii=False)
        with self.path.open("a", encoding="utf-8") as fh:
            fh.write(line + "\n")

    def tail(self, n: int = 50) -> List[Dict[str, Any]]:
        """Return up to the last *n* parseable records; malformed lines are skipped."""
        if not self.path.exists():
            return []
        records: List[Dict[str, Any]] = []
        for raw in self.path.read_text(encoding="utf-8").splitlines()[-n:]:
            try:
                records.append(json.loads(raw))
            except Exception:
                continue
        return records
142
+
143
# Module-level logger instance shared by all workflow runs.
LOGGER = JSONLLogger(RUN_LOG_PATH)
144
+
145
+
146
+ # =========================
147
+ # Trace structures
148
+ # =========================
149
@dataclass
class StepTrace:
    """Trace record for one executed workflow step (shown in the Trace tab).

    Captures identity (step_id / step_no / title), the tool that ran,
    wall-clock timing, and either the step's outputs or its error text.
    """

    step_id: str
    step_no: int
    title: str
    tool_used: str
    started_at: str
    ended_at: str
    duration_ms: int
    # Mutable defaults must go through default_factory (dataclass rule).
    inputs: Dict[str, Any] = field(default_factory=dict)
    outputs: Dict[str, Any] = field(default_factory=dict)
    error: Optional[str] = None
161
+
162
def run_step(step_no: int, title: str, tool_used: str, inputs: Dict[str, Any], fn) -> Tuple[StepTrace, Any]:
    """Execute *fn* and wrap the outcome in a StepTrace.

    Returns (trace, raw_result). On exception, the traceback text is stored
    in trace.error, trace.outputs is left empty, and raw_result is None —
    the workflow itself never crashes because of a failing step.
    """
    sid = new_id("step")
    begin_iso = utc_now()
    begin = time.time()
    result: Any = None
    captured: Dict[str, Any] = {}
    failure: Optional[str] = None
    try:
        result = fn()
        # Non-dict results are wrapped so outputs is always a mapping.
        captured = result if isinstance(result, dict) else {"value": result}
    except Exception:
        failure = traceback.format_exc()
    trace = StepTrace(
        step_id=sid,
        step_no=step_no,
        title=title,
        tool_used=tool_used,
        started_at=begin_iso,
        ended_at=utc_now(),
        duration_ms=int((time.time() - begin) * 1000),
        inputs=inputs,
        outputs={} if failure is not None else captured,
        error=failure,
    )
    return trace, result
191
+
192
+
193
+ # =========================
194
+ # Optional LLM helpers (never required)
195
+ # =========================
196
def _client_or_none():
    """Return an OpenAI client when the SDK import succeeded and an API key
    is set; otherwise None. Never raises — the LLM is strictly optional."""
    if OpenAI is None or not os.getenv("OPENAI_API_KEY", ""):
        return None
    try:
        return OpenAI()
    except Exception:
        return None
205
+
206
def llm_te_adjustment(client: Any, about: str, inputs: Dict[str, Any], base: Dict[str, Any]) -> Dict[str, Any]:
    """Optionally ask the LLM for price-delta adjustments (STRICT JSON contract).

    Never raises: any failure (missing API, invalid JSON, ...) yields a
    neutral zero-delta result so the pricing workflow always continues.
    """
    request_payload = {
        "about": about,
        "inputs": inputs,
        "base": base,
        "instruction": (
            "No browsing. Return STRICT JSON: "
            "{"
            "\"adj_presale_delta\": number, "
            "\"adj_retail_delta\": number, "
            "\"competitor_range\": {\"low\": number, \"high\": number}, "
            "\"confidence_0_100\": number, "
            "\"rationale_bullets\": [..]"
            "}. Keep bullets 4-7."
        ),
    }
    try:
        resp = client.chat.completions.create(
            model=OPENAI_MODEL,
            temperature=0.2,
            messages=[
                {"role": "system", "content": "Return STRICT JSON only. No markdown."},
                {"role": "user", "content": json.dumps(request_payload)},
            ],
        )
        raw = (resp.choices[0].message.content or "").strip()
        parsed = json.loads(raw)
        # Defensive coercion: cap bullet length/count, clamp confidence.
        bullets = [str(b)[:200] for b in (parsed.get("rationale_bullets", []) or [])]
        return {
            "adj_presale_delta": float(parsed.get("adj_presale_delta", 0.0)),
            "adj_retail_delta": float(parsed.get("adj_retail_delta", 0.0)),
            "competitor_range": parsed.get("competitor_range", {}) or {"low": 0.0, "high": 0.0},
            "confidence_0_100": float(clamp(float(parsed.get("confidence_0_100", 50.0)), 0.0, 100.0)),
            "rationale_bullets": bullets[:10],
            "raw": raw[:1400],
        }
    except Exception as exc:
        return {
            "error": str(exc)[:400],
            "adj_presale_delta": 0.0,
            "adj_retail_delta": 0.0,
            "competitor_range": {"low": 0.0, "high": 0.0},
            "confidence_0_100": 0.0,
            "rationale_bullets": ["LLM adjustment unavailable (API missing or invalid JSON)."],
        }
254
+
255
+
256
+ # =========================
257
+ # FinTech tools
258
+ # =========================
259
def fintech_build_row(inp: Dict[str, Any]) -> pd.DataFrame:
    """Convert a raw applicant input dict into a single-row DataFrame.

    Column order follows the mapping below (dict insertion order); a missing
    input key raises KeyError, matching the original literal construction.
    """
    column_to_key = {
        "Income": "income",
        "Debt": "debt",
        "Credit_Score": "credit_score",
        "Employment_Status": "employment_status",
        "Missed_Payments_12m": "missed_payments_12m",
        "Months_On_Book": "months_on_book",
        "Credit_Lines": "credit_lines",
        "Requested_Amount": "requested_amount",
        "Savings": "savings",
        "Collateral_Value": "collateral_value",
        "Fraud_Flag": "fraud_flag",
        "Existing_Customer": "existing_customer",
    }
    row = {col: inp[key] for col, key in column_to_key.items()}
    return pd.DataFrame([row])
274
+
275
def fintech_preprocess(df: pd.DataFrame) -> Tuple[pd.DataFrame, Dict[str, Any]]:
    """Fill missing values deterministically.

    Numeric columns: coerce to numbers and fill NaNs with the column median.
    When the median itself is undefined (all-NaN column — the normal case for
    this app's single-row frames when the one value is missing), fall back to
    0.0; the previous behavior filled NaN with NaN and left the gap in place,
    which then produced NaN engineered features downstream.
    Non-numeric columns: fill NaNs with the literal "Unknown".

    Returns (cleaned_frame, {"missing_before": ..., "missing_after": ...})
    where both reports map column name -> NaN count.
    """
    out = df.copy()
    missing_before = out.isna().sum().astype(int).to_dict()
    for col in out.columns:
        if not out[col].isna().any():
            continue  # nothing to fill in this column
        if pd.api.types.is_numeric_dtype(out[col]):
            coerced = pd.to_numeric(out[col], errors="coerce")
            med = coerced.median()
            if pd.isna(med):
                # All values NaN: median is undefined, use a neutral 0.0.
                med = 0.0
            out[col] = coerced.fillna(med)
        else:
            out[col] = out[col].fillna("Unknown")
    missing_after = out.isna().sum().astype(int).to_dict()
    return out, {"missing_before": missing_before, "missing_after": missing_after}
288
+
289
def fintech_features(df: pd.DataFrame) -> pd.DataFrame:
    """Engineer normalized risk features from the cleaned applicant frame.

    All engineered columns are added to a copy; the input is not mutated.
    """
    out = df.copy()

    # Clip denominators so the ratios below stay finite.
    income = out["Income"].clip(lower=1.0)
    amount = out["Requested_Amount"].clip(lower=1.0)
    debt = out["Debt"].clip(lower=0.0)
    savings = out["Savings"].clip(lower=0.0)
    collateral = out["Collateral_Value"].clip(lower=0.0)

    # Capacity / liquidity ratios, capped to tame outliers.
    out["DTI"] = (debt / income).clip(lower=0, upper=5)
    out["Savings_to_Income"] = (savings / income).clip(lower=0, upper=5)
    out["Collateral_to_Amount"] = (collateral / amount).clip(lower=0, upper=10)

    # Bureau-style signals normalized into [0, 1].
    out["Score_Gap"] = ((850 - out["Credit_Score"]) / 550).clip(lower=0, upper=1)
    out["Missed_Norm"] = out["Missed_Payments_12m"].clip(lower=0, upper=12) / 12.0
    out["Tenure_Norm"] = out["Months_On_Book"].clip(lower=0, upper=120) / 120.0
    out["Lines_Norm"] = out["Credit_Lines"].clip(lower=0, upper=20) / 20.0

    # Employment status maps to a small additive risk weight; unseen labels
    # get the "other"-like default 0.07.
    employment = out["Employment_Status"].astype(str).str.lower().str.strip()
    weights = {
        "employed": 0.00, "self-employed": 0.05, "student": 0.08,
        "unemployed": 0.18, "retired": 0.04, "contract": 0.06, "other": 0.07,
    }
    out["Employment_Risk_Weight"] = employment.map(weights).fillna(0.07)

    out["Fraud_Risk"] = out["Fraud_Flag"].astype(int).clip(0, 1)
    out["Loyalty_Boost"] = out["Existing_Customer"].astype(int).clip(0, 1)
    return out
316
+
317
def fintech_tool_heuristic(df_feat: pd.DataFrame) -> Dict[str, Any]:
    """Score the first row with a fixed linear-logistic risk model.

    Returns pd_risk (delinquency probability), a confidence that grows as
    pd_risk moves away from 0.5, and a HITL urgency that grows as confidence
    falls. No training involved — the weights are hand-set.
    """
    row = df_feat.iloc[0].to_dict()
    # Kept as one expression so the weights read like a table; positive
    # weights raise risk, negative ones lower it.
    linear_score = (
        -1.10
        + 1.50 * row["DTI"]
        + 1.20 * row["Score_Gap"]
        + 0.95 * row["Missed_Norm"]
        + 0.70 * row["Employment_Risk_Weight"]
        - 0.35 * row["Tenure_Norm"]
        - 0.25 * row["Lines_Norm"]
        - 0.45 * row["Savings_to_Income"]
        - 0.35 * row["Collateral_to_Amount"]
        + 2.00 * row["Fraud_Risk"]
        - 0.20 * row["Loyalty_Boost"]
    )
    probability = sigmoid(float(linear_score))
    confidence = float(clamp(abs(probability - 0.5) * 200.0, 0.0, 100.0))
    urgency = float(clamp((100.0 - confidence) * 0.75, 0.0, 100.0))
    return {
        "tool": "heuristic",
        "pd_risk": probability,
        "confidence_0_100": confidence,
        "hitl_urgency_0_100": urgency,
        "linear_x": float(linear_score),
    }
336
+
337
def _fintech_make_synth_training(seed: int = 42, n: int = 1500) -> pd.DataFrame:
    """Generate *n* synthetic labeled applicants for model training.

    Labels are Bernoulli draws from the same linear-logistic risk formula
    used by fintech_tool_heuristic, so trained models roughly recover it.
    NOTE: the RNG draw order below fixes the random stream for a given
    seed — keep it stable to keep results reproducible.
    """
    rng = np.random.default_rng(seed)
    income = rng.lognormal(mean=np.log(65000), sigma=0.55, size=n).clip(12000, 250000)
    debt = rng.lognormal(mean=np.log(18000), sigma=0.75, size=n).clip(0, 200000)
    score = rng.integers(300, 851, size=n)
    missed = rng.integers(0, 7, size=n)
    mob = rng.integers(0, 121, size=n)
    lines = rng.integers(0, 21, size=n)
    savings = rng.lognormal(mean=np.log(8000), sigma=0.9, size=n).clip(0, 200000)
    collateral = rng.lognormal(mean=np.log(15000), sigma=0.9, size=n).clip(0, 300000)
    fraud = rng.binomial(1, 0.03, size=n)
    existing = rng.binomial(1, 0.55, size=n)
    emp = rng.choice(["Employed","Self-employed","Student","Unemployed","Retired","Contract","Other"], size=n)
    req_amount = rng.lognormal(mean=np.log(25000), sigma=0.8, size=n).clip(500, 250000)

    base = pd.DataFrame({
        "Income": income, "Debt": debt, "Credit_Score": score,
        "Employment_Status": emp, "Missed_Payments_12m": missed,
        "Months_On_Book": mob, "Credit_Lines": lines,
        "Requested_Amount": req_amount,
        "Savings": savings,
        "Collateral_Value": collateral,
        "Fraud_Flag": fraud,
        "Existing_Customer": existing,
    })
    cleaned, _ = fintech_preprocess(base)
    featured = fintech_features(cleaned)

    # Same weights as fintech_tool_heuristic (vectorized over the frame).
    x = (
        -1.10
        + 1.50 * featured["DTI"]
        + 1.20 * featured["Score_Gap"]
        + 0.95 * featured["Missed_Norm"]
        + 0.70 * featured["Employment_Risk_Weight"]
        - 0.35 * featured["Tenure_Norm"]
        - 0.25 * featured["Lines_Norm"]
        - 0.45 * featured["Savings_to_Income"]
        - 0.35 * featured["Collateral_to_Amount"]
        + 2.00 * featured["Fraud_Risk"]
        - 0.20 * featured["Loyalty_Boost"]
    )
    p = 1 / (1 + np.exp(-x))
    y = rng.binomial(1, p).astype(int)

    featured = featured.copy()
    featured["y"] = y
    return featured
384
+
385
def fintech_tool_logreg_synth(df_case_feat: pd.DataFrame, seed: int = 42) -> Dict[str, Any]:
    """Train a logistic regression on synthetic data, then score the case row.

    The reported AUC comes from a held-out synthetic split; it is evidence
    about the synthetic generator, not about production data.
    """
    feature_cols = [
        "DTI","Score_Gap","Missed_Norm","Tenure_Norm","Lines_Norm",
        "Employment_Risk_Weight","Savings_to_Income","Collateral_to_Amount","Fraud_Risk","Loyalty_Boost"
    ]
    synth = _fintech_make_synth_training(seed=seed, n=1500)
    features = synth[feature_cols].astype(float)
    labels = synth["y"].astype(int)

    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.25, random_state=seed, stratify=labels
    )
    clf = LogisticRegression(max_iter=1000, solver="lbfgs")
    clf.fit(x_train, y_train)
    auc = float(roc_auc_score(y_test, clf.predict_proba(x_test)[:, 1]))

    case = df_case_feat[feature_cols].astype(float)
    pd_risk = float(clf.predict_proba(case)[:, 1][0])
    conf = float(clamp(abs(pd_risk - 0.5) * 200.0, 0.0, 100.0))
    urg = float(clamp((100.0 - conf) * 0.75, 0.0, 100.0))
    return {"tool": "logreg_synth", "auc_test_synth": auc, "pd_risk": pd_risk, "confidence_0_100": conf, "hitl_urgency_0_100": urg}
405
+
406
def fintech_tool_tree_synth(df_case_feat: pd.DataFrame, seed: int = 42) -> Dict[str, Any]:
    """Train a shallow decision tree on synthetic data, then score the case row.

    Mirrors fintech_tool_logreg_synth but with a depth-4 tree; AUC is again
    measured on a held-out synthetic split, for evidence only.
    """
    feature_cols = [
        "DTI","Score_Gap","Missed_Norm","Tenure_Norm","Lines_Norm",
        "Employment_Risk_Weight","Savings_to_Income","Collateral_to_Amount","Fraud_Risk","Loyalty_Boost"
    ]
    synth = _fintech_make_synth_training(seed=seed, n=1500)
    features = synth[feature_cols].astype(float)
    labels = synth["y"].astype(int)

    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.25, random_state=seed, stratify=labels
    )
    clf = DecisionTreeClassifier(max_depth=4, random_state=seed)
    clf.fit(x_train, y_train)
    auc = float(roc_auc_score(y_test, clf.predict_proba(x_test)[:, 1]))

    case = df_case_feat[feature_cols].astype(float)
    pd_risk = float(clf.predict_proba(case)[:, 1][0])
    conf = float(clamp(abs(pd_risk - 0.5) * 200.0, 0.0, 100.0))
    urg = float(clamp((100.0 - conf) * 0.75, 0.0, 100.0))
    return {"tool": "tree_synth", "auc_test_synth": auc, "pd_risk": pd_risk, "confidence_0_100": conf, "hitl_urgency_0_100": urg}
426
+
427
# Dispatch table: tool name -> scoring callable (used by run_fintech_workflow;
# unknown names fall back to "heuristic" there).
FINTECH_TOOL_REGISTRY = {
    "heuristic": fintech_tool_heuristic,
    "logreg_synth": fintech_tool_logreg_synth,
    "tree_synth": fintech_tool_tree_synth,
}
432
+
433
def fintech_recommend(score: Dict[str, Any], requested_amount: float) -> Dict[str, Any]:
    """Turn a risk score into an operational decision.

    Large requested amounts bump HITL urgency on a log scale, saturating at
    +20 once the amount reaches 10x HIGH_IMPACT_AMOUNT. Low confidence
    (<= 25) or high final urgency (>= 60) routes the case to a human.
    """
    conf = float(score.get("confidence_0_100", 0.0))
    base_urgency = float(score.get("hitl_urgency_0_100", 100.0))

    amount_bump = 0.0
    if HIGH_IMPACT_AMOUNT > 0 and requested_amount > 0:
        ratio = requested_amount / HIGH_IMPACT_AMOUNT
        amount_bump = 20.0 * clamp(math.log10(ratio + 1.0) / math.log10(11.0), 0.0, 1.0)

    urgency = float(clamp(base_urgency + amount_bump, 0.0, 100.0))
    needs_review = urgency >= 60.0 or conf <= 25.0
    decision = "Needs Human Review" if needs_review else "Decision Draft"
    return {"decision": decision, "hitl_urgency_0_100": urgency, "prediction_pd": float(score.get("pd_risk", 0.5))}
443
+
444
+
445
+ # =========================
446
+ # TE pricing heuristic (PRIMARY)
447
+ # =========================
448
def te_pricing_heuristic(inp: Dict[str, Any]) -> Dict[str, Any]:
    """Primary Python pricing tool: segment/channel/volume-aware anchoring.

    Hard constraints enforced here:
      * presale >= max(7 * COGS, presale_mult * COGS)  (the "floor")
      * retail  >  presale (strictly)
    Prices are charm-rounded via as_99 (e.g. 109.99), which can land a few
    cents under the floor; the resulting `checks` flags report that honestly.
    """
    cogs = float(inp["cogs"])
    landed = float(inp["landed"])
    mult = float(inp["presale_mult"])
    discount = float(inp["discount"])
    channel = str(inp.get("channel", "DTC"))
    segment = str(inp.get("target_segment", "Mid-market"))
    units = int(inp.get("expected_presale_units", 0))

    # Demand controls arrive as friendly 0-100 sliders.
    demand_index = clamp(float(inp.get("demand_index", 60.0)), 0.0, 100.0)
    price_sensitivity = clamp(float(inp.get("price_sensitivity", 60.0)), 0.0, 100.0)
    price_step = max(1.0, float(inp.get("price_step", 10.0)))

    floor = max(7.0 * cogs, mult * cogs)

    # Segment sets the starting retail anchor (pure heuristic, so the demo
    # doesn't fixate on one number).
    seg = segment.lower()
    if seg.startswith("budget"):
        anchor = 79.99
    elif seg.startswith("premium"):
        anchor = 149.99
    else:
        anchor = 109.99

    # Channel economics shift the anchor.
    chan = channel.lower()
    if chan == "amazon":
        anchor -= 10.0
    elif chan == "retail":
        anchor += 10.0
    elif chan == "wholesale":
        anchor -= 15.0

    # Volume: big presale commitments tolerate a lower retail; tiny runs price up.
    if units >= 5000:
        anchor -= 8.0
    elif units >= 2000:
        anchor -= 4.0
    elif 0 < units < 300:
        anchor += 6.0

    # Higher demand supports higher willingness-to-pay (about +/-15 max);
    # higher price sensitivity pushes the anchor down (about +/-10 max).
    anchor += (demand_index - 50.0) * 0.3
    anchor -= (price_sensitivity - 50.0) * 0.2

    # Retail must be high enough that presale (= retail * (1-discount))
    # still clears the floor.
    retail_floor_from_discount = floor / max(1e-6, (1.0 - discount))
    retail = max(anchor, retail_floor_from_discount)

    presale = max(retail * (1.0 - discount), floor)

    presale = as_99(presale)
    retail = as_99(retail)
    if retail <= presale:
        # Charm rounding can collapse the gap; re-open it from the presale side.
        retail = as_99(presale / max(1e-6, (1.0 - discount)))

    checks = {
        "presale_ge_7xcogs": presale >= 7.0 * cogs,
        "presale_ge_floor": presale >= floor,
        "retail_gt_presale": retail > presale,
        "presale_gt_landed": presale > landed,
        "retail_gt_landed": retail > landed,
    }

    return {
        "tool": "pricing_heuristic",
        "floor": floor,
        "presale_price": presale,
        "retail_price": retail,
        "unit_margin_presale": float(presale - landed),
        "unit_margin_retail": float(retail - landed),
        "checks": checks,
        "anchors": {"segment": segment, "channel": channel, "retail_anchor": anchor, "units": units, "demand_index": demand_index, "price_sensitivity": price_sensitivity, "price_step": price_step},
    }
527
+
528
+
529
+ # =========================
530
+ # 5-step explanation builder (Python, always non-empty)
531
+ # =========================
532
def build_5step_explanation_md(title: str, step_bullets: Dict[int, List[str]], evidence_md: str, prediction_bullet: str, decision_bullet: str) -> str:
    """Render the 5-step explanation as Markdown; the output is never empty.

    Section headers come from FIVE_STEP_TITLES; empty bullet lists are
    replaced by "(no content)" and each section is capped at 8 bullets.
    NOTE(review): *title* is currently unused — kept for interface
    compatibility with existing callers.
    """
    lines: List[str] = ["## Explanation"]
    for step_no in range(1, 6):
        lines.append(f"### {FIVE_STEP_TITLES[step_no - 1]}")
        bullets = step_bullets.get(step_no, []) or ["(no content)"]
        lines.extend(f"- {b}" for b in bullets[:8])
        lines.append("")
    lines.append("## Evidence")
    lines.append(evidence_md.strip() if evidence_md.strip() else "- (no evidence)")
    lines.append("")
    lines.append("## Prediction")
    lines.append(f"- {prediction_bullet}")
    lines.append("")
    lines.append("## Decision")
    lines.append(f"- {decision_bullet}")
    return "\n".join(lines).strip()
554
+
555
+
556
+ # =========================
557
+ # Workflows
558
+ # =========================
559
# Professional section titles for the 5-step analyst workflow; indexed 0-4
# and used by both the step traces and build_5step_explanation_md.
FIVE_STEP_TITLES = [
    "Interpreting Context & Metrics (Questions + EDA)",
    "Pre-Processing Data (Cleaning + Missing Values)",
    "Processing Data (Transform + Feature Build)",
    "Analyzing Data (Modeling + Scoring)",
    "Trend Analysis & Predictions (Decision + Share)",
]
566
def run_fintech_workflow(thread_id: str, about: str, inp: Dict[str, Any]) -> Tuple[str, Dict[str, Any]]:
    """Run the 5-step FinTech credit-risk workflow.

    Returns (markdown_report, trace_payload). The payload — also appended to
    the JSONL run log — carries run/model/policy ids plus every StepTrace,
    and the report is always non-empty even if a step fails (each scoring
    stage has a deterministic fallback).
    """
    meta = run_metadata("fintech", thread_id)
    steps: List[StepTrace] = []

    # Step 1: capture context.
    trace1, _ = run_step(1, FIVE_STEP_TITLES[0], "context_capture", {"about_preview": about[:200]}, lambda: {"about_len": len(about)})
    steps.append(trace1)

    # Step 2: build the single-row frame and fill missing values.
    raw_df = fintech_build_row(inp)
    clean_df, prep = fintech_preprocess(raw_df)
    trace2, _ = run_step(2, FIVE_STEP_TITLES[1], "fintech_preprocess", {}, lambda: prep)
    steps.append(trace2)

    # Step 3: feature engineering (preview is logged for the Evidence section).
    feat_df = fintech_features(clean_df)
    feat_preview = feat_df[[
        "DTI","Score_Gap","Missed_Norm","Tenure_Norm","Lines_Norm",
        "Savings_to_Income","Collateral_to_Amount","Fraud_Risk","Loyalty_Boost"
    ]].iloc[0].to_dict()
    trace3, _ = run_step(3, FIVE_STEP_TITLES[2], "fintech_features", {}, lambda: {"feature_preview": feat_preview})
    steps.append(trace3)

    # Step 4: deterministic Python tool dispatch — no LLM tool-calling.
    chosen_tool = "logreg_synth" if inp.get("use_ml_model", True) else "heuristic"
    if chosen_tool not in FINTECH_TOOL_REGISTRY:
        chosen_tool = "heuristic"

    def _score():
        scorer = FINTECH_TOOL_REGISTRY[chosen_tool]
        if chosen_tool == "heuristic":
            return scorer(feat_df)  # type: ignore
        return scorer(feat_df, seed=DEFAULT_SYNTHETIC_SEED)  # type: ignore

    trace4, score = run_step(4, FIVE_STEP_TITLES[3], f"python_dispatch::{chosen_tool}", {}, _score)
    steps.append(trace4)
    # Fallback guarantees a usable score even if the chosen tool raised.
    score = score if isinstance(score, dict) else fintech_tool_heuristic(feat_df)

    # Step 5: convert the score into an operational decision.
    requested_amount = float(inp["requested_amount"])
    trace5, rec = run_step(5, FIVE_STEP_TITLES[4], "fintech_recommend", {"requested_amount": requested_amount}, lambda: fintech_recommend(score, requested_amount))
    steps.append(trace5)
    rec = rec if isinstance(rec, dict) else {"decision": "Needs Human Review", "hitl_urgency_0_100": 100.0, "prediction_pd": float(score.get("pd_risk", 0.5))}

    final = {
        "decision": rec["decision"],
        "pd_risk": float(score.get("pd_risk", 0.5)),
        "confidence_0_100": float(score.get("confidence_0_100", 0.0)),
        "hitl_urgency_0_100": float(rec.get("hitl_urgency_0_100", score.get("hitl_urgency_0_100", 100.0))),
        "selected_tool": chosen_tool,
        "auc_test_synth": float(score.get("auc_test_synth", -1.0)),
    }

    # Explanation bullets are built in Python so the report is never empty.
    step_bullets = {
        1: [
            "Captured account context and key request parameters.",
            f"Requested amount = {requested_amount:.0f}, employment = {inp['employment_status']}, fraud_flag = {inp['fraud_flag']}.",
        ],
        2: [
            "Checked and filled missing values using simple deterministic rules.",
            f"Missing values: before {prep.get('missing_before', {})} → after {prep.get('missing_after', {})}.",
        ],
        3: [
            "Engineered core risk features (DTI, credit score gap, missed payments, tenure, liquidity, collateral, fraud).",
            "These features act as inputs to the scoring model.",
        ],
        4: [
            f"Ran scoring tool: {chosen_tool}.",
            f"Produced PD={final['pd_risk']:.3f} and confidence={final['confidence_0_100']:.1f}/100.",
        ],
        5: [
            "Converted score to an operational decision using HITL urgency and confidence.",
            f"Decision={final['decision']} with HITL_urgency={final['hitl_urgency_0_100']:.1f}/100.",
        ],
    }

    evidence_md = (
        f"- Tool used: `{chosen_tool}`\n"
        f"- Key engineered features (preview):\n\n```json\n{json.dumps(feat_preview, indent=2)}\n```\n"
    )
    # -1.0 marks "no synthetic AUC available" (heuristic tool).
    if final.get("auc_test_synth", -1.0) >= 0:
        evidence_md += f"- Synthetic AUC (internal): `{final['auc_test_synth']:.3f}`\n"

    prediction_bullet = f"Predicted delinquency probability (PD) = {final['pd_risk']:.3f}"
    decision_bullet = f"{final['decision']} (confidence={final['confidence_0_100']:.1f}/100, HITL_urgency={final['hitl_urgency_0_100']:.1f}/100)"

    explanation_md = build_5step_explanation_md("FinTech Credit Risk", step_bullets, evidence_md, prediction_bullet, decision_bullet)

    report = (
        "## Result\n"
        f"- Decision: **{final['decision']}**\n"
        f"- Delinquency probability (PD): **{final['pd_risk']:.3f}**\n"
        f"- Confidence score: **{final['confidence_0_100']:.1f}/100**\n"
        f"- HITL urgency: **{final['hitl_urgency_0_100']:.1f}/100**\n"
        f"- Tool used: `{final['selected_tool']}`\n\n"
        f"{explanation_md}"
    )

    payload = {
        **meta,
        "about": about,
        "inputs": inp,
        "steps": [asdict(x) for x in steps],
        "outputs": {"final": final, "preprocess": prep, "feature_preview": feat_preview},
    }
    LOGGER.append(payload)
    return report, payload
670
+
671
+ def run_te_workflow(thread_id: str, about: str, inp: Dict[str, Any]) -> Tuple[str, Dict[str, Any]]:
672
+ meta = run_metadata("te_pricing", thread_id)
673
+ steps: List[StepTrace] = []
674
+
675
+ s1, _ = run_step(1, FIVE_STEP_TITLES[0], "context_capture", {"about_preview": about[:200]}, lambda: {"about_len": len(about)})
676
+ steps.append(s1)
677
+
678
+ s2, checks = run_step(2, FIVE_STEP_TITLES[1], "te_input_checks", {}, lambda: {
679
+ "checks": {
680
+ "discount_range": 0.0 < float(inp["discount"]) < 0.9,
681
+ "cogs_gt_0": float(inp["cogs"]) > 0,
682
+ "landed_gt_0": float(inp["landed"]) > 0,
683
+ }
684
+ })
685
+ steps.append(s2)
686
+
687
+ # Step 3: derive
688
+ floor = max(7.0 * float(inp["cogs"]), float(inp["presale_mult"]) * float(inp["cogs"]))
689
+ s3, derived = run_step(3, FIVE_STEP_TITLES[2], "te_derive", {}, lambda: {
690
+ "presale_floor": floor,
691
+ "implied_retail_floor": floor / max(1e-6, (1.0 - float(inp["discount"]))),
692
+ "channel": str(inp.get("channel", "")),
693
+ "segment": str(inp.get("target_segment", "")),
694
+ "expected_units": int(inp.get("expected_presale_units", 0)),
695
+ "demand_index": float(inp.get("demand_index", 60.0)),
696
+ "price_sensitivity": float(inp.get("price_sensitivity", 60.0)),
697
+ "price_step": float(inp.get("price_step", 10.0)),
698
+ })
699
+ steps.append(s3)
700
+
701
+ # Step 4: Python pricing heuristic always runs (so not stuck at 42/52)
702
+ s4, base_price = run_step(4, FIVE_STEP_TITLES[3], "pricing_heuristic", {}, lambda: te_pricing_heuristic(inp))
703
+ steps.append(s4)
704
+ base_price = base_price if isinstance(base_price, dict) else te_pricing_heuristic(inp)
705
+
706
+ # Optional LLM adjustment (if key exists)
707
+ client = _client_or_none()
708
+ adj = None
709
+ if client is not None:
710
+ s4b, adj = run_step(4, "LLM Adjustment (optional)", "llm_adjustment", {}, lambda: llm_te_adjustment(client, about, inp, base_price))
711
+ # keep as step 4.5 in trace by using step_no=4 but different title; still 5-step in main trace? We keep it in steps list.
712
+ steps.append(s4b)
713
+ else:
714
+ adj = {"confidence_0_100": 0.0, "rationale_bullets": ["LLM adjustment skipped (no API key)."], "competitor_range": {"low": 0.0, "high": 0.0}}
715
+
716
+ # Apply adjustment deltas safely
717
+ presale = float(base_price["presale_price"]) + float(adj.get("adj_presale_delta", 0.0))
718
+ retail = float(base_price["retail_price"]) + float(adj.get("adj_retail_delta", 0.0))
719
+
720
+ # Re-enforce constraints
721
+ presale = max(presale, floor)
722
+ retail_floor = presale / max(1e-6, (1.0 - float(inp["discount"])))
723
+ retail = max(retail, retail_floor)
724
+
725
+ presale = as_99(presale)
726
+ retail = as_99(retail)
727
+ if retail <= presale:
728
+ retail = as_99(retail_floor)
729
+
730
+ final = {
731
+ "presale_price": presale,
732
+ "retail_price": retail,
733
+ "unit_margin_presale": float(presale - float(inp["landed"])),
734
+ "unit_margin_retail": float(retail - float(inp["landed"])),
735
+ "checks": {
736
+ "presale_ge_7xcogs": presale >= 7.0 * float(inp["cogs"]),
737
+ "presale_ge_floor": presale >= floor,
738
+ "retail_gt_presale": retail > presale,
739
+ "presale_gt_landed": presale > float(inp["landed"]),
740
+ "retail_gt_landed": retail > float(inp["landed"]),
741
+ },
742
+ "llm_confidence_0_100": float(adj.get("confidence_0_100", 0.0)),
743
+ "competitor_range": adj.get("competitor_range", {"low": 0.0, "high": 0.0}),
744
+ "tool_used": "pricing_heuristic (+optional_llm_adjustment)",
745
+ }
746
+
747
+ s5, _ = run_step(5, FIVE_STEP_TITLES[4], "te_finalize", {}, lambda: final)
748
+ steps.append(s5)
749
+
750
+ # Explanation bullets (Python, always non-empty)
751
+ step_bullets = {
752
+ 1: [
753
+ "Captured product/channel/segment context and pricing constraints.",
754
+ f"Channel={inp.get('channel')}, segment={inp.get('target_segment')}, expected_units={int(inp.get('expected_presale_units', 0))}.",
755
+ ],
756
+ 2: [
757
+ "Validated inputs and constraints (COGS, landed, discount range).",
758
+ f"Key constraint: presale floor = max(7×COGS, multiplier×COGS) = {floor:.2f}.",
759
+ ],
760
+ 3: [
761
+ "Derived operational floor prices and retail floor implied by discount.",
762
+ f"Implied retail floor ≈ {floor / max(1e-6, (1.0 - float(inp['discount']))):.2f}.",
763
+ ],
764
+ 4: [
765
+ "Computed a value-based retail anchor using segment + channel + volume signals (Python heuristic).",
766
+ f"Optional LLM adjustment used only if API key exists (LLM_conf={final['llm_confidence_0_100']:.0f}/100).",
767
+ ],
768
+ 5: [
769
+ "Output final recommended presale/retail prices and margins, plus constraint checks.",
770
+ f"Presale={final['presale_price']:.2f}, Retail={final['retail_price']:.2f}.",
771
+ ],
772
+ }
773
+
774
+ evidence_md = (
775
+ f"- Tool used: `{final['tool_used']}`\n"
776
+ f"- Base anchors: `{base_price.get('anchors', {})}`\n"
777
+ f"- Competitor range (optional, no browsing): `{final['competitor_range']}`\n"
778
+ f"- Checks: `{final['checks']}`\n"
779
+ )
780
+ if adj and adj.get("rationale_bullets"):
781
+ evidence_md += "- LLM rationale (optional):\n"
782
+ for b in adj["rationale_bullets"][:7]:
783
+ evidence_md += f" - {b}\n"
784
+
785
+ prediction_bullet = f"Recommended presale={final['presale_price']:.2f}, retail={final['retail_price']:.2f}"
786
+ decision_bullet = "Decision Draft (pricing recommendation ready). Human review recommended if brand/legal constraints are strict."
787
+
788
+ explanation_md = build_5step_explanation_md("TE Pricing", step_bullets, evidence_md, prediction_bullet, decision_bullet)
789
+
790
+ report = (
791
+ "## Result\n"
792
+ f"- Presale price: **{final['presale_price']:.2f}**\n"
793
+ f"- Retail price: **{final['retail_price']:.2f}**\n"
794
+ f"- Margin (presale/retail): **{final['unit_margin_presale']:.2f} / {final['unit_margin_retail']:.2f}**\n"
795
+ f"- Checks: `{final['checks']}`\n"
796
+ f"- Tool used: `{final['tool_used']}`\n\n"
797
+ f"{explanation_md}"
798
+ )
799
+
800
+ payload = {
801
+ **meta,
802
+ "about": about,
803
+ "inputs": inp,
804
+ "steps": [asdict(x) for x in steps],
805
+ "outputs": {"final": final, "derived": derived, "base_price": base_price, "llm_adjustment": adj},
806
+ }
807
+ LOGGER.append(payload)
808
+ return report, payload
809
+
810
+
811
+ # =========================
812
+ # Gradio UI
813
+ # =========================
814
+ def build_gradio_app():
815
+ import gradio as gr
816
+
817
+ STATE: Dict[str, Any] = {"last_payload": None}
818
+ emp_choices = ["Employed", "Self-employed", "Student", "Unemployed", "Retired", "Contract", "Other"]
819
+
820
+ def safe_call(fn):
821
+ try:
822
+ return fn()
823
+ except Exception:
824
+ tb = traceback.format_exc()
825
+ print(tb)
826
+ return "ERROR:\n\n```text\n" + tb + "\n```", ""
827
+
828
+ def ui_fintech(
829
+ about_account,
830
+ income, debt, credit_score,
831
+ employment_status,
832
+ missed_12m, months_on_book, credit_lines,
833
+ requested_amount,
834
+ savings, collateral_value,
835
+ fraud_flag, existing_customer,
836
+ use_ml_model,
837
+ thread_id
838
+ ):
839
+ def _do():
840
+ inp = {
841
+ "income": float(income),
842
+ "debt": float(debt),
843
+ "credit_score": int(credit_score),
844
+ "employment_status": str(employment_status),
845
+ "missed_payments_12m": int(missed_12m),
846
+ "months_on_book": int(months_on_book),
847
+ "credit_lines": int(credit_lines),
848
+ "requested_amount": float(requested_amount),
849
+ "savings": float(savings),
850
+ "collateral_value": float(collateral_value),
851
+ "fraud_flag": int(fraud_flag),
852
+ "existing_customer": int(existing_customer),
853
+ "use_ml_model": bool(use_ml_model),
854
+ }
855
+ report, payload = run_fintech_workflow(str(thread_id), str(about_account or ""), inp)
856
+ STATE["last_payload"] = payload
857
+ return report, payload["run_id"]
858
+ return safe_call(_do)
859
+
860
+ def ui_te(about_product, cogs, landed, presale_mult, discount, demand_index, price_sensitivity, price_step, channel, target_segment, expected_presale_units, thread_id):
861
+ def _do():
862
+ inp = {
863
+ "cogs": float(cogs),
864
+ "landed": float(landed),
865
+ "presale_mult": float(presale_mult),
866
+ "discount": float(discount),
867
+ "demand_index": float(demand_index),
868
+ "price_sensitivity": float(price_sensitivity),
869
+ "price_step": float(price_step),
870
+ "channel": str(channel),
871
+ "target_segment": str(target_segment),
872
+ "expected_presale_units": int(expected_presale_units),
873
+ }
874
+ about_text = str(about_product or "").strip()
875
+ if not about_text:
876
+ about_text = f"channel={inp['channel']}; segment={inp['target_segment']}; expected_presale_units={inp['expected_presale_units']}"
877
+ report, payload = run_te_workflow(str(thread_id), about_text, inp)
878
+ STATE["last_payload"] = payload
879
+ return report, payload["run_id"]
880
+ return safe_call(_do)
881
+
882
+ def ui_current_trace():
883
+ p = STATE.get("last_payload")
884
+ if not p:
885
+ return "No run yet."
886
+ return json.dumps(p, indent=2)
887
+
888
+ def ui_logs(n):
889
+ return json.dumps(LOGGER.tail(int(n)), indent=2)
890
+
891
+ with gr.Blocks(title=APP_TITLE) as demo:
892
+ gr.Markdown("## Demo C")
893
+
894
+ with gr.Row():
895
+ thread_id = gr.Textbox(value="demo_thread", label="thread_id")
896
+
897
+ with gr.Tabs():
898
+ with gr.Tab("FinTech"):
899
+ with gr.Row():
900
+ with gr.Column(scale=5, min_width=480):
901
+ gr.Markdown("### Inputs")
902
+ about_account = gr.Textbox(
903
+ label="About this account (customer context)",
904
+ lines=6,
905
+ placeholder="Type customer/account details: repayment history, special situations, collateral notes, verification notes, etc."
906
+ )
907
+
908
+ employment_status = gr.Dropdown(choices=emp_choices, value="Employed", label="Employment status")
909
+
910
+ with gr.Row():
911
+ income = gr.Number(value=75000, label="Income (annual)")
912
+ debt = gr.Number(value=30000, label="Debt (total)")
913
+ with gr.Row():
914
+ credit_score = gr.Number(value=680, label="Credit score (300-850)")
915
+ requested_amount = gr.Number(value=250000, label="Requested amount")
916
+ with gr.Row():
917
+ missed_12m = gr.Number(value=1, label="Missed payments (12m)")
918
+ months_on_book = gr.Number(value=18, label="Months on book")
919
+ credit_lines = gr.Number(value=4, label="Credit lines")
920
+
921
+ with gr.Row():
922
+ savings = gr.Number(value=8000, label="Savings / liquid assets")
923
+ collateral_value = gr.Number(value=0, label="Collateral value")
924
+
925
+ with gr.Row():
926
+ fraud_flag = gr.Dropdown(choices=[0, 1], value=0, label="Fraud flag (0/1)")
927
+ existing_customer = gr.Dropdown(choices=[0, 1], value=1, label="Existing customer (0/1)")
928
+
929
+ use_ml_model = gr.Checkbox(value=True, label="Use synthetic data")
930
+
931
+ btn = gr.Button("Run", variant="primary")
932
+
933
+ with gr.Column(scale=7, min_width=640):
934
+ gr.Markdown("### Output")
935
+ out = gr.Markdown(value="_(Run to see result.)_")
936
+ run_id_out = gr.Textbox(label="run_id")
937
+
938
+ btn.click(
939
+ fn=ui_fintech,
940
+ inputs=[
941
+ about_account,
942
+ income, debt, credit_score,
943
+ employment_status,
944
+ missed_12m, months_on_book, credit_lines,
945
+ requested_amount,
946
+ savings, collateral_value,
947
+ fraud_flag, existing_customer,
948
+ use_ml_model,
949
+ thread_id
950
+ ],
951
+ outputs=[out, run_id_out],
952
+ )
953
+
954
+ with gr.Tab("TE"):
955
+ with gr.Row():
956
+ with gr.Column(scale=5, min_width=480):
957
+ gr.Markdown("### Inputs")
958
+ about_product = gr.Textbox(
959
+ label="About this product/account (context)",
960
+ lines=6,
961
+ placeholder="Type product + customer context: segment, channel, positioning, constraints, demand signals."
962
+ )
963
+
964
+ with gr.Row():
965
+ cogs = gr.Number(value=6, label="COGS per unit")
966
+ landed = gr.Number(value=10, label="Landed cost per unit")
967
+ with gr.Row():
968
+ presale_mult = gr.Number(value=7, label="Presale floor multiplier")
969
+ discount = gr.Number(value=0.20, label="Discount (0-0.9)")
970
+ with gr.Row():
971
+ demand_index = gr.Slider(0, 100, value=60, step=1, label="Demand level (0-100)")
972
+ price_sensitivity = gr.Slider(0, 100, value=60, step=1, label="Price sensitivity (0-100)")
973
+ price_step = gr.Dropdown(choices=[10, 20, 50], value=10, label="Price step ($)")
974
+ channel = gr.Dropdown(choices=["DTC (Direct-to-Consumer)", "Amazon", "Retail", "Wholesale"], value="DTC", label="Channel")
975
+ target_segment = gr.Dropdown(choices=["Budget", "Mid-market", "Premium"], value="Mid-market", label="Target segment")
976
+ expected_presale_units = gr.Number(value=1000, label="Expected presale units (rough)")
977
+
978
+ btn2 = gr.Button("Run", variant="primary")
979
+
980
+ with gr.Column(scale=7, min_width=640):
981
+ gr.Markdown("### Output")
982
+ out2 = gr.Markdown(value="_(Run to see result.)_")
983
+ run_id_out2 = gr.Textbox(label="run_id")
984
+
985
+ btn2.click(
986
+ fn=ui_te,
987
+ inputs=[about_product, cogs, landed, presale_mult, discount, demand_index, price_sensitivity, price_step, channel, target_segment, expected_presale_units, thread_id],
988
+ outputs=[out2, run_id_out2],
989
+ )
990
+
991
+ with gr.Tab("Trace"):
992
+ gr.Markdown("### Current run (full trace JSON)")
993
+ btn3 = gr.Button("Show current run")
994
+ cur = gr.Code(language="json")
995
+ btn3.click(fn=ui_current_trace, inputs=[], outputs=[cur])
996
+
997
+ gr.Markdown("### Log tail (JSONL)")
998
+ n = gr.Slider(10, 200, value=30, step=10, label="show last N runs")
999
+ btn4 = gr.Button("Refresh logs")
1000
+ logs = gr.Code(language="json")
1001
+ btn4.click(fn=ui_logs, inputs=[n], outputs=[logs])
1002
+
1003
+ return demo
1004
+
1005
+
1006
+ def main():
1007
+ demo = build_gradio_app()
1008
+ demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")), debug=True)
1009
+
1010
+
1011
+ if __name__ == "__main__":
1012
+ main()
data/run_logs.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
demo_c.py ADDED
@@ -0,0 +1,1102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ """
4
+ FINAL DEMO (v10.2): Stable, runs locally + HF Spaces.
5
+
6
+ Design goals (per your 5-step analyst workflow)
7
+ - Output must always be non-empty and human-readable.
8
+ - Trace tab must record: run_id / model_id / version_id / policy_id + 5 steps + tool_used per step.
9
+ - NO LangGraph. NO LLM tool-calling.
10
+ - FinTech:
11
+ - Python computes PD + confidence + HITL urgency (and optional model choice via LLM selector).
12
+ - Output includes explicit prediction + decision bullet.
13
+ - TE Pricing:
14
+ - Primary: Python "pricing_heuristic" (value/segment/channel aware) so it won't stick to 42/52.
15
+ - Optional: LLM proposes a price range and adjustments (no browsing). If LLM fails, we still output a valid result.
16
+ - Hard constraints enforced in Python: presale >= max(7*COGS, mult*COGS), retail > presale.
17
+
18
+ HF Spaces:
19
+ - Rename to app.py
20
+ - Set OPENAI_API_KEY in Space Secrets (optional for TE/FinTech explanation add-on, NOT required for base output)
21
+ """
22
+
23
+ import json
24
+ import math
25
+ import os
26
+ import sys
27
+ import time
28
+ import uuid
29
+ import traceback
30
+ from dataclasses import dataclass, asdict, field
31
+ from datetime import datetime, timezone
32
+ from pathlib import Path
33
+ from typing import Any, Dict, List, Optional, Tuple
34
+
35
+ import numpy as np
36
+ import pandas as pd
37
+
38
+ # OpenAI is OPTIONAL (we never allow "no content" if API fails)
39
+ try:
40
+ from openai import OpenAI
41
+ except Exception:
42
+ OpenAI = None
43
+
44
+ from sklearn.model_selection import train_test_split
45
+ from sklearn.metrics import roc_auc_score
46
+ from sklearn.linear_model import LogisticRegression
47
+ from sklearn.tree import DecisionTreeClassifier
48
+
49
+
50
+ print("\n========== FINAL_APP BOOT (v10.6) ==========")
51
+ print("RUNNING_FILE =", __file__)
52
+ print("CWD =", os.getcwd())
53
+ print("PYTHON =", sys.executable)
54
+ print("OPENAI_MODEL =", os.getenv("OPENAI_MODEL", "gpt-4o-mini"))
55
+ print("OPENAI_API_KEY_SET =", "YES" if bool(os.getenv("OPENAI_API_KEY", "")) else "NO")
56
+ print("===========================================\n")
57
+
58
+
59
+ # =========================
60
+ # Config
61
+ # =========================
62
+ APP_TITLE = "Demo C"
63
+ OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
64
+
65
+ AGENT_ID = os.getenv("AGENT_ID", "nexdatawork_demo_agent")
66
+ MODEL_ID = os.getenv("MODEL_ID", "python_5step_traceable")
67
+ VERSION_ID = os.getenv("VERSION_ID", "10.6.0")
68
+
69
+ POLICY_ID = os.getenv("POLICY_ID", "5step_traceable_policy")
70
+ POLICY_VERSION = os.getenv("POLICY_VERSION", "10.6")
71
+
72
+ DATA_DIR = Path(os.getenv("DATA_DIR", "./data"))
73
+ DATA_DIR.mkdir(parents=True, exist_ok=True)
74
+ RUN_LOG_PATH = DATA_DIR / os.getenv("RUN_LOG_PATH", "run_logs.jsonl")
75
+
76
+ DEFAULT_SYNTHETIC_SEED = 42
77
+ HIGH_IMPACT_AMOUNT = float(os.getenv("HIGH_IMPACT_AMOUNT", "1000000"))
78
+
79
+
80
+ # =========================
81
+ # Helpers
82
+ # =========================
83
+ def utc_now() -> str:
84
+ return datetime.now(timezone.utc).replace(microsecond=0).isoformat()
85
+
86
+ def new_id(prefix: str) -> str:
87
+ ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
88
+ return f"{prefix}_{ts}_{uuid.uuid4().hex[:8]}"
89
+
90
+ def clamp(x: float, lo: float, hi: float) -> float:
91
+ return max(lo, min(hi, x))
92
+
93
+ def sigmoid(x: float) -> float:
94
+ if x >= 0:
95
+ z = math.exp(-x)
96
+ return 1.0 / (1.0 + z)
97
+ z = math.exp(x)
98
+ return z / (1.0 + z)
99
+
100
+ def as_99(x: float) -> float:
101
+ v = round(float(x))
102
+ return float(f"{max(v, 1) - 0.01:.2f}")
103
+
104
+ def run_metadata(task_type: str, thread_id: str) -> Dict[str, Any]:
105
+ return {
106
+ "run_id": new_id("run"),
107
+ "agent_id": AGENT_ID,
108
+ "model_id": MODEL_ID,
109
+ "version_id": VERSION_ID,
110
+ "policy_id": POLICY_ID,
111
+ "policy_version": POLICY_VERSION,
112
+ "llm_model": OPENAI_MODEL,
113
+ "task_type": task_type,
114
+ "thread_id": thread_id,
115
+ "timestamps": {"created_at": utc_now()},
116
+ }
117
+
118
+
119
+ # =========================
120
+ # JSONL Logger
121
+ # =========================
122
+ class JSONLLogger:
123
+ def __init__(self, path: Path):
124
+ self.path = path
125
+ self.path.parent.mkdir(parents=True, exist_ok=True)
126
+
127
+ def append(self, payload: Dict[str, Any]) -> None:
128
+ with self.path.open("a", encoding="utf-8") as f:
129
+ f.write(json.dumps(payload, ensure_ascii=False) + "\n")
130
+
131
+ def tail(self, n: int = 50) -> List[Dict[str, Any]]:
132
+ if not self.path.exists():
133
+ return []
134
+ lines = self.path.read_text(encoding="utf-8").splitlines()
135
+ out: List[Dict[str, Any]] = []
136
+ for ln in lines[-n:]:
137
+ try:
138
+ out.append(json.loads(ln))
139
+ except Exception:
140
+ continue
141
+ return out
142
+
143
+ LOGGER = JSONLLogger(RUN_LOG_PATH)
144
+
145
+
146
+ # =========================
147
+ # Trace structures
148
+ # =========================
149
+ @dataclass
150
+ class StepTrace:
151
+ step_id: str
152
+ step_no: int
153
+ title: str
154
+ tool_used: str
155
+ started_at: str
156
+ ended_at: str
157
+ duration_ms: int
158
+ inputs: Dict[str, Any] = field(default_factory=dict)
159
+ outputs: Dict[str, Any] = field(default_factory=dict)
160
+ error: Optional[str] = None
161
+
162
+ def run_step(step_no: int, title: str, tool_used: str, inputs: Dict[str, Any], fn) -> Tuple[StepTrace, Any]:
163
+ step_id = new_id("step")
164
+ started = utc_now()
165
+ t0 = time.time()
166
+ err = None
167
+ out = None
168
+ out_obj: Dict[str, Any] = {}
169
+ try:
170
+ out = fn()
171
+ if isinstance(out, dict):
172
+ out_obj = out
173
+ else:
174
+ out_obj = {"value": out}
175
+ except Exception:
176
+ err = traceback.format_exc()
177
+ ended = utc_now()
178
+ dur = int((time.time() - t0) * 1000)
179
+ return StepTrace(
180
+ step_id=step_id,
181
+ step_no=step_no,
182
+ title=title,
183
+ tool_used=tool_used,
184
+ started_at=started,
185
+ ended_at=ended,
186
+ duration_ms=dur,
187
+ inputs=inputs,
188
+ outputs=out_obj if err is None else {},
189
+ error=err,
190
+ ), out
191
+
192
+
193
+ # =========================
194
+ # Optional LLM helpers (never required)
195
+ # =========================
196
+ def _client_or_none():
197
+ if OpenAI is None:
198
+ return None
199
+ if not os.getenv("OPENAI_API_KEY", ""):
200
+ return None
201
+ try:
202
+ return OpenAI()
203
+ except Exception:
204
+ return None
205
+
206
+ def llm_te_adjustment(client: Any, about: str, inputs: Dict[str, Any], base: Dict[str, Any]) -> Dict[str, Any]:
207
+ """
208
+ Optional: ask LLM for adjustment factor and competitor range.
209
+ Must not break if it fails.
210
+ """
211
+ prompt = {
212
+ "about": about,
213
+ "inputs": inputs,
214
+ "base": base,
215
+ "instruction": (
216
+ "No browsing. Return STRICT JSON: "
217
+ "{"
218
+ "\"adj_presale_delta\": number, "
219
+ "\"adj_retail_delta\": number, "
220
+ "\"competitor_range\": {\"low\": number, \"high\": number}, "
221
+ "\"confidence_0_100\": number, "
222
+ "\"rationale_bullets\": [..]"
223
+ "}. Keep bullets 4-7."
224
+ ),
225
+ }
226
+ try:
227
+ resp = client.chat.completions.create(
228
+ model=OPENAI_MODEL,
229
+ temperature=0.2,
230
+ messages=[
231
+ {"role": "system", "content": "Return STRICT JSON only. No markdown."},
232
+ {"role": "user", "content": json.dumps(prompt)},
233
+ ],
234
+ )
235
+ raw = (resp.choices[0].message.content or "").strip()
236
+ obj = json.loads(raw)
237
+ return {
238
+ "adj_presale_delta": float(obj.get("adj_presale_delta", 0.0)),
239
+ "adj_retail_delta": float(obj.get("adj_retail_delta", 0.0)),
240
+ "competitor_range": obj.get("competitor_range", {}) or {"low": 0.0, "high": 0.0},
241
+ "confidence_0_100": float(clamp(float(obj.get("confidence_0_100", 50.0)), 0.0, 100.0)),
242
+ "rationale_bullets": [str(x)[:200] for x in (obj.get("rationale_bullets", []) or [])][:10],
243
+ "raw": raw[:1400],
244
+ }
245
+ except Exception as e:
246
+ return {
247
+ "error": str(e)[:400],
248
+ "adj_presale_delta": 0.0,
249
+ "adj_retail_delta": 0.0,
250
+ "competitor_range": {"low": 0.0, "high": 0.0},
251
+ "confidence_0_100": 0.0,
252
+ "rationale_bullets": ["LLM adjustment unavailable (API missing or invalid JSON)."],
253
+ }
254
+
255
+
256
+ # =========================
257
+ # FinTech tools
258
+ # =========================
259
+ def fintech_build_row(inp: Dict[str, Any]) -> pd.DataFrame:
260
+ return pd.DataFrame([{
261
+ "Income": inp["income"],
262
+ "Debt": inp["debt"],
263
+ "Credit_Score": inp["credit_score"],
264
+ "Employment_Status": inp["employment_status"],
265
+ "Missed_Payments_12m": inp["missed_payments_12m"],
266
+ "Months_On_Book": inp["months_on_book"],
267
+ "Credit_Lines": inp["credit_lines"],
268
+ "Requested_Amount": inp["requested_amount"],
269
+ "Savings": inp["savings"],
270
+ "Collateral_Value": inp["collateral_value"],
271
+ "Fraud_Flag": inp["fraud_flag"],
272
+ "Existing_Customer": inp["existing_customer"],
273
+ }])
274
+
275
+ def fintech_preprocess(df: pd.DataFrame) -> Tuple[pd.DataFrame, Dict[str, Any]]:
276
+ out = df.copy()
277
+ missing_before = out.isna().sum().astype(int).to_dict()
278
+ for col in out.columns:
279
+ if pd.api.types.is_numeric_dtype(out[col]):
280
+ if out[col].isna().any():
281
+ med = pd.to_numeric(out[col], errors="coerce").median()
282
+ out[col] = pd.to_numeric(out[col], errors="coerce").fillna(med)
283
+ else:
284
+ if out[col].isna().any():
285
+ out[col] = out[col].fillna("Unknown")
286
+ missing_after = out.isna().sum().astype(int).to_dict()
287
+ return out, {"missing_before": missing_before, "missing_after": missing_after}
288
+
289
+ def fintech_features(df: pd.DataFrame) -> pd.DataFrame:
290
+ out = df.copy()
291
+ income = out["Income"].clip(lower=1.0)
292
+ debt = out["Debt"].clip(lower=0.0)
293
+ savings = out["Savings"].clip(lower=0.0)
294
+ collateral = out["Collateral_Value"].clip(lower=0.0)
295
+ amount = out["Requested_Amount"].clip(lower=1.0)
296
+
297
+ out["DTI"] = (debt / income).clip(lower=0, upper=5)
298
+ out["Savings_to_Income"] = (savings / income).clip(lower=0, upper=5)
299
+ out["Collateral_to_Amount"] = (collateral / amount).clip(lower=0, upper=10)
300
+
301
+ out["Score_Gap"] = ((850 - out["Credit_Score"]) / 550).clip(lower=0, upper=1)
302
+ out["Missed_Norm"] = (out["Missed_Payments_12m"].clip(lower=0, upper=12) / 12.0)
303
+ out["Tenure_Norm"] = (out["Months_On_Book"].clip(lower=0, upper=120) / 120.0)
304
+ out["Lines_Norm"] = (out["Credit_Lines"].clip(lower=0, upper=20) / 20.0)
305
+
306
+ emp = out["Employment_Status"].astype(str).str.lower().str.strip()
307
+ emp_w = emp.map({
308
+ "employed": 0.00, "self-employed": 0.05, "student": 0.08,
309
+ "unemployed": 0.18, "retired": 0.04, "contract": 0.06, "other": 0.07
310
+ }).fillna(0.07)
311
+ out["Employment_Risk_Weight"] = emp_w
312
+
313
+ out["Fraud_Risk"] = out["Fraud_Flag"].astype(int).clip(0, 1)
314
+ out["Loyalty_Boost"] = out["Existing_Customer"].astype(int).clip(0, 1)
315
+ return out
316
+
317
+ def fintech_tool_heuristic(df_feat: pd.DataFrame) -> Dict[str, Any]:
318
+ f = df_feat.iloc[0].to_dict()
319
+ x = (
320
+ -1.10
321
+ + 1.50 * f["DTI"]
322
+ + 1.20 * f["Score_Gap"]
323
+ + 0.95 * f["Missed_Norm"]
324
+ + 0.70 * f["Employment_Risk_Weight"]
325
+ - 0.35 * f["Tenure_Norm"]
326
+ - 0.25 * f["Lines_Norm"]
327
+ - 0.45 * f["Savings_to_Income"]
328
+ - 0.35 * f["Collateral_to_Amount"]
329
+ + 2.00 * f["Fraud_Risk"]
330
+ - 0.20 * f["Loyalty_Boost"]
331
+ )
332
+ pd_risk = sigmoid(float(x))
333
+ conf = float(clamp(abs(pd_risk - 0.5) * 200.0, 0.0, 100.0))
334
+ urg = float(clamp((100.0 - conf) * 0.75, 0.0, 100.0))
335
+ return {"tool": "heuristic", "pd_risk": pd_risk, "confidence_0_100": conf, "hitl_urgency_0_100": urg, "linear_x": float(x)}
336
+
337
+ def _fintech_make_synth_training(seed: int = 42, n: int = 1500) -> pd.DataFrame:
338
+ rng = np.random.default_rng(seed)
339
+ income = rng.lognormal(mean=np.log(65000), sigma=0.55, size=n).clip(12000, 250000)
340
+ debt = rng.lognormal(mean=np.log(18000), sigma=0.75, size=n).clip(0, 200000)
341
+ score = rng.integers(300, 851, size=n)
342
+ missed = rng.integers(0, 7, size=n)
343
+ mob = rng.integers(0, 121, size=n)
344
+ lines = rng.integers(0, 21, size=n)
345
+ savings = rng.lognormal(mean=np.log(8000), sigma=0.9, size=n).clip(0, 200000)
346
+ collateral = rng.lognormal(mean=np.log(15000), sigma=0.9, size=n).clip(0, 300000)
347
+ fraud = rng.binomial(1, 0.03, size=n)
348
+ existing = rng.binomial(1, 0.55, size=n)
349
+ emp = rng.choice(["Employed","Self-employed","Student","Unemployed","Retired","Contract","Other"], size=n)
350
+ req_amount = rng.lognormal(mean=np.log(25000), sigma=0.8, size=n).clip(500, 250000)
351
+
352
+ df = pd.DataFrame({
353
+ "Income": income, "Debt": debt, "Credit_Score": score,
354
+ "Employment_Status": emp, "Missed_Payments_12m": missed,
355
+ "Months_On_Book": mob, "Credit_Lines": lines,
356
+ "Requested_Amount": req_amount,
357
+ "Savings": savings,
358
+ "Collateral_Value": collateral,
359
+ "Fraud_Flag": fraud,
360
+ "Existing_Customer": existing,
361
+ })
362
+ df_clean, _ = fintech_preprocess(df)
363
+ df_feat = fintech_features(df_clean)
364
+
365
+ x = (
366
+ -1.10
367
+ + 1.50 * df_feat["DTI"]
368
+ + 1.20 * df_feat["Score_Gap"]
369
+ + 0.95 * df_feat["Missed_Norm"]
370
+ + 0.70 * df_feat["Employment_Risk_Weight"]
371
+ - 0.35 * df_feat["Tenure_Norm"]
372
+ - 0.25 * df_feat["Lines_Norm"]
373
+ - 0.45 * df_feat["Savings_to_Income"]
374
+ - 0.35 * df_feat["Collateral_to_Amount"]
375
+ + 2.00 * df_feat["Fraud_Risk"]
376
+ - 0.20 * df_feat["Loyalty_Boost"]
377
+ )
378
+ p = 1 / (1 + np.exp(-x))
379
+ y = rng.binomial(1, p).astype(int)
380
+
381
+ df_feat = df_feat.copy()
382
+ df_feat["y"] = y
383
+ return df_feat
384
+
385
+ def fintech_tool_logreg_synth(df_case_feat: pd.DataFrame, seed: int = 42) -> Dict[str, Any]:
386
+ train_df = _fintech_make_synth_training(seed=seed, n=1500)
387
+ cols = [
388
+ "DTI","Score_Gap","Missed_Norm","Tenure_Norm","Lines_Norm",
389
+ "Employment_Risk_Weight","Savings_to_Income","Collateral_to_Amount","Fraud_Risk","Loyalty_Boost"
390
+ ]
391
+ X = train_df[cols].astype(float)
392
+ y = train_df["y"].astype(int)
393
+
394
+ X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=seed, stratify=y)
395
+ model = LogisticRegression(max_iter=1000, solver="lbfgs")
396
+ model.fit(X_tr, y_tr)
397
+
398
+ auc = float(roc_auc_score(y_te, model.predict_proba(X_te)[:, 1]))
399
+ case_x = df_case_feat[cols].astype(float)
400
+
401
+ pd_risk = float(model.predict_proba(case_x)[:, 1][0])
402
+ conf = float(clamp(abs(pd_risk - 0.5) * 200.0, 0.0, 100.0))
403
+ urg = float(clamp((100.0 - conf) * 0.75, 0.0, 100.0))
404
+ return {"tool": "logreg_synth", "auc_test_synth": auc, "pd_risk": pd_risk, "confidence_0_100": conf, "hitl_urgency_0_100": urg}
405
+
406
+ def fintech_tool_tree_synth(df_case_feat: pd.DataFrame, seed: int = 42) -> Dict[str, Any]:
407
+ train_df = _fintech_make_synth_training(seed=seed, n=1500)
408
+ cols = [
409
+ "DTI","Score_Gap","Missed_Norm","Tenure_Norm","Lines_Norm",
410
+ "Employment_Risk_Weight","Savings_to_Income","Collateral_to_Amount","Fraud_Risk","Loyalty_Boost"
411
+ ]
412
+ X = train_df[cols].astype(float)
413
+ y = train_df["y"].astype(int)
414
+
415
+ X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=seed, stratify=y)
416
+ model = DecisionTreeClassifier(max_depth=4, random_state=seed)
417
+ model.fit(X_tr, y_tr)
418
+
419
+ auc = float(roc_auc_score(y_te, model.predict_proba(X_te)[:, 1]))
420
+ case_x = df_case_feat[cols].astype(float)
421
+
422
+ pd_risk = float(model.predict_proba(case_x)[:, 1][0])
423
+ conf = float(clamp(abs(pd_risk - 0.5) * 200.0, 0.0, 100.0))
424
+ urg = float(clamp((100.0 - conf) * 0.75, 0.0, 100.0))
425
+ return {"tool": "tree_synth", "auc_test_synth": auc, "pd_risk": pd_risk, "confidence_0_100": conf, "hitl_urgency_0_100": urg}
426
+
427
+ FINTECH_TOOL_REGISTRY = {
428
+ "heuristic": fintech_tool_heuristic,
429
+ "logreg_synth": fintech_tool_logreg_synth,
430
+ "tree_synth": fintech_tool_tree_synth,
431
+ }
432
+
433
+ def fintech_recommend(score: Dict[str, Any], requested_amount: float) -> Dict[str, Any]:
434
+ conf = float(score.get("confidence_0_100", 0.0))
435
+ urg = float(score.get("hitl_urgency_0_100", 100.0))
436
+ bump = 0.0
437
+ if HIGH_IMPACT_AMOUNT > 0 and requested_amount > 0:
438
+ ratio = requested_amount / HIGH_IMPACT_AMOUNT
439
+ bump = 20.0 * clamp(math.log10(ratio + 1.0) / math.log10(11.0), 0.0, 1.0)
440
+ urg2 = float(clamp(urg + bump, 0.0, 100.0))
441
+ decision = "Needs Human Review" if (urg2 >= 60.0 or conf <= 25.0) else "Decision Draft"
442
+ return {"decision": decision, "hitl_urgency_0_100": urg2, "prediction_pd": float(score.get("pd_risk", 0.5))}
443
+
444
+
445
+ # =========================
446
+ # TE pricing heuristic (PRIMARY)
447
+ # =========================
448
def te_pricing_heuristic(inp: Dict[str, Any]) -> Dict[str, Any]:
    """Primary Python pricing heuristic for the TE workflow.

    Derives a presale/retail price pair from cost inputs plus simple
    segment / channel / volume / demand signals, while enforcing the
    company rule ``presale >= max(7 * COGS, presale_mult * COGS)``.

    Args:
        inp: dict with required keys "cogs", "landed", "presale_mult",
            "discount" and optional keys "channel", "target_segment",
            "expected_presale_units", "demand_index", "price_sensitivity",
            "price_step".

    Returns:
        dict with the floor, presale/retail prices rounded to x.99,
        per-unit margins, boolean constraint checks, and the anchor
        diagnostics used to derive the prices.
    """
    cogs = float(inp["cogs"])
    landed = float(inp["landed"])
    mult = float(inp["presale_mult"])
    discount = float(inp["discount"])
    channel = str(inp.get("channel", "DTC"))
    segment = str(inp.get("target_segment", "Mid-market"))
    units = int(inp.get("expected_presale_units", 0))

    # User-friendly demand controls (0-100)
    demand_index = clamp(float(inp.get("demand_index", 60.0)), 0.0, 100.0)
    price_sensitivity = clamp(float(inp.get("price_sensitivity", 60.0)), 0.0, 100.0)
    # NOTE(review): price_step is validated and echoed in "anchors" but is not
    # otherwise used by the heuristic yet — confirm intended behavior.
    price_step = max(1.0, float(inp.get("price_step", 10.0)))

    # Company rule: presale may never drop below max(7x COGS, multiplier x COGS).
    floor = max(7.0 * cogs, mult * cogs)

    # Segment anchor retail
    # (pure heuristic so the AI demo doesn't get stuck at 42/52)
    seg = segment.lower()
    if seg.startswith("budget"):
        retail_anchor = 79.99
    elif seg.startswith("premium"):
        retail_anchor = 149.99
    else:
        retail_anchor = 109.99

    # Channel adjustments (any unrecognized channel, e.g. DTC, is left as-is).
    ch = channel.lower()
    if ch == "amazon":
        retail_anchor -= 10.0
    elif ch == "retail":
        retail_anchor += 10.0
    elif ch == "wholesale":
        retail_anchor -= 15.0

    # Volume signal: more units -> can accept slightly lower retail
    if units >= 5000:
        retail_anchor -= 8.0
    elif units >= 2000:
        retail_anchor -= 4.0
    elif 0 < units < 300:
        retail_anchor += 6.0

    # Demand controls: higher demand_index supports higher willingness-to-pay;
    # higher sensitivity pushes price down.
    retail_anchor += (demand_index - 50.0) * 0.3   # up to about +/-15
    retail_anchor -= (price_sensitivity - 50.0) * 0.2  # up to about +/-10

    # Retail must be high enough that presale (after discount) stays >= floor.
    retail_floor_from_discount = floor / max(1e-6, (1.0 - discount))
    retail = max(retail_anchor, retail_floor_from_discount)

    presale = max(retail * (1.0 - discount), floor)

    presale = as_99(presale)
    retail = as_99(retail)

    # BUG FIX: as_99 rounds to the nearest whole dollar minus one cent, which
    # can land just BELOW the floor (e.g. floor=42.0 -> 41.99) and silently
    # violate the 7x-COGS rule. Bump to the next .99 point until the floor holds.
    while presale < floor:
        presale = as_99(presale + 1.0)

    # ensure retail>presale strictly
    if retail <= presale:
        retail = as_99(presale / max(1e-6, (1.0 - discount)))
    while retail <= presale:
        retail = as_99(retail + 1.0)

    checks = {
        "presale_ge_7xcogs": presale >= 7.0 * cogs,
        "presale_ge_floor": presale >= floor,
        "retail_gt_presale": retail > presale,
        "presale_gt_landed": presale > landed,
        "retail_gt_landed": retail > landed,
    }

    return {
        "tool": "pricing_heuristic",
        "floor": floor,
        "presale_price": presale,
        "retail_price": retail,
        "unit_margin_presale": float(presale - landed),
        "unit_margin_retail": float(retail - landed),
        "checks": checks,
        "anchors": {"segment": segment, "channel": channel, "retail_anchor": retail_anchor, "units": units, "demand_index": demand_index, "price_sensitivity": price_sensitivity, "price_step": price_step},
    }
527
+
528
+
529
+ # =========================
530
+ # 5-step explanation builder (Python, always non-empty)
531
+ # =========================
532
def build_5step_explanation_md(title: str, step_bullets: Dict[int, List[str]], evidence_md: str, prediction_bullet: str, decision_bullet: str) -> str:
    """Assemble the standard Explanation/Evidence/Prediction/Decision markdown.

    Args:
        title: label for the workflow; NOTE(review): currently unused in the body.
        step_bullets: bullets per step number 1..5; empty/missing steps render
            a "(no content)" placeholder so the section is never blank.
        evidence_md: pre-rendered markdown for the Evidence section.
        prediction_bullet: single bullet line for the Prediction section.
        decision_bullet: single bullet line for the Decision section.

    Returns:
        A single markdown string (trailing whitespace stripped).
    """
    md: List[str] = []
    md.append("## Explanation")
    # Use professional workflow section titles instead of "Step 1..5"
    for i in range(1, 6):
        section_title = FIVE_STEP_TITLES[i - 1]
        md.append(f"### {section_title}")
        blt = step_bullets.get(i, [])
        if not blt:
            blt = ["(no content)"]
        # Cap each section at 8 bullets to keep the report compact.
        for b in blt[:8]:
            md.append(f"- {b}")
        md.append("")
    md.append("## Evidence")
    md.append(evidence_md.strip() if evidence_md.strip() else "- (no evidence)")
    md.append("")
    md.append("## Prediction")
    md.append(f"- {prediction_bullet}")
    md.append("")
    md.append("## Decision")
    md.append(f"- {decision_bullet}")
    return "\n".join(md).strip()
554
+
555
+
556
+
557
+ # =========================
558
+ # Simple visualizations (matplotlib)
559
+ # =========================
560
def plot_fintech_pd_bar(pd_risk: float):
    """Render a gauge-style horizontal bar (0-100%) for a single PD value.

    The probability is clamped to [0, 1] first, so the figure is robust to
    slightly out-of-range inputs.
    """
    import matplotlib.pyplot as plt

    pct = float(clamp(pd_risk, 0.0, 1.0)) * 100.0

    fig = plt.figure(figsize=(6.0, 1.6))
    ax = fig.add_subplot(111)

    # Grey track spanning the full 0-100 range, then the blue value fill.
    ax.barh([0], [100], height=0.5, color="#e6e6e6")
    ax.barh([0], [pct], height=0.5, color="#1f77b4")

    ax.set_xlim(0, 100)
    ax.set_yticks([])
    ax.set_xlabel("PD (%)")
    ax.set_title(f"Delinquency probability (PD): {pct:.1f}%")

    # Percentage label just past the end of the filled bar (kept on-canvas).
    ax.text(min(pct + 2, 98), 0, f"{pct:.1f}%", va="center", ha="left", fontsize=11)

    ax.grid(axis="x", linestyle="--", alpha=0.3)
    fig.tight_layout()
    return fig
590
+
591
def plot_te_price_stacked(presale: float, retail: float):
    """Draw retail as a background bar with presale overlaid at the same x.

    Both values are annotated above their bar tops and the legend is placed
    outside the axes. Inputs are floored at 0 and retail never drops below
    presale for display purposes.
    """
    import matplotlib.pyplot as plt

    lo = float(max(presale, 0.0))
    hi = float(max(retail, lo))

    fig = plt.figure(figsize=(6.0, 2.2))
    ax = fig.add_subplot(111)

    xs = [0]
    # Retail first (background), presale second (foreground) on the same x.
    retail_bar = ax.bar(xs, [hi], width=0.6, color="#9f1bdd", label="Retail")
    presale_bar = ax.bar(xs, [lo], width=0.6, color="#4d74f3", label="Presale")

    ax.set_xticks(xs)
    ax.set_xticklabels(["Price"])
    ax.set_ylabel("$")
    ax.set_title("Presale vs Retail")
    ax.grid(axis="y", linestyle="--", alpha=0.3)
    ax.set_ylim(0, max(hi, lo) * 1.25)

    # Legend outside the plot area on the right.
    ax.legend(loc="center left", bbox_to_anchor=(1.02, 0.5), frameon=False)

    def label_bar(container, amount: float) -> None:
        # Dollar value centered just above the bar top.
        rect = container[0]
        ax.text(
            rect.get_x() + rect.get_width() / 2.0,
            rect.get_height() + max(1.0, 0.02 * hi),
            f"${amount:.2f}",
            ha="center",
            va="bottom",
            fontsize=10,
        )

    label_bar(retail_bar, hi)
    label_bar(presale_bar, lo)

    fig.tight_layout()
    return fig
637
+
638
+ # =========================
639
+ # Workflows
640
+ # =========================
641
# Section titles for the standard 5-step workflow; index i-1 is the title for
# step i (consumed by the run_* workflows and build_5step_explanation_md).
FIVE_STEP_TITLES = [
    "Interpreting Context & Metrics (Questions + EDA)",
    "Pre-Processing Data (Cleaning + Missing Values)",
    "Processing Data (Transform + Feature Build)",
    "Analyzing Data (Modeling + Scoring)",
    "Trend Analysis & Predictions (Decision + Share)",
]
648
def run_fintech_workflow(thread_id: str, about: str, inp: Dict[str, Any]) -> Tuple[str, Dict[str, Any]]:
    """Run the 5-step FinTech credit-risk workflow.

    Returns a (markdown report, trace payload) tuple; the payload is also
    appended to the JSONL log via LOGGER.
    """
    meta = run_metadata("fintech", thread_id)
    steps: List[StepTrace] = []

    # Step 1: capture the free-text account context (only length is recorded).
    s1, _ = run_step(1, FIVE_STEP_TITLES[0], "context_capture", {"about_preview": about[:200]}, lambda: {"about_len": len(about)})
    steps.append(s1)

    # Step 2: build the single-row frame and preprocess (missing-value fills).
    df_raw = fintech_build_row(inp)
    df_clean, prep = fintech_preprocess(df_raw)
    s2, _ = run_step(2, FIVE_STEP_TITLES[1], "fintech_preprocess", {}, lambda: prep)
    steps.append(s2)

    # Step 3: feature engineering; preview the engineered columns for evidence.
    df_feat = fintech_features(df_clean)
    feat_preview = df_feat[[
        "DTI","Score_Gap","Missed_Norm","Tenure_Norm","Lines_Norm",
        "Savings_to_Income","Collateral_to_Amount","Fraud_Risk","Loyalty_Boost"
    ]].iloc[0].to_dict()
    s3, _ = run_step(3, FIVE_STEP_TITLES[2], "fintech_features", {}, lambda: {"feature_preview": feat_preview})
    steps.append(s3)

    # Model tool selection: default heuristic; if you later want LLM selector here, it can be added safely.
    chosen_tool = "logreg_synth" if inp.get("use_ml_model", True) else "heuristic"
    if chosen_tool not in FINTECH_TOOL_REGISTRY:
        chosen_tool = "heuristic"

    def _score():
        # Dispatch to the registered scoring tool; only the synthetic model
        # takes a seed argument.
        fn = FINTECH_TOOL_REGISTRY[chosen_tool]
        if chosen_tool == "heuristic":
            return fn(df_feat)  # type: ignore
        return fn(df_feat, seed=DEFAULT_SYNTHETIC_SEED)  # type: ignore

    # Step 4: scoring. Fall back to the heuristic if the tool errored
    # (run_step returns a non-dict on failure).
    s4, score = run_step(4, FIVE_STEP_TITLES[3], f"python_dispatch::{chosen_tool}", {}, _score)
    steps.append(s4)
    score = score if isinstance(score, dict) else fintech_tool_heuristic(df_feat)

    # Step 5: map the score to an operational decision; on failure, force review.
    requested_amount = float(inp["requested_amount"])
    s5, rec = run_step(5, FIVE_STEP_TITLES[4], "fintech_recommend + viz_pd_bar", {"requested_amount": requested_amount}, lambda: fintech_recommend(score, requested_amount))
    steps.append(s5)
    rec = rec if isinstance(rec, dict) else {"decision": "Needs Human Review", "hitl_urgency_0_100": 100.0, "prediction_pd": float(score.get("pd_risk", 0.5))}

    # Consolidated outputs (defensive .get defaults throughout).
    final = {
        "decision": rec["decision"],
        "pd_risk": float(score.get("pd_risk", 0.5)),
        "confidence_0_100": float(score.get("confidence_0_100", 0.0)),
        "hitl_urgency_0_100": float(rec.get("hitl_urgency_0_100", score.get("hitl_urgency_0_100", 100.0))),
        "selected_tool": chosen_tool,
        "auc_test_synth": float(score.get("auc_test_synth", -1.0)),
    }

    # build step bullets (Python, always non-empty)
    step_bullets = {
        1: [
            "Captured account context and key request parameters.",
            f"Requested amount = {requested_amount:.0f}, employment = {inp['employment_status']}, fraud_flag = {inp['fraud_flag']}.",
        ],
        2: [
            "Checked and filled missing values using simple deterministic rules.",
            f"Missing values: before {prep.get('missing_before', {})} → after {prep.get('missing_after', {})}.",
        ],
        3: [
            "Engineered core risk features (DTI, credit score gap, missed payments, tenure, liquidity, collateral, fraud).",
            "These features act as inputs to the scoring model.",
        ],
        4: [
            f"Ran scoring tool: {chosen_tool}.",
            f"Produced PD={final['pd_risk']*100:.1f}% and confidence={final['confidence_0_100']:.1f}/100.",
        ],
        5: [
            "Converted score to an operational decision using HITL urgency and confidence.",
            f"Decision={final['decision']} with HITL_urgency={final['hitl_urgency_0_100']:.1f}/100.",
        ],
    }

    evidence_md = (
        f"- Tool used: `{chosen_tool}`\n"
        f"- Key engineered features (preview):\n\n```json\n{json.dumps(feat_preview, indent=2)}\n```\n"
    )
    # AUC is only meaningful for the synthetic model (sentinel is -1.0).
    if final.get("auc_test_synth", -1.0) >= 0:
        evidence_md += f"- Synthetic AUC (internal): `{final['auc_test_synth']:.3f}`\n"

    prediction_bullet = f"Predicted delinquency probability (PD) = {final['pd_risk']*100:.1f}%"
    decision_bullet = f"{final['decision']} (confidence={final['confidence_0_100']:.1f}/100, HITL_urgency={final['hitl_urgency_0_100']:.1f}/100)"

    explanation_md = build_5step_explanation_md("FinTech Credit Risk", step_bullets, evidence_md, prediction_bullet, decision_bullet)

    report = (
        "## Result\n"
        f"- Decision: **{final['decision']}**\n"
        f"- Delinquency probability (PD): **{final['pd_risk']*100:.1f}%**\n"
        f"- Confidence score: **{final['confidence_0_100']:.1f}/100**\n"
        f"- HITL urgency: **{final['hitl_urgency_0_100']:.1f}/100**\n"
        f"- Tool used: `{final['selected_tool']}`\n\n"
        f"{explanation_md}"
    )

    # Full trace payload: metadata + inputs + per-step traces + outputs.
    payload = {
        **meta,
        "about": about,
        "inputs": inp,
        "steps": [asdict(x) for x in steps],
        "outputs": {"final": final, "preprocess": prep, "feature_preview": feat_preview},
    }
    LOGGER.append(payload)
    return report, payload
752
+
753
def run_te_workflow(thread_id: str, about: str, inp: Dict[str, Any]) -> Tuple[str, Dict[str, Any]]:
    """Run the 5-step TE pricing workflow.

    Always runs the deterministic Python pricing heuristic; an optional LLM
    adjustment is applied on top only when an API client is available.
    Returns a (markdown report, trace payload) tuple and logs the payload.
    """
    meta = run_metadata("te_pricing", thread_id)
    steps: List[StepTrace] = []

    # Step 1: capture the free-text product context.
    s1, _ = run_step(1, FIVE_STEP_TITLES[0], "context_capture", {"about_preview": about[:200]}, lambda: {"about_len": len(about)})
    steps.append(s1)

    # Step 2: validate raw inputs against basic pricing constraints.
    s2, checks = run_step(2, FIVE_STEP_TITLES[1], "te_input_checks", {}, lambda: {
        "checks": {
            "discount_range": 0.0 < float(inp["discount"]) < 0.9,
            "cogs_gt_0": float(inp["cogs"]) > 0,
            "landed_gt_0": float(inp["landed"]) > 0,
        }
    })
    steps.append(s2)

    # Step 3: derive
    floor = max(7.0 * float(inp["cogs"]), float(inp["presale_mult"]) * float(inp["cogs"]))
    s3, derived = run_step(3, FIVE_STEP_TITLES[2], "te_derive", {}, lambda: {
        "presale_floor": floor,
        "implied_retail_floor": floor / max(1e-6, (1.0 - float(inp["discount"]))),
        "channel": str(inp.get("channel", "")),
        "segment": str(inp.get("target_segment", "")),
        "expected_units": int(inp.get("expected_presale_units", 0)),
        "demand_index": float(inp.get("demand_index", 60.0)),
        "price_sensitivity": float(inp.get("price_sensitivity", 60.0)),
        "price_step": float(inp.get("price_step", 10.0)),
    })
    steps.append(s3)

    # Step 4: Python pricing heuristic always runs (so not stuck at 42/52)
    s4, base_price = run_step(4, FIVE_STEP_TITLES[3], "pricing_heuristic", {}, lambda: te_pricing_heuristic(inp))
    steps.append(s4)
    # Fallback: re-run directly if run_step returned a non-dict (failure path).
    base_price = base_price if isinstance(base_price, dict) else te_pricing_heuristic(inp)

    # Optional LLM adjustment (if key exists)
    client = _client_or_none()
    adj = None
    if client is not None:
        s4b, adj = run_step(4, "LLM Adjustment (optional)", "llm_adjustment", {}, lambda: llm_te_adjustment(client, about, inp, base_price))
        # keep as step 4.5 in trace by using step_no=4 but different title; still 5-step in main trace? We keep it in steps list.
        steps.append(s4b)
    else:
        adj = {"confidence_0_100": 0.0, "rationale_bullets": ["LLM adjustment skipped (no API key)."], "competitor_range": {"low": 0.0, "high": 0.0}}

    # Apply adjustment deltas safely
    presale = float(base_price["presale_price"]) + float(adj.get("adj_presale_delta", 0.0))
    retail = float(base_price["retail_price"]) + float(adj.get("adj_retail_delta", 0.0))

    # Re-enforce constraints
    presale = max(presale, floor)
    retail_floor = presale / max(1e-6, (1.0 - float(inp["discount"])))
    retail = max(retail, retail_floor)

    # Round both prices to the x.99 convention, keeping retail strictly above presale.
    presale = as_99(presale)
    retail = as_99(retail)
    if retail <= presale:
        retail = as_99(retail_floor)

    # Final recommendation + constraint verification.
    final = {
        "presale_price": presale,
        "retail_price": retail,
        "unit_margin_presale": float(presale - float(inp["landed"])),
        "unit_margin_retail": float(retail - float(inp["landed"])),
        "checks": {
            "presale_ge_7xcogs": presale >= 7.0 * float(inp["cogs"]),
            "presale_ge_floor": presale >= floor,
            "retail_gt_presale": retail > presale,
            "presale_gt_landed": presale > float(inp["landed"]),
            "retail_gt_landed": retail > float(inp["landed"]),
        },
        "llm_confidence_0_100": float(adj.get("confidence_0_100", 0.0)),
        "competitor_range": adj.get("competitor_range", {"low": 0.0, "high": 0.0}),
        "tool_used": "pricing_heuristic (+optional_llm_adjustment)",
    }

    s5, _ = run_step(5, FIVE_STEP_TITLES[4], "te_finalize + viz_price_stacked", {}, lambda: final)
    steps.append(s5)

    # Explanation bullets (Python, always non-empty)
    step_bullets = {
        1: [
            "Captured product/channel/segment context and pricing constraints.",
            f"Channel={inp.get('channel')}, segment={inp.get('target_segment')}, expected_units={int(inp.get('expected_presale_units', 0))}.",
        ],
        2: [
            "Validated inputs and constraints (COGS, landed, discount range).",
            f"Key constraint: presale floor = max(7×COGS, multiplier×COGS) = {floor:.2f}.",
        ],
        3: [
            "Derived operational floor prices and retail floor implied by discount.",
            f"Implied retail floor ≈ {floor / max(1e-6, (1.0 - float(inp['discount']))):.2f}.",
        ],
        4: [
            "Computed a value-based retail anchor using segment + channel + volume signals (Python heuristic).",
            f"Optional LLM adjustment used only if API key exists (LLM_conf={final['llm_confidence_0_100']:.0f}/100).",
        ],
        5: [
            "Output final recommended presale/retail prices and margins, plus constraint checks.",
            f"Presale={final['presale_price']:.2f}, Retail={final['retail_price']:.2f}.",
        ],
    }

    evidence_md = (
        f"- Tool used: `{final['tool_used']}`\n"
        f"- Base anchors: `{base_price.get('anchors', {})}`\n"
        f"- Competitor range (optional, no browsing): `{final['competitor_range']}`\n"
        f"- Checks: `{final['checks']}`\n"
    )
    # Append LLM rationale bullets (max 7) when the adjustment produced any.
    if adj and adj.get("rationale_bullets"):
        evidence_md += "- LLM rationale (optional):\n"
        for b in adj["rationale_bullets"][:7]:
            evidence_md += f"  - {b}\n"

    prediction_bullet = f"Recommended presale={final['presale_price']:.2f}, retail={final['retail_price']:.2f}"
    decision_bullet = "Decision Draft (pricing recommendation ready). Human review recommended if brand/legal constraints are strict."

    explanation_md = build_5step_explanation_md("TE Pricing", step_bullets, evidence_md, prediction_bullet, decision_bullet)

    report = (
        "## Result\n"
        f"- Presale price: **{final['presale_price']:.2f}**\n"
        f"- Retail price: **{final['retail_price']:.2f}**\n"
        f"- Margin (presale/retail): **{final['unit_margin_presale']:.2f} / {final['unit_margin_retail']:.2f}**\n"
        f"- Checks: `{final['checks']}`\n"
        f"- Tool used: `{final['tool_used']}`\n\n"
        f"{explanation_md}"
    )

    # Full trace payload: metadata + inputs + per-step traces + outputs.
    payload = {
        **meta,
        "about": about,
        "inputs": inp,
        "steps": [asdict(x) for x in steps],
        "outputs": {"final": final, "derived": derived, "base_price": base_price, "llm_adjustment": adj},
    }
    LOGGER.append(payload)
    return report, payload
891
+
892
+
893
+ # =========================
894
+ # Gradio UI
895
+ # =========================
896
def build_gradio_app():
    """Build the Gradio Blocks UI (FinTech tab, TE pricing tab, Trace tab).

    Returns:
        The constructed gr.Blocks app (caller is responsible for .launch()).
    """
    import gradio as gr

    # Holds the most recent run payload so the Trace tab can display it.
    STATE: Dict[str, Any] = {"last_payload": None}
    emp_choices = ["Employed", "Self-employed", "Student", "Unemployed", "Retired", "Contract", "Other"]

    def safe_call(fn):
        # Wrap a UI handler so exceptions render as markdown instead of crashing.
        try:
            return fn()
        except Exception:
            tb = traceback.format_exc()
            print(tb)
            return "ERROR:\n\n```text\n" + tb + "\n```", None, ""

    def ui_fintech(
        about_account,
        income, debt, credit_score,
        employment_status,
        missed_12m, months_on_book, credit_lines,
        requested_amount,
        savings, collateral_value,
        fraud_flag, existing_customer,
        use_ml_model,
        thread_id
    ):
        # Collect widget values into the workflow input dict and run.
        def _do():
            inp = {
                "income": float(income),
                "debt": float(debt),
                "credit_score": int(credit_score),
                "employment_status": str(employment_status),
                "missed_payments_12m": int(missed_12m),
                "months_on_book": int(months_on_book),
                "credit_lines": int(credit_lines),
                "requested_amount": float(requested_amount),
                "savings": float(savings),
                "collateral_value": float(collateral_value),
                "fraud_flag": int(fraud_flag),
                "existing_customer": int(existing_customer),
                "use_ml_model": bool(use_ml_model),
            }
            report, payload = run_fintech_workflow(str(thread_id), str(about_account or ""), inp)
            STATE["last_payload"] = payload
            pd_val = float((payload.get("outputs") or {}).get("final", {}).get("pd_risk", 0.0))
            fig = plot_fintech_pd_bar(pd_val)
            return report, fig, payload["run_id"]
        return safe_call(_do)

    def ui_te(about_product, cogs, landed, presale_mult, discount, demand_index, price_sensitivity, price_step, channel, target_segment, expected_presale_units, thread_id):
        def _do():
            inp = {
                "cogs": float(cogs),
                "landed": float(landed),
                "presale_mult": float(presale_mult),
                "discount": float(discount),
                "demand_index": float(demand_index),
                "price_sensitivity": float(price_sensitivity),
                "price_step": float(price_step),
                "channel": str(channel),
                "target_segment": str(target_segment),
                "expected_presale_units": int(expected_presale_units),
            }
            # Synthesize a context string when the user typed nothing.
            about_text = str(about_product or "").strip()
            if not about_text:
                about_text = f"channel={inp['channel']}; segment={inp['target_segment']}; expected_presale_units={inp['expected_presale_units']}"
            report, payload = run_te_workflow(str(thread_id), about_text, inp)
            STATE["last_payload"] = payload
            final = (payload.get("outputs") or {}).get("final", {}) or {}
            presale = float(final.get("presale_price", 0.0))
            retail = float(final.get("retail_price", 0.0))
            fig = plot_te_price_stacked(presale, retail)
            return report, fig, payload["run_id"]
        return safe_call(_do)

    def ui_current_trace():
        p = STATE.get("last_payload")
        if not p:
            return "No run yet."
        return json.dumps(p, indent=2)

    def ui_logs(n):
        return json.dumps(LOGGER.tail(int(n)), indent=2)

    with gr.Blocks(title=APP_TITLE) as demo:
        gr.Markdown("## Demo C")

        with gr.Row():
            thread_id = gr.Textbox(value="demo_thread", label="thread_id")

        with gr.Tabs():
            with gr.Tab("FinTech"):
                with gr.Row():
                    with gr.Column(scale=5, min_width=480):
                        gr.Markdown("### Inputs")
                        about_account = gr.Textbox(
                            label="About this account (customer context)",
                            lines=6,
                            placeholder="Type customer/account details: repayment history, special situations, collateral notes, verification notes, etc."
                        )

                        employment_status = gr.Dropdown(choices=emp_choices, value="Employed", label="Employment status")

                        with gr.Row():
                            income = gr.Number(value=75000, label="Income (annual)")
                            debt = gr.Number(value=30000, label="Debt (total)")
                        with gr.Row():
                            credit_score = gr.Number(value=680, label="Credit score (300-850)")
                            requested_amount = gr.Number(value=250000, label="Requested amount")
                        with gr.Row():
                            missed_12m = gr.Number(value=1, label="Missed payments (12m)")
                            months_on_book = gr.Number(value=18, label="Months on book")
                            credit_lines = gr.Number(value=4, label="Credit lines")

                        with gr.Row():
                            savings = gr.Number(value=8000, label="Savings / liquid assets")
                            collateral_value = gr.Number(value=0, label="Collateral value")

                        with gr.Row():
                            fraud_flag = gr.Dropdown(choices=[0, 1], value=0, label="Fraud flag (0/1)")
                            existing_customer = gr.Dropdown(choices=[0, 1], value=1, label="Existing customer (0/1)")

                        use_ml_model = gr.Checkbox(value=True, label="Use synthetic data")

                        btn = gr.Button("Run", variant="primary")

                    with gr.Column(scale=7, min_width=640):
                        gr.Markdown("### Output")
                        out = gr.Markdown(value="_(Run to see result.)_")
                        fintech_plot = gr.Plot(label="PD visualization")
                        run_id_out = gr.Textbox(label="run_id")

                btn.click(
                    fn=ui_fintech,
                    inputs=[
                        about_account,
                        income, debt, credit_score,
                        employment_status,
                        missed_12m, months_on_book, credit_lines,
                        requested_amount,
                        savings, collateral_value,
                        fraud_flag, existing_customer,
                        use_ml_model,
                        thread_id
                    ],
                    outputs=[out, fintech_plot, run_id_out],
                )

            with gr.Tab("TE"):
                with gr.Row():
                    with gr.Column(scale=5, min_width=480):
                        gr.Markdown("### Inputs")
                        about_product = gr.Textbox(
                            label="About this product/account (context)",
                            lines=6,
                            placeholder="Type product + customer context: segment, channel, positioning, constraints, demand signals."
                        )

                        with gr.Row():
                            cogs = gr.Number(value=6, label="COGS per unit")
                            landed = gr.Number(value=10, label="Landed cost per unit")
                        with gr.Row():
                            presale_mult = gr.Number(value=7, label="Presale floor multiplier")
                            discount = gr.Number(value=0.20, label="Discount (0-0.9)")
                        with gr.Row():
                            demand_index = gr.Slider(0, 100, value=60, step=1, label="Demand level (0-100)")
                            price_sensitivity = gr.Slider(0, 100, value=60, step=1, label="Price sensitivity (0-100)")
                        price_step = gr.Dropdown(choices=[10, 20, 50], value=10, label="Price step ($)")
                        # BUG FIX: the default value must be a member of `choices`;
                        # "DTC" alone is not, and Gradio rejects out-of-choices defaults.
                        channel = gr.Dropdown(choices=["DTC (Direct-to-Consumer)", "Amazon", "Retail", "Wholesale"], value="DTC (Direct-to-Consumer)", label="Channel")
                        target_segment = gr.Dropdown(choices=["Budget", "Mid-market", "Premium"], value="Mid-market", label="Target segment")
                        expected_presale_units = gr.Number(value=1000, label="Expected presale units (rough)")

                        btn2 = gr.Button("Run", variant="primary")

                    with gr.Column(scale=7, min_width=640):
                        gr.Markdown("### Output")
                        out2 = gr.Markdown(value="_(Run to see result.)_")
                        te_plot = gr.Plot(label="Price visualization")
                        run_id_out2 = gr.Textbox(label="run_id")

                btn2.click(
                    fn=ui_te,
                    inputs=[about_product, cogs, landed, presale_mult, discount, demand_index, price_sensitivity, price_step, channel, target_segment, expected_presale_units, thread_id],
                    outputs=[out2, te_plot, run_id_out2],
                )

            with gr.Tab("Trace"):
                gr.Markdown("### Current run (full trace JSON)")
                btn3 = gr.Button("Show current run")
                cur = gr.Code(language="json")
                btn3.click(fn=ui_current_trace, inputs=[], outputs=[cur])

                gr.Markdown("### Log tail (JSONL)")
                n = gr.Slider(10, 200, value=30, step=10, label="show last N runs")
                btn4 = gr.Button("Refresh logs")
                logs = gr.Code(language="json")
                btn4.click(fn=ui_logs, inputs=[n], outputs=[logs])

    return demo
1094
+
1095
+
1096
def main():
    """Entry point: construct the Gradio app and serve it on PORT (default 7860)."""
    port = int(os.getenv("PORT", "7860"))
    app = build_gradio_app()
    app.launch(server_name="0.0.0.0", server_port=port, debug=True)


if __name__ == "__main__":
    main()
framework_demo_b.py CHANGED
@@ -1,81 +1,64 @@
1
- """
2
- LangGraph + LangChain Framework Demo (Stateful + Traceable) with OpenAI
3
-
4
- This version is closer to LangChain/LangGraph "agent + memory" patterns:
5
- - LangGraph ReAct agent (tool-calling) with short-term memory via checkpointer (thread_id)
6
- - Traceable run logs: run_id, model_id, version_id, policy_id, step traces, evidence
7
- - Two business workflows:
8
- 1) FinTech credit risk demo (single case) + HITL policy routing
9
- 2) TE consumer product pricing (presale + retail) + constraint checks + benchmark draft (LLM, no web)
10
-
11
- Gradio:
12
- - Tab 1: FinTech (form -> agent decides tools -> structured output + explanation)
13
- - Tab 2: TE Pricing (form -> agent decides tools -> structured output + explanation)
14
- - Tab 3: Logs (tail)
15
-
16
- Hugging Face:
17
- - Rename this file to app.py
18
- - Use requirements_langgraph.txt as requirements.txt
19
- - Add OPENAI_API_KEY in Space secrets
20
-
21
- Security:
22
- - This file redacts API keys from logs and UI outputs.
23
-
24
- Note:
25
- - "Benchmark research" here is AI-generated only. No browsing. Treat as draft until verified.
26
-
27
- """
28
-
29
  from __future__ import annotations
30
 
31
  import json
 
32
  import os
33
- import re
34
  import time
35
  import uuid
36
- import sqlite3
37
- from dataclasses import asdict, dataclass, field
38
  from datetime import datetime, timezone
39
  from pathlib import Path
40
  from typing import Any, Dict, List, Optional, Tuple
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  # =========================
44
- # IDs + versions (traceable)
45
  # =========================
46
 
47
  AGENT_ID = os.getenv("AGENT_ID", "nexdatawork_demo_agent")
48
- MODEL_ID = os.getenv("MODEL_ID", "framework_langgraph_agent")
49
  VERSION_ID = os.getenv("VERSION_ID", "1.0.0")
50
 
51
- POLICY_ID = os.getenv("POLICY_ID", "hitl_and_pricing_policy")
52
  POLICY_VERSION = os.getenv("POLICY_VERSION", "1.0")
53
 
54
- LLM_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
55
-
56
- # Artifacts for HF Spaces
57
  DATA_DIR = Path(os.getenv("DATA_DIR", "./data"))
58
  DATA_DIR.mkdir(parents=True, exist_ok=True)
59
  RUN_LOG_PATH = DATA_DIR / os.getenv("RUN_LOG_PATH", "run_logs.jsonl")
60
- CHECKPOINT_PATH = DATA_DIR / os.getenv("CHECKPOINT_PATH", "checkpoints.sqlite")
61
 
62
- # FinTech policy knobs
63
- RISK_THRESHOLD = float(os.getenv("RISK_THRESHOLD", "0.50"))
64
- BORDER_BAND = float(os.getenv("BORDER_BAND", "0.05"))
65
  HIGH_IMPACT_AMOUNT = float(os.getenv("HIGH_IMPACT_AMOUNT", "1000000"))
66
-
67
- # TE pricing defaults
68
- DEFAULT_COGS = float(os.getenv("DEFAULT_COGS", "6"))
69
- DEFAULT_LANDED = float(os.getenv("DEFAULT_LANDED", "10"))
70
- DEFAULT_PRESALE_MULT = float(os.getenv("DEFAULT_PRESALE_MULT", "7"))
71
- DEFAULT_PRESALE_DISCOUNT = float(os.getenv("DEFAULT_PRESALE_DISCOUNT", "0.20"))
72
- DEFAULT_PRICE_GRID = [x for x in range(42, 121, 1)] # 42..120 (demo grid)
73
 
74
 
75
- # =========================
76
- # Helpers
77
- # =========================
78
-
79
  def utc_now() -> str:
80
  return datetime.now(timezone.utc).replace(microsecond=0).isoformat()
81
 
@@ -83,7 +66,17 @@ def new_id(prefix: str) -> str:
83
  ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
84
  return f"{prefix}_{ts}_{uuid.uuid4().hex[:8]}"
85
 
86
- def run_metadata(task_type: str) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
 
 
87
  return {
88
  "run_id": new_id("run"),
89
  "agent_id": AGENT_ID,
@@ -91,60 +84,22 @@ def run_metadata(task_type: str) -> Dict[str, Any]:
91
  "version_id": VERSION_ID,
92
  "policy_id": POLICY_ID,
93
  "policy_version": POLICY_VERSION,
94
- "llm_model": LLM_MODEL,
95
  "task_type": task_type,
 
96
  "timestamps": {"created_at": utc_now()},
97
  }
98
 
99
 
100
- # =========================
101
- # Redaction (API key safety)
102
- # =========================
103
-
104
- _API_KEY_PATTERN = re.compile(r"sk-[A-Za-z0-9_\-]{20,}")
105
-
106
- def redact_text(s: str) -> str:
107
- if not isinstance(s, str):
108
- return s
109
- return _API_KEY_PATTERN.sub("sk-REDACTED", s)
110
-
111
- def redact(obj: Any) -> Any:
112
- if isinstance(obj, str):
113
- return redact_text(obj)
114
- if isinstance(obj, list):
115
- return [redact(x) for x in obj]
116
- if isinstance(obj, dict):
117
- return {k: redact(v) for k, v in obj.items()}
118
- return obj
119
-
120
-
121
- # =========================
122
- # Traceable logs
123
- # =========================
124
-
125
- @dataclass
126
- class StepTrace:
127
- step_id: str
128
- name: str
129
- started_at: str
130
- ended_at: str
131
- duration_ms: int
132
- inputs: Dict[str, Any] = field(default_factory=dict)
133
- outputs: Dict[str, Any] = field(default_factory=dict)
134
- evidence: Dict[str, Any] = field(default_factory=dict)
135
- error: Optional[str] = None
136
-
137
- class TraceLogger:
138
  def __init__(self, path: Path):
139
  self.path = path
140
  self.path.parent.mkdir(parents=True, exist_ok=True)
141
 
142
- def log(self, payload: Dict[str, Any]) -> None:
143
- payload = redact(payload)
144
  with self.path.open("a", encoding="utf-8") as f:
145
  f.write(json.dumps(payload, ensure_ascii=False) + "\n")
146
 
147
- def tail(self, n: int = 30) -> List[Dict[str, Any]]:
148
  if not self.path.exists():
149
  return []
150
  lines = self.path.read_text(encoding="utf-8").splitlines()
@@ -156,89 +111,177 @@ class TraceLogger:
156
  continue
157
  return out
158
 
159
- LOGGER = TraceLogger(RUN_LOG_PATH)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
 
162
  # =========================
163
- # Deterministic "model tools" (evidence-friendly)
164
  # =========================
165
 
166
def fintech_score_tool(income: float, debt: float, credit_score: int) -> Dict[str, Any]:
    """Baseline, interpretable risk score.

    risk = 0.6*(debt/income) + 0.4*((850-credit_score)/850)

    Raises:
        ValueError: if income is non-positive, debt is negative, or the
            credit score lies outside [300, 850].
    """
    # Guard clauses: reject invalid inputs up front.
    if income <= 0:
        raise ValueError("income must be > 0")
    if debt < 0:
        raise ValueError("debt must be >= 0")
    if not (300 <= credit_score <= 850):
        raise ValueError("credit_score must be between 300 and 850")

    debt_to_income = debt / income
    score_gap = (850 - credit_score) / 850
    blended_risk = (0.6 * debt_to_income) + (0.4 * score_gap)

    return {
        "risk_score_pd": float(blended_risk),
        "intermediates": {"debt_to_income": float(debt_to_income), "score_gap": float(score_gap)},
        "formula": "risk = 0.6*(debt/income) + 0.4*((850-credit_score)/850)",
    }
187
-
188
def hitl_policy(risk_score_pd: float, requested_amount: float) -> Dict[str, Any]:
    """Human-in-the-loop gating for a scored case.

    Forces review for high-impact amounts, high risk, or scores inside the
    borderline band around the threshold; all other cases become a draft.
    """
    threshold = RISK_THRESHOLD
    band = BORDER_BAND

    def verdict(decision: str, reason: str) -> Dict[str, Any]:
        # Every outcome carries the same payload shape.
        return {"decision": decision, "reason": reason, "threshold": threshold, "band": band}

    if requested_amount >= HIGH_IMPACT_AMOUNT:
        return verdict("Needs Human Review", "HIGH_IMPACT_CASE")
    if risk_score_pd >= threshold + band:
        return verdict("Needs Human Review", "HIGH_RISK")
    if threshold - band <= risk_score_pd < threshold + band:
        return verdict("Needs Human Review", "BORDERLINE_SCORE")
    return verdict("Decision Draft", "LOW_RISK")
-
208
- def te_pricing_tool(
209
- cogs: float,
210
- landed: float,
211
- presale_mult: float,
212
- discount: float,
213
- alpha: float = 120.0,
214
- beta: float = 0.08,
215
- ) -> Dict[str, Any]:
216
- """
217
- Consumer product pricing demo (predictive modeling placeholder):
218
- - Demand curve placeholder: demand = alpha * exp(-beta * price)
219
- - Objective: maximize (price - landed) * demand across a grid
220
- - Constraint: presale >= presale_mult * cogs, and presale >= 7*cogs (company rule)
221
- - Retail: retail = presale / (1 - discount), retail > presale
222
- """
223
- if cogs <= 0:
224
- raise ValueError("cogs must be > 0")
225
- if landed <= 0:
226
- raise ValueError("landed must be > 0")
227
- if presale_mult < 1:
228
- raise ValueError("presale_mult must be >= 1")
229
- if not (0.0 < discount < 0.9):
230
- raise ValueError("discount must be in (0, 0.9)")
231
-
232
- floor = presale_mult * cogs
233
- grid = [p for p in DEFAULT_PRICE_GRID if p >= floor]
234
 
235
- # exp without extra deps
236
- def exp(x: float) -> float:
237
- return float((2.718281828459045) ** x)
238
 
 
 
 
239
  best = None
240
  for p in grid:
241
- demand = float(alpha * exp(-beta * p))
242
  profit = (p - landed) * demand
243
  if best is None or profit > best["objective_profit"]:
244
  best = {"presale": float(p), "demand": float(demand), "objective_profit": float(profit)}
@@ -246,7 +289,6 @@ def te_pricing_tool(
246
  presale = float(best["presale"]) if best else float(floor)
247
  retail = presale / (1.0 - discount)
248
 
249
- # round to .99
250
  def as_99(x: float) -> float:
251
  v = round(x)
252
  return float(f"{max(v, 1) - 0.01:.2f}")
@@ -254,308 +296,231 @@ def te_pricing_tool(
254
  presale = as_99(presale)
255
  retail = as_99(retail)
256
 
257
- checks = {
258
- "presale_ge_floor": bool(presale >= floor),
259
- "presale_ge_7xcogs": bool(presale >= 7.0 * cogs),
260
- "retail_gt_presale": bool(retail > presale),
261
- }
262
-
263
  return {
264
- "inputs": {"cogs": cogs, "landed": landed, "presale_mult": presale_mult, "discount": discount, "alpha": alpha, "beta": beta},
265
  "presale_price": presale,
266
  "retail_price": retail,
267
- "unit_margin_presale": presale - landed,
268
- "unit_margin_retail": retail - landed,
 
 
 
 
269
  "optimization": best,
270
- "policy_checks": checks,
271
- "demand_model": "alpha * exp(-beta * price) (placeholder)",
272
- "notes": "Replace alpha/beta with real presale conversion or fitted demand model.",
273
  }
274
 
275
- def te_benchmark_placeholder() -> Dict[str, Any]:
276
- """
277
- No browsing in this demo. Provide a safe placeholder list.
278
- The OpenAI agent can draft an unverified benchmark list (marked as draft).
279
- """
280
- return {
281
- "benchmark_items": [
282
- {"category": "smart_plug", "brand": "TP-Link Kasa"},
283
- {"category": "smart_plug", "brand": "Amazon Smart Plug"},
284
- {"category": "in_wall_outlet", "brand": "Leviton Decora Smart"},
285
- {"category": "premium", "brand": "Eve (Matter/Thread)"},
286
- ],
287
- "limitations": "Placeholder only. Verify with real market data.",
288
- }
289
-
290
-
291
- # =========================
292
- # LangChain tools (for ReAct agent)
293
- # =========================
294
-
295
- def build_tools():
296
- from langchain_core.tools import tool
297
-
298
- @tool("fintech_score")
299
- def fintech_score(income: float, debt: float, credit_score: int) -> str:
300
- """Compute a baseline risk score (PD) with intermediates. Returns JSON string."""
301
- out = fintech_score_tool(income, debt, credit_score)
302
- return json.dumps(out)
303
-
304
- @tool("hitl_route")
305
- def hitl_route(score_pd: float, requested_amount: float) -> str:
306
- """Apply HITL policy routing. Returns JSON string."""
307
- out = hitl_policy(score_pd, requested_amount)
308
- return json.dumps(out)
309
-
310
- @tool("te_pricing")
311
- def te_pricing(cogs: float, landed: float, presale_mult: float, discount: float) -> str:
312
- """Compute presale + retail pricing under constraints. Returns JSON string."""
313
- out = te_pricing_tool(cogs, landed, presale_mult, discount)
314
- return json.dumps(out)
315
-
316
- @tool("te_benchmark_placeholder")
317
- def te_benchmark() -> str:
318
- """Return a placeholder competitor benchmark list. Returns JSON string."""
319
- return json.dumps(te_benchmark_placeholder())
320
-
321
- return [fintech_score, hitl_route, te_pricing, te_benchmark]
322
-
323
-
324
- # =========================
325
- # LangGraph agent with memory (thread_id)
326
- # =========================
327
-
328
- def build_checkpointer():
329
- """
330
- Use SQLite checkpointer if available; otherwise memory.
331
- We use SqliteSaver(conn) to avoid context-manager issues.
332
- """
333
- try:
334
- from langgraph.checkpoint.sqlite import SqliteSaver
335
- conn = sqlite3.connect(str(CHECKPOINT_PATH), check_same_thread=False)
336
- return SqliteSaver(conn), "sqlite"
337
- except Exception:
338
- from langgraph.checkpoint.memory import InMemorySaver
339
- return InMemorySaver(), "memory"
340
 
341
- CHECKPOINTER, CHECKPOINTER_KIND = build_checkpointer()
342
-
343
- def build_agent():
344
- """
345
- Create a tool-calling ReAct agent with memory.
346
- This follows the LangGraph "add memory" pattern using a checkpointer keyed by thread_id.
347
- """
348
- from langchain_openai import ChatOpenAI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
 
350
- llm = ChatOpenAI(model=LLM_MODEL, temperature=0)
 
 
351
 
352
- tools = build_tools()
 
 
353
 
354
- # Compatibility: create_react_agent moved across versions.
355
- try:
356
- from langgraph.prebuilt import create_react_agent # older path
357
- agent = create_react_agent(llm, tools, checkpointer=CHECKPOINTER)
358
- return agent
359
- except Exception:
360
- # Newer versions may not have prebuilt; fallback to langchain.agents
361
- from langchain.agents import create_react_agent as lc_create_react_agent
362
- agent = lc_create_react_agent(llm, tools)
363
- return agent
364
 
365
- AGENT = None
 
366
 
367
- def get_agent():
368
- global AGENT
369
- if AGENT is None:
370
- AGENT = build_agent()
371
- return AGENT
372
 
 
 
 
 
 
 
 
373
 
374
- # =========================
375
- # Agent runner (traceable wrapper)
376
- # =========================
377
 
378
- def invoke_agent(thread_id: str, task_type: str, user_prompt: str) -> Dict[str, Any]:
379
- """
380
- Run the agent under a thread_id, record a traceable payload, return payload.
381
- """
382
- meta = run_metadata(task_type)
383
  steps: List[StepTrace] = []
384
 
385
- # Step: memory touch (invoke a noop by reading state indirectly)
386
- # We rely on checkpointer in agent; we still record thread_id + checkpointer kind as evidence.
387
- steps.append(StepTrace(
388
- step_id=new_id("step"),
389
- name="memory_scope",
390
- started_at=utc_now(),
391
- ended_at=utc_now(),
392
- duration_ms=0,
393
- inputs={"thread_id": thread_id},
394
- outputs={"checkpointer_kind": CHECKPOINTER_KIND},
395
- evidence={"note": "Memory is keyed by thread_id via LangGraph checkpointer."},
396
- ))
397
-
398
- agent = get_agent()
399
 
400
- t0 = time.time()
401
- err = None
402
- raw = ""
403
- try:
404
- config = {"configurable": {"thread_id": thread_id}}
405
- result = agent.invoke({"messages": [{"role": "user", "content": user_prompt}]}, config=config)
406
- raw = result["messages"][-1].content if result and "messages" in result else ""
407
- raw = redact_text(raw)
408
- except Exception as e:
409
- err = redact_text(str(e))
410
-
411
- steps.append(StepTrace(
412
- step_id=new_id("step"),
413
- name="agent_invoke",
414
- started_at=utc_now(),
415
- ended_at=utc_now(),
416
- duration_ms=int((time.time() - t0) * 1000),
417
- inputs={"task_type": task_type},
418
- outputs={"raw_text_preview": raw[:2000]},
419
- error=err,
420
- evidence={"llm_model": LLM_MODEL},
421
- ))
422
-
423
- payload = {
424
- **meta,
425
- "decision": "Needs Human Review" if err else "Draft",
426
- "result": {"raw_text": raw, "error": err},
427
- "evidence": {"steps": [asdict(s) for s in steps]},
428
- }
429
 
430
- LOGGER.log(payload)
431
- return redact(payload)
 
 
 
432
 
 
 
433
 
434
- # =========================
435
- # Prompt templates (keep it simple + tool-focused)
436
- # =========================
437
 
438
- def fintech_prompt(income: float, debt: float, credit_score: int, requested_amount: float) -> str:
439
- return f"""
440
- You are running the FinTech credit risk demo.
441
- Use tools in this order:
442
- 1) fintech_score(income, debt, credit_score)
443
- 2) hitl_route(score_pd, requested_amount)
444
-
445
- Then return:
446
- - A short decision summary (Decision Draft vs Needs Human Review) and the reason.
447
- - Include the tool JSON outputs in the response (copy them).
448
- Inputs:
449
- income={income}
450
- debt={debt}
451
- credit_score={credit_score}
452
- requested_amount={requested_amount}
453
- """.strip()
454
-
455
- def te_pricing_prompt(cogs: float, landed: float, mult: float, discount: float) -> str:
456
- return f"""
457
- You are running the TE pricing demo (consumer product use case).
458
- Constraints:
459
- - presale >= 7 * COGS
460
- - retail > presale
461
- Use tools:
462
- 1) te_pricing(cogs, landed, presale_mult, discount)
463
- 2) te_benchmark_placeholder()
464
-
465
- Then return:
466
- - Suggested presale and retail prices (from tool output)
467
- - Policy checks status
468
- - A short note on what data we need to make the demand model more accurate
469
- Inputs:
470
- cogs={cogs}
471
- landed={landed}
472
- presale_mult={mult}
473
- discount={discount}
474
- """.strip()
475
 
476
 
477
  # =========================
478
- # Gradio UI
479
  # =========================
480
 
481
  def build_gradio_app():
482
  import gradio as gr
483
 
484
- def run_fintech(income, debt, credit_score, requested_amount, thread_id):
485
- prompt = fintech_prompt(float(income), float(debt), int(credit_score), float(requested_amount))
486
- payload = invoke_agent(thread_id=str(thread_id), task_type="fintech_credit_risk", user_prompt=prompt)
487
- return payload["run_id"], json.dumps(payload, indent=2)
488
-
489
- def run_te(cogs, landed, mult, discount, thread_id):
490
- prompt = te_pricing_prompt(float(cogs), float(landed), float(mult), float(discount))
491
- payload = invoke_agent(thread_id=str(thread_id), task_type="te_pricing", user_prompt=prompt)
492
- return payload["run_id"], json.dumps(payload, indent=2)
493
-
494
- def view_logs(n):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495
  return json.dumps(LOGGER.tail(int(n)), indent=2)
496
 
497
- with gr.Blocks(title="Demo B") as demo:
498
- gr.Markdown(
499
- "## LangGraph Memory + Traceable Demo\n"
500
- "This demo shows a LangGraph/LangChain agent with memory (thread_id) and traceable run logs.\n"
501
- f"- Checkpointer: **{CHECKPOINTER_KIND}**\n"
502
- )
503
-
504
  with gr.Row():
505
- thread_id = gr.Textbox(value="demo_thread", label="thread_id (memory scope)")
506
- gr.Markdown(f"Logs: `{RUN_LOG_PATH}` \nCheckpoints: `{CHECKPOINT_PATH}`")
507
 
508
  with gr.Tabs():
509
- with gr.Tab("FinTech: Credit Risk Demo"):
510
- gr.Markdown("Simple form. Agent calls tools and returns a traceable payload.")
511
  with gr.Row():
512
- income = gr.Number(value=75000, label="Income (annual)")
513
- debt = gr.Number(value=30000, label="Debt (total)")
514
- credit_score = gr.Number(value=680, label="Credit score (300-850)")
515
- requested_amount = gr.Number(value=250000, label="Requested amount")
516
- btn = gr.Button("Run FinTech agent")
517
- out_run = gr.Textbox(label="run_id")
518
- out_json = gr.Textbox(label="traceable output JSON", lines=22)
519
- btn.click(fn=run_fintech, inputs=[income, debt, credit_score, requested_amount, thread_id], outputs=[out_run, out_json])
520
-
521
- with gr.Tab("TE: Pricing Demo"):
522
- gr.Markdown(
523
- "Consumer product use case. Agent computes presale + retail under constraints and shows benchmark placeholder.\n"
524
- "For a real benchmark, replace placeholder with curated market dataset (or verified research)."
525
- )
 
 
 
 
526
  with gr.Row():
527
- cogs = gr.Number(value=DEFAULT_COGS, label="COGS per unit")
528
- landed = gr.Number(value=DEFAULT_LANDED, label="Landed cost per unit")
529
- mult = gr.Number(value=DEFAULT_PRESALE_MULT, label="Presale floor multiplier (>=7)")
530
- discount = gr.Slider(0.10, 0.40, value=DEFAULT_PRESALE_DISCOUNT, step=0.05, label="Presale discount vs retail")
531
- btn2 = gr.Button("Run TE pricing agent")
532
- out_run2 = gr.Textbox(label="run_id")
533
- out_json2 = gr.Textbox(label="traceable output JSON", lines=22)
534
- btn2.click(fn=run_te, inputs=[cogs, landed, mult, discount, thread_id], outputs=[out_run2, out_json2])
535
-
536
- with gr.Tab("Trace Logs"):
 
 
 
 
 
 
 
 
 
537
  n = gr.Slider(10, 200, value=30, step=10, label="show last N runs")
538
- btn3 = gr.Button("Refresh logs")
539
- logs_out = gr.Textbox(lines=24, label="logs (JSON list)")
540
- btn3.click(fn=view_logs, inputs=[n], outputs=[logs_out])
541
-
542
- gr.Markdown(
543
- "### Hugging Face deploy\n"
544
- "1) Rename this file to `app.py`\n"
545
- "2) Use the provided requirements file as `requirements.txt`\n"
546
- "3) Add `OPENAI_API_KEY` in Space Secrets\n\n"
547
- "Note: I can't provide a Hugging Face account for org access. Use your own HF username and ask to be added."
548
- )
549
 
550
  return demo
551
 
552
 
553
  def main():
554
  demo = build_gradio_app()
555
- demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))
556
 
557
 
558
  if __name__ == "__main__":
559
  main()
560
-
561
- # http://localhost:7860
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
  import json
4
+ import math
5
  import os
6
+ import sys
7
  import time
8
  import uuid
9
+ import traceback
10
+ from dataclasses import dataclass, asdict, field
11
  from datetime import datetime, timezone
12
  from pathlib import Path
13
  from typing import Any, Dict, List, Optional, Tuple
14
 
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+ try:
19
+ from importlib.metadata import version as pkg_version
20
+ except Exception:
21
+ pkg_version = None
22
+
23
+
24
# =========================
# HARD DEBUG: show what is running
# =========================
# Printed unconditionally at import time so Space logs show exactly which
# build, interpreter, and package versions are live.
print("### THIS IS THE PURE PYTHON BUILD ###")

print("\n========== HARD DEBUG BOOT ==========")
print("RUNNING_FILE =", __file__)
print("CWD =", os.getcwd())
print("PYTHON =", sys.executable)
# pkg_version is None when importlib.metadata was unavailable at import time.
if pkg_version:
    for p in ["gradio", "numpy", "pandas"]:
        try:
            print(f"PKG {p} =", pkg_version(p))
        except Exception as e:
            # Package not installed (or metadata lookup failed) — report, don't crash.
            print(f"PKG {p} = <missing> ({e})")
print("=====================================\n")
40
+
41
 
42
  # =========================
43
+ # Config + logging
44
  # =========================
45
 
46
  AGENT_ID = os.getenv("AGENT_ID", "nexdatawork_demo_agent")
47
+ MODEL_ID = os.getenv("MODEL_ID", "pure_python_5step_debug")
48
  VERSION_ID = os.getenv("VERSION_ID", "1.0.0")
49
 
50
+ POLICY_ID = os.getenv("POLICY_ID", "5step_workflow_policy")
51
  POLICY_VERSION = os.getenv("POLICY_VERSION", "1.0")
52
 
 
 
 
53
  DATA_DIR = Path(os.getenv("DATA_DIR", "./data"))
54
  DATA_DIR.mkdir(parents=True, exist_ok=True)
55
  RUN_LOG_PATH = DATA_DIR / os.getenv("RUN_LOG_PATH", "run_logs.jsonl")
 
56
 
57
+ DEFAULT_SYNTHETIC_SEED = 42
 
 
58
  HIGH_IMPACT_AMOUNT = float(os.getenv("HIGH_IMPACT_AMOUNT", "1000000"))
59
+ DEFAULT_PRICE_GRID = [x for x in range(42, 121, 1)]
 
 
 
 
 
 
60
 
61
 
 
 
 
 
62
  def utc_now() -> str:
63
  return datetime.now(timezone.utc).replace(microsecond=0).isoformat()
64
 
 
66
  ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
67
  return f"{prefix}_{ts}_{uuid.uuid4().hex[:8]}"
68
 
69
def clamp(x: float, lo: float, hi: float) -> float:
    """Clamp ``x`` into the closed interval [lo, hi]."""
    upper_bounded = min(hi, x)
    return max(lo, upper_bounded)
71
+
72
def sigmoid(x: float) -> float:
    """Numerically stable logistic function 1 / (1 + e^-x).

    Branches on the sign of x so math.exp is only ever called with a
    non-positive argument, avoiding overflow for large |x|.
    """
    if x < 0:
        ex = math.exp(x)
        return ex / (1.0 + ex)
    return 1.0 / (1.0 + math.exp(-x))
78
+
79
+ def run_metadata(task_type: str, thread_id: str) -> Dict[str, Any]:
80
  return {
81
  "run_id": new_id("run"),
82
  "agent_id": AGENT_ID,
 
84
  "version_id": VERSION_ID,
85
  "policy_id": POLICY_ID,
86
  "policy_version": POLICY_VERSION,
 
87
  "task_type": task_type,
88
+ "thread_id": thread_id,
89
  "timestamps": {"created_at": utc_now()},
90
  }
91
 
92
 
93
+ class JSONLLogger:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  def __init__(self, path: Path):
95
  self.path = path
96
  self.path.parent.mkdir(parents=True, exist_ok=True)
97
 
98
+ def append(self, payload: Dict[str, Any]) -> None:
 
99
  with self.path.open("a", encoding="utf-8") as f:
100
  f.write(json.dumps(payload, ensure_ascii=False) + "\n")
101
 
102
+ def tail(self, n: int = 50) -> List[Dict[str, Any]]:
103
  if not self.path.exists():
104
  return []
105
  lines = self.path.read_text(encoding="utf-8").splitlines()
 
111
  continue
112
  return out
113
 
114
+ LOGGER = JSONLLogger(RUN_LOG_PATH)
115
+
116
+
117
@dataclass
class StepTrace:
    """Trace record for one workflow step.

    Serialized into the run payload via dataclasses.asdict, so every field
    must stay JSON-friendly.
    """
    step_id: str            # unique id from new_id("step")
    step_no: int            # 1-based position in the five-step workflow
    title: str              # human-readable step title (see FIVE_STEP_TITLES)
    function_called: str    # name of the function the step executed
    started_at: str         # UTC ISO-8601 timestamp
    ended_at: str           # UTC ISO-8601 timestamp
    duration_ms: int        # wall-clock duration of the step
    evidence: Dict[str, Any] = field(default_factory=dict)  # step output dict, or {"value": ...} for non-dict results
    error: Optional[str] = None  # formatted traceback if the step raised, else None
128
+
129
+
130
def run_step(step_no: int, title: str, function_called: str, fn) -> Tuple[StepTrace, Any]:
    """Execute ``fn()`` and capture timing, result, and any traceback.

    Returns (trace, result). On failure, ``result`` is None and the
    traceback text is stored on the trace; exceptions never propagate.
    A dict result becomes the trace evidence directly; anything else is
    wrapped as {"value": result}.
    """
    trace_id = new_id("step")
    begin_iso = utc_now()
    begin = time.time()
    result = None
    failure = None
    proof: Dict[str, Any] = {}
    try:
        result = fn()
        proof = result if isinstance(result, dict) else {"value": result}
    except Exception:
        failure = traceback.format_exc()
    end_iso = utc_now()
    elapsed_ms = int((time.time() - begin) * 1000)
    trace = StepTrace(
        step_id=trace_id,
        step_no=step_no,
        title=title,
        function_called=function_called,
        started_at=begin_iso,
        ended_at=end_iso,
        duration_ms=elapsed_ms,
        evidence=proof,
        error=failure,
    )
    return trace, result
158
 
159
 
160
  # =========================
161
+ # FINTECH pure python
162
  # =========================
163
 
164
def fintech_build_row(inp: Dict[str, Any]) -> pd.DataFrame:
    """Build a single-row applicant DataFrame from the UI input dict.

    Column order matches the original schema (Income ... Requested_Amount).
    """
    column_map = [
        ("Income", "income"),
        ("Debt", "debt"),
        ("Credit_Score", "credit_score"),
        ("Employment_Status", "employment_status"),
        ("Missed_Payments_12m", "missed_payments_12m"),
        ("Months_On_Book", "months_on_book"),
        ("Credit_Lines", "credit_lines"),
        ("Requested_Amount", "requested_amount"),
    ]
    row = {col: inp[key] for col, key in column_map}
    return pd.DataFrame([row])
175
+
176
def synthetic_fill_numeric(df: pd.DataFrame, col: str, seed: int = DEFAULT_SYNTHETIC_SEED) -> Tuple[pd.DataFrame, Dict[str, Any]]:
    """Fill missing values of a non-negative numeric column with synthetic draws.

    Strategy: if there are at least 10 observed non-negative values, fit a
    lognormal to the logs of the observed positives and draw from it (clipped
    to an expanded [1st, 99th] percentile band); otherwise fall back to a
    fixed lognormal whose scale depends on whether the column name looks like
    an income column. Draws are rounded to non-negative integers.

    Returns (filled_frame_copy, audit_dict). The audit records the column,
    how many cells were filled, and which method was used.
    """
    rng = np.random.default_rng(seed)  # seeded for reproducible fills
    out = df.copy()
    s = pd.to_numeric(out[col], errors="coerce")  # non-numeric cells become NaN too
    na = s.isna()
    if not na.any():
        # Nothing to do — report zero fills.
        return out, {"column": col, "filled": 0, "method": "none"}
    observed = s.dropna()
    observed = observed[observed >= 0]  # negatives are treated as invalid, not fitted
    n_missing = int(na.sum())

    if len(observed) < 10:
        # Too few observations to fit: use fixed priors.
        # NOTE(review): the income/debt scale constants (60000 / 8000) are
        # hard-coded priors — presumably USD annual income vs total debt.
        base = rng.lognormal(mean=np.log(60000), sigma=0.7, size=n_missing) if col.lower().startswith("inc") \
            else rng.lognormal(mean=np.log(8000), sigma=0.9, size=n_missing)
        gen = base
        method = "fallback_lognormal"
    else:
        # Fit lognormal parameters on observed positives; clip draws to an
        # expanded percentile band to avoid extreme tail values.
        q_low, q_high = observed.quantile([0.01, 0.99])
        low = max(float(q_low) * 0.5, 0.0)
        high = float(q_high) * 2.0
        obs_pos = observed[observed > 0]
        # Guard: all-zero observations would make np.log blow up — use log(1).
        logx = np.log(obs_pos.to_numpy()) if len(obs_pos) else np.array([math.log(1.0)])
        mu = float(logx.mean())
        sigma = float(max(logx.std(ddof=1), 1e-6))  # floor sigma so rng accepts it
        gen = rng.lognormal(mean=mu, sigma=sigma, size=n_missing)
        gen = np.clip(gen, low, high)
        method = "lognormal"

    # Round to whole non-negative amounts before writing back.
    gen = np.rint(gen).astype(int)
    gen = np.maximum(gen, 0)
    out.loc[na, col] = gen
    return out, {"column": col, "filled": n_missing, "method": method, "observed_n": int(len(observed))}
208
+
209
def fintech_preprocess(df: pd.DataFrame, include_synth: bool) -> Tuple[pd.DataFrame, Dict[str, Any]]:
    """Impute missing values and return (cleaned_frame, audit_dict).

    When ``include_synth`` is true, Income and Debt are first filled with
    synthetic lognormal draws. Any remaining gaps are filled with the column
    median (numeric) or mode (categorical, "Unknown" if no mode exists).
    The audit reports per-column missing counts before/after and up to six
    synthetic-fill records.
    """
    frame = df.copy()
    fill_audits: List[Dict[str, Any]] = []
    before = frame.isna().sum().astype(int).to_dict()

    if include_synth:
        for target in ["Income", "Debt"]:
            frame, info = synthetic_fill_numeric(frame, target)
            fill_audits.append(info)

    for name in frame.columns:
        if pd.api.types.is_numeric_dtype(frame[name]):
            if frame[name].isna().any():
                numeric = pd.to_numeric(frame[name], errors="coerce")
                frame[name] = numeric.fillna(numeric.median())
        else:
            if frame[name].isna().any():
                modes = frame[name].dropna().mode()
                frame[name] = frame[name].fillna(modes.iloc[0] if len(modes) else "Unknown")

    after = frame.isna().sum().astype(int).to_dict()
    return frame, {"missing_before": before, "missing_after": after, "fill_audit": fill_audits[:6]}
232
+
233
def fintech_features(df: pd.DataFrame) -> pd.DataFrame:
    """Derive normalized risk features from the cleaned applicant frame.

    Adds DTI (debt/income, capped at 5), Score_Gap (distance from a perfect
    850 score, scaled to [0, 1]), normalized missed-payment / tenure /
    credit-line counts, and a categorical employment risk weight
    (unmapped statuses default to 0.07).
    """
    feats = df.copy()

    feats["DTI"] = (feats["Debt"] / feats["Income"]).clip(lower=0, upper=5)
    feats["Score_Gap"] = ((850 - feats["Credit_Score"]) / 550).clip(lower=0, upper=1)
    feats["Missed_Norm"] = feats["Missed_Payments_12m"].clip(lower=0, upper=12) / 12.0
    feats["Tenure_Norm"] = feats["Months_On_Book"].clip(lower=0, upper=120) / 120.0
    feats["Lines_Norm"] = feats["Credit_Lines"].clip(lower=0, upper=20) / 20.0

    weight_table = {
        "employed": 0.00, "self-employed": 0.05, "student": 0.08, "unemployed": 0.18,
        "retired": 0.04, "contract": 0.06, "other": 0.07,
    }
    status = feats["Employment_Status"].astype(str).str.lower().str.strip()
    feats["Employment_Risk_Weight"] = status.map(weight_table).fillna(0.07)
    return feats
248
+
249
def fintech_score(df_feat: pd.DataFrame) -> Dict[str, Any]:
    """Score row 0 of ``df_feat`` with a fixed-weight logistic model.

    Returns:
        pd_risk            — modeled probability of default in (0, 1)
        confidence_0_100   — distance of pd_risk from the 0.5 boundary, scaled
        hitl_urgency_0_100 — review urgency: low confidence plus a bump that
                             grows logarithmically with requested amount
                             relative to HIGH_IMPACT_AMOUNT
        linear_x           — raw linear term fed to the sigmoid
    """
    first = df_feat.loc[0]
    dti = float(first["DTI"])
    gap = float(first["Score_Gap"])
    missed = float(first["Missed_Norm"])
    tenure = float(first["Tenure_Norm"])
    lines = float(first["Lines_Norm"])
    emp_w = float(first["Employment_Risk_Weight"])

    # Hand-tuned logistic weights (not fitted to data).
    x = -1.20 + 1.60*dti + 1.40*gap + 1.10*missed + 0.90*emp_w - 0.40*tenure - 0.25*lines
    pd_risk = sigmoid(x)
    confidence = float(clamp(abs(pd_risk - 0.5) * 200.0, 0.0, 100.0))

    req_amt = float(first["Requested_Amount"])
    bump = 0.0
    if HIGH_IMPACT_AMOUNT > 0 and req_amt > 0:
        # log10 scaling: bump saturates at 20 when the request is 10x the
        # high-impact threshold (ratio + 1 == 11).
        ratio = req_amt / HIGH_IMPACT_AMOUNT
        bump = 20.0 * clamp(math.log10(ratio + 1.0) / math.log10(11.0), 0.0, 1.0)

    hitl = float(clamp((100.0 - confidence) * 0.75 + bump, 0.0, 100.0))
    return {"pd_risk": pd_risk, "confidence_0_100": confidence, "hitl_urgency_0_100": hitl, "linear_x": x}
269
+
270
def fintech_recommend(score: Dict[str, Any]) -> Dict[str, Any]:
    """Map score metrics to a decision: escalate to a human when HITL
    urgency is high (>= 60) or model confidence is low (<= 25)."""
    needs_review = score["hitl_urgency_0_100"] >= 60.0 or score["confidence_0_100"] <= 25.0
    return {"decision": "Needs Human Review" if needs_review else "Decision Draft"}
273
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
 
275
+ # =========================
276
+ # TE pricing
277
+ # =========================
278
 
279
def te_grid_demand(cogs: float, landed: float, mult: float, discount: float, alpha: float, beta: float) -> Dict[str, Any]:
    """Pick presale/retail prices from DEFAULT_PRICE_GRID under company rules.

    Demand model (placeholder): demand = alpha * exp(-beta * price).
    Objective: maximize (price - landed) * demand over grid prices at or
    above the presale floor. Retail is presale grossed up by the discount.

    Fix vs previous version: the floor now also enforces the hard company
    rule presale >= 7 * COGS (previously only ``mult * cogs`` was applied,
    which let the ``presale_ge_7xcogs`` check fail and disagreed with the
    ``presale_floor`` computed in run_te_case step 3).

    Returns a dict with .99-rounded prices, unit margins, policy checks,
    and the winning grid point (None if no grid price met the floor).
    """
    # Company rule: presale must be at least 7x COGS, in addition to the
    # configured multiplier floor.
    floor = max(mult * cogs, 7.0 * cogs)
    candidates = [p for p in DEFAULT_PRICE_GRID if p >= floor]

    best = None
    for p in candidates:
        demand = float(alpha * math.exp(-beta * p))
        profit = (p - landed) * demand
        if best is None or profit > best["objective_profit"]:
            best = {"presale": float(p), "demand": float(demand), "objective_profit": float(profit)}

    # Fall back to the floor itself when the grid has no eligible price.
    presale = float(best["presale"]) if best else float(floor)
    retail = presale / (1.0 - discount)

    def as_99(x: float) -> float:
        # Round to the nearest integer, then shift to the ".99" price point.
        v = round(x)
        return float(f"{max(v, 1) - 0.01:.2f}")

    presale = as_99(presale)
    retail = as_99(retail)

    return {
        "presale_price": presale,
        "retail_price": retail,
        "unit_margin_presale": float(presale - landed),
        "unit_margin_retail": float(retail - landed),
        "checks": {
            # NOTE: as_99 can nudge a boundary price 0.01 below the raw floor,
            # so the check is reported rather than asserted.
            "presale_ge_7xcogs": presale >= 7.0 * cogs,
            "retail_gt_presale": retail > presale,
        },
        "optimization": best,
    }
310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
 
312
+ FIVE_STEP_TITLES = [
313
+ "Interpreting Context & Metrics",
314
+ "Pre-Processing Data",
315
+ "Processing Data",
316
+ "Analyzing Data",
317
+ "Trend Analysis & Predictions",
318
+ ]
319
+
320
def report_5step(title: str, final_summary: Dict[str, Any], steps: List[StepTrace]) -> str:
    """Render a Markdown report: a final summary plus one section per step.

    Failed steps show their traceback in a fenced code block; successful
    steps show their evidence dict inline.
    """
    parts: List[str] = [f"## {title}\n", "### Final"]
    parts.extend(f"- {k}: **{v}**" for k, v in final_summary.items())

    parts.append("\n### Five steps")
    for step in steps:
        parts.append(f"**Step {step.step_no}. {step.title}**")
        parts.append(f"- function: `{step.function_called}`")
        parts.append(f"- step_id: `{step.step_id}`")
        parts.append(f"- duration_ms: `{step.duration_ms}`")
        if step.error:
            parts.append(f"\n```text\n{step.error}\n```\n")
        else:
            parts.append(f"- evidence: `{step.evidence}`\n")
    return "\n".join(parts).strip()
338
+
339
+
340
def run_fintech_case(thread_id: str, inp: Dict[str, Any]) -> Tuple[str, Dict[str, Any]]:
    """Execute the five-step FinTech credit-risk workflow.

    Logs a traceable payload via LOGGER and returns
    (markdown_report, payload).

    Fixes vs previous version:
    - fintech_preprocess and fintech_features were each executed twice
      (once inside the step lambda for evidence, once for the real result);
      they now run once and the step records the already-computed result.
    - If scoring fails inside run_step, the final summary no longer crashes
      with an uncaught TypeError on ``score[...]``; the report surfaces the
      step error and falls back to "Needs Human Review".
    """
    meta = run_metadata("fintech", thread_id)
    steps: List[StepTrace] = []
    include_synth = bool(inp.get("include_synth", True))

    # Step 1: record the interpreted inputs.
    s1, _ = run_step(1, FIVE_STEP_TITLES[0], "fintech_build_row", lambda: {"inputs": inp})
    steps.append(s1)
    df_raw = fintech_build_row(inp)

    # Step 2: preprocessing — computed once, reused for both evidence and result.
    df_clean, prep = fintech_preprocess(df_raw, include_synth)
    s2, _ = run_step(2, FIVE_STEP_TITLES[1], "fintech_preprocess", lambda: prep)
    steps.append(s2)

    # Step 3: feature engineering — also computed once.
    df_feat = fintech_features(df_clean)
    feature_cols = ["DTI", "Score_Gap", "Missed_Norm", "Tenure_Norm", "Employment_Risk_Weight"]
    s3, _ = run_step(3, FIVE_STEP_TITLES[2], "fintech_features",
                     lambda: {"features": df_feat[feature_cols].iloc[0].to_dict()})
    steps.append(s3)

    # Step 4: scoring. ``score`` is None if the step raised.
    s4, score = run_step(4, FIVE_STEP_TITLES[3], "fintech_score", lambda: fintech_score(df_feat))
    steps.append(s4)

    # Step 5: recommendation, guarded against a failed scoring step.
    s5, rec = run_step(5, FIVE_STEP_TITLES[4], "fintech_recommend",
                       lambda: fintech_recommend(score) if score is not None else {"decision": "Needs Human Review"})
    steps.append(s5)
    if rec is None:
        # Recommendation step itself failed — default to escalation.
        rec = {"decision": "Needs Human Review"}

    if score is None:
        final = {"Decision": rec["decision"], "Note": "scoring failed; see step 4 trace"}
    else:
        final = {
            "Decision": rec["decision"],
            "PD risk": f"{score['pd_risk']:.3f}",
            "Confidence": f"{score['confidence_0_100']:.1f}/100",
            "HITL urgency": f"{score['hitl_urgency_0_100']:.1f}/100",
        }
    report = report_5step("FinTech Credit Risk", final, steps)

    payload = {**meta, "inputs": inp, "steps": [asdict(x) for x in steps], "outputs": {"score": score, "recommendation": rec, "prep": prep}}
    LOGGER.append(payload)
    return report, payload
373
 
374
def run_te_case(thread_id: str, inp: Dict[str, Any]) -> Tuple[str, Dict[str, Any]]:
    """Execute the five-step TE pricing workflow.

    Logs a traceable payload via LOGGER and returns
    (markdown_report, payload).

    Fixes vs previous version:
    - the presale floor was computed twice inside the step-3 lambda; it is
      now hoisted and computed once.
    - if the pricing step fails inside run_step, the summary no longer
      crashes with an uncaught TypeError on ``pricing[...]``; the report
      surfaces the step error instead.
    """
    meta = run_metadata("te_pricing", thread_id)
    steps: List[StepTrace] = []

    # Step 1: record the interpreted inputs.
    s1, _ = run_step(1, FIVE_STEP_TITLES[0], "te_context", lambda: {"inputs": inp})
    steps.append(s1)

    # Step 2: sanity checks on the raw inputs (reported, not enforced).
    s2, _ = run_step(2, FIVE_STEP_TITLES[1], "te_input_checks", lambda: {
        "checks": {
            "cogs_gt_0": inp["cogs"] > 0,
            "landed_gt_0": inp["landed"] > 0,
            "discount_range": 0.0 < inp["discount"] < 0.9,
        }
    })
    steps.append(s2)

    # Step 3: derived floors. The presale floor is the max of the configured
    # multiplier and the hard 7x-COGS company rule; hoisted so it is computed once.
    presale_floor = max(inp["presale_mult"] * inp["cogs"], 7.0 * inp["cogs"])
    s3, _ = run_step(3, FIVE_STEP_TITLES[2], "te_derive", lambda: {
        "presale_floor": presale_floor,
        "implied_retail_floor": presale_floor / (1.0 - inp["discount"]),
    })
    steps.append(s3)

    # Step 4: grid-search pricing. ``pricing`` is None if the step raised.
    s4, pricing = run_step(4, FIVE_STEP_TITLES[3], "te_grid_demand",
                           lambda: te_grid_demand(inp["cogs"], inp["landed"], inp["presale_mult"], inp["discount"], inp["alpha"], inp["beta"]))
    steps.append(s4)

    # Step 5: summary, guarded against a failed pricing step.
    if pricing is None:
        s5, _ = run_step(5, FIVE_STEP_TITLES[4], "te_summary", lambda: {"error": "pricing step failed; see step 4 trace"})
        steps.append(s5)
        final: Dict[str, Any] = {"Status": "Error — pricing failed; see step 4 trace"}
    else:
        s5, _ = run_step(5, FIVE_STEP_TITLES[4], "te_summary",
                         lambda: {"presale": pricing["presale_price"], "retail": pricing["retail_price"], "checks": pricing["checks"]})
        steps.append(s5)
        final = {"Presale": pricing["presale_price"], "Retail": pricing["retail_price"], "Checks": pricing["checks"]}

    report = report_5step("TE Pricing", final, steps)

    payload = {**meta, "inputs": inp, "steps": [asdict(x) for x in steps], "outputs": {"pricing": pricing}}
    LOGGER.append(payload)
    return report, payload
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
 
405
 
406
  # =========================
407
+ # UI (debug: always show traceback)
408
  # =========================
409
 
410
def build_gradio_app():
    """Build the Gradio Blocks UI: FinTech tab, TE pricing tab, and a Trace tab.

    Returns the (unlaunched) gr.Blocks app. All handlers are wrapped so any
    exception is rendered as a Markdown traceback instead of crashing the UI.
    """
    import gradio as gr

    # Shared mutable state: the payload of the most recent run, shown on the
    # Trace tab. NOTE(review): module-wide, not per-session — concurrent users
    # share it.
    STATE: Dict[str, Any] = {"last_payload": None}
    emp_choices = ["Employed", "Self-employed", "Student", "Unemployed", "Retired", "Contract", "Other"]

    def safe_call(fn):
        # Run fn(); on failure, log and return the traceback as Markdown
        # so the error is visible in the output panel.
        try:
            return fn()
        except Exception:
            tb = traceback.format_exc()
            print(tb)
            return "ERROR:\n\n```text\n" + tb + "\n```"

    def ui_fintech(income, debt, credit_score, employment_status, missed_12m, months_on_book, credit_lines, requested_amount, include_synth, thread_id):
        # Coerce Gradio component values to the types run_fintech_case expects.
        def _do():
            inp = {
                "income": float(income),
                "debt": float(debt),
                "credit_score": int(credit_score),
                "employment_status": str(employment_status),
                "missed_payments_12m": int(missed_12m),
                "months_on_book": int(months_on_book),
                "credit_lines": int(credit_lines),
                "requested_amount": float(requested_amount),
                "include_synth": bool(include_synth),
            }
            report, payload = run_fintech_case(str(thread_id), inp)
            STATE["last_payload"] = payload
            return report
        return safe_call(_do)

    def ui_te(cogs, landed, mult, discount, alpha, beta, thread_id):
        # Coerce Gradio component values to the types run_te_case expects.
        def _do():
            inp = {
                "cogs": float(cogs),
                "landed": float(landed),
                "presale_mult": float(mult),
                "discount": float(discount),
                "alpha": float(alpha),
                "beta": float(beta),
            }
            report, payload = run_te_case(str(thread_id), inp)
            STATE["last_payload"] = payload
            return report
        return safe_call(_do)

    def ui_current_trace():
        # Pretty-print the most recent run payload, if any.
        p = STATE.get("last_payload")
        if not p:
            return "No run yet."
        return json.dumps(p, indent=2)

    def ui_logs(n):
        # Last N persisted runs from the JSONL log file.
        return json.dumps(LOGGER.tail(int(n)), indent=2)

    with gr.Blocks(title="Demo") as demo:
        with gr.Row():
            # thread_id scopes runs in the log metadata.
            thread_id = gr.Textbox(value="demo_thread", label="thread_id")

        with gr.Tabs():
            with gr.Tab("FinTech"):
                with gr.Row():
                    with gr.Column(scale=5, min_width=460):
                        income = gr.Number(value=75000, label="Income (annual)")
                        debt = gr.Number(value=30000, label="Debt (total)")
                        credit_score = gr.Number(value=680, label="Credit score (300-850)")
                        requested_amount = gr.Number(value=250000, label="Requested amount")
                        employment_status = gr.Dropdown(choices=emp_choices, value="Employed", label="Employment status")
                        missed_12m = gr.Number(value=1, label="Missed payments (12m)")
                        months_on_book = gr.Number(value=18, label="Months on book")
                        credit_lines = gr.Number(value=4, label="Credit lines")
                        include_synth = gr.Checkbox(value=True, label="Handle missing values (synthetic fill)")
                        btn = gr.Button("Run", variant="primary")

                    with gr.Column(scale=7, min_width=640):
                        out = gr.Markdown(value="_(Run to see output.)_")

                btn.click(fn=ui_fintech, inputs=[income, debt, credit_score, employment_status, missed_12m, months_on_book, credit_lines, requested_amount, include_synth, thread_id], outputs=[out])

            with gr.Tab("TE"):
                with gr.Row():
                    with gr.Column(scale=5, min_width=460):
                        cogs = gr.Number(value=6, label="COGS per unit")
                        landed = gr.Number(value=10, label="Landed cost per unit")
                        mult = gr.Number(value=7, label="Presale floor multiplier")
                        discount = gr.Number(value=0.20, label="Discount (0-0.9)")
                        alpha = gr.Number(value=120, label="Demand alpha (placeholder)")
                        beta = gr.Number(value=0.08, label="Demand beta (placeholder)")
                        btn2 = gr.Button("Run", variant="primary")

                    with gr.Column(scale=7, min_width=640):
                        out2 = gr.Markdown(value="_(Run to see output.)_")

                btn2.click(fn=ui_te, inputs=[cogs, landed, mult, discount, alpha, beta, thread_id], outputs=[out2])

            with gr.Tab("Trace"):
                btn3 = gr.Button("Show current run")
                cur = gr.Code(language="json")
                btn3.click(fn=ui_current_trace, inputs=[], outputs=[cur])

                n = gr.Slider(10, 200, value=30, step=10, label="show last N runs")
                btn4 = gr.Button("Refresh logs")
                logs = gr.Code(language="json")
                btn4.click(fn=ui_logs, inputs=[n], outputs=[logs])

    return demo
517
 
518
 
519
def main():
    """Build the Gradio app and serve it on all interfaces.

    The port comes from the PORT environment variable (default 7860);
    debug=True keeps the launch blocking with verbose errors.
    """
    port = int(os.getenv("PORT", "7860"))
    app = build_gradio_app()
    app.launch(server_name="0.0.0.0", server_port=port, debug=True)
522
 
523
 
524
  if __name__ == "__main__":
525
  main()
526
+ # http://localhost:7860
 
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio>=4.31.0
2
+ numpy>=1.24.0
3
+ pandas>=2.0.0
4
+ openai>=1.40.0
5
+ scikit-learn>=1.3.0
6
+ matplotlib>=3.8.0
requirements_demo_b.txt CHANGED
@@ -1,7 +1,8 @@
1
- gradio>=4.0.0
2
- langgraph>=1.0.0
3
- langgraph-checkpoint>=1.0.0
4
- langgraph-checkpoint-sqlite>=1.0.0
5
- langchain-core>=0.2.0
6
- langchain-openai>=0.1.0
7
- openai>=1.0.0
 
 
1
+ gradio>=4.31.0
2
+ langgraph>=0.2.34
3
+ langchain>=0.2.16
4
+ langchain-core>=0.2.38
5
+ langchain-openai>=0.1.22
6
+ openai>=1.40.0
7
+ numpy>=1.24.0
8
+ pandas>=2.0.0