GenAICoder committed on
Commit
abc0c0f
·
verified ·
1 Parent(s): b5c2654

Create ai_assistant.py

Browse files
Files changed (1) hide show
  1. analytics/ai_assistant.py +181 -0
analytics/ai_assistant.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+
4
+ try:
5
+ from huggingface_hub import InferenceClient
6
+ except ImportError as exc:
7
+ raise ImportError(
8
+ "huggingface_hub is required for AI assistant support. "
9
+ "Install it with `pip install huggingface_hub`."
10
+ ) from exc
11
+
12
# Runtime configuration, read once from environment variables at import time.
# Model identifier sent with each chat request; overridable via HF_MODEL_ID.
HF_MODEL_ID = os.getenv("HF_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
# API token; stays None when HUGGINGFACE_API_TOKEN is not set.
HF_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
# Generation settings, parsed from their textual form.
HF_MAX_TOKENS = int(os.getenv("HF_MAX_TOKENS", "512"))
HF_TEMPERATURE = float(os.getenv("HF_TEMPERATURE", "0.3"))
16
+
17
+
18
def _detect_date_column(df: pd.DataFrame):
    """Return the first known date-like column name present in *df*.

    The candidate names are tried in priority order; ``None`` is returned
    when the frame has none of them.
    """
    known_names = (
        "reporting_month",
        "observation_date",
        "observation_month",
        "obs_date",
        "date",
        "calendar_month",
        "month",
        "report_date",
    )
    return next((name for name in known_names if name in df.columns), None)
33
+
34
+
35
+ def _filter_by_month(df: pd.DataFrame, as_of_month: str | None):
36
+ if not as_of_month or as_of_month == "All":
37
+ return df.copy()
38
+
39
+ date_col = _detect_date_column(df)
40
+ if date_col is None:
41
+ return df.copy()
42
+
43
+ ser = pd.to_datetime(df[date_col], errors="coerce").dt.to_period("M").astype(str)
44
+ return df[ser == as_of_month].copy()
45
+
46
+
47
def _fmt_pct(value):
    """Format *value* as a percentage string rounded to two decimals.

    Args:
        value: Anything ``float()`` accepts (number or numeric string).

    Returns:
        A string such as ``"12.35%"``, or ``"N/A"`` when *value* cannot be
        converted to a float or is not a finite number (NaN / infinity).
    """
    import math  # local import so the helper does not rely on file-level imports

    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return "N/A"

    # NaN and infinities would otherwise be rendered as "nan%" / "inf%".
    if not math.isfinite(number):
        return "N/A"
    return f"{round(number, 2)}%"
52
+
53
+
54
def _average_fico(df: pd.DataFrame):
    """Return the mean FICO rounded to one decimal, or None when unavailable."""
    if "fico_score" in df.columns:
        scores = df["fico_score"].dropna()
        # An all-null column would otherwise yield NaN and print as "nan".
        return round(scores.mean(), 1) if not scores.empty else None

    if "fico_band" in df.columns:
        def band_mid(val):
            # Bands are expected as "lo-hi"; anything else is skipped.
            try:
                lo, hi = val.split("-")
                return (int(lo) + int(hi)) / 2
            except (AttributeError, TypeError, ValueError):
                return None

        mid_vals = df["fico_band"].dropna().apply(band_mid).dropna()
        return round(mid_vals.mean(), 1) if not mid_vals.empty else None

    return None


def _segment_breakdown_lines(df: pd.DataFrame, segment: str, top_n: int = 5):
    """Return the text lines describing the *top_n* segments by NCL rate."""
    # Work on a positionally indexed copy so duplicate index labels in the
    # caller's frame cannot misalign the delinquency mask.
    work = df.reset_index(drop=True)

    # Missing optional columns get neutral placeholders instead of raising
    # KeyError inside the aggregation.
    if "account_id" not in work.columns:
        work["account_id"] = None  # nunique() of an all-null column is 0
    if "balance" not in work.columns:
        work["balance"] = 0.0

    if "dpd" in work.columns:
        delinquent = work["dpd"].fillna(0) >= 30
        # Keep the balance only on delinquent rows; others contribute 0.
        work["__bad_balance__"] = work["balance"].where(delinquent, 0)
    else:
        work["__bad_balance__"] = 0

    summary = (
        work.groupby(segment)
        .agg(
            accounts=("account_id", "nunique"),
            balance=("balance", "sum"),
            bad_balance=("__bad_balance__", "sum"),
        )
        .reset_index()
    )

    rate = summary["bad_balance"] / summary["balance"] * 100
    # Division by a zero balance gives NaN or +/-inf; report both as 0.
    summary["ncl_rate"] = (
        rate.replace([float("inf"), float("-inf")], float("nan")).round(2).fillna(0)
    )

    lines = [f"Segment breakdown by {segment}:"]
    top = summary.sort_values("ncl_rate", ascending=False).head(top_n)
    # Column-wise iteration keeps each value's own dtype (iterrows would
    # upcast a numeric segment label to float).
    for name, accounts, balance, ncl_rate in zip(
        top[segment], top["accounts"], top["balance"], top["ncl_rate"]
    ):
        lines.append(
            f" - {name}: accounts={int(accounts)}, balance={balance:.0f}, ncl={_fmt_pct(ncl_rate)}"
        )
    return lines


def build_portfolio_context(df: pd.DataFrame, as_of_month: str | None = None, segment: str | None = None):
    """Summarise a portfolio frame as plain text for use in an LLM prompt.

    Args:
        df: Portfolio data. The columns ``account_id``, ``balance``, ``dpd``,
            ``fico_score`` / ``fico_band`` and any column whose name contains
            "ncl" are used when present; each is optional.
        as_of_month: Month passed to ``_filter_by_month``; ``None`` or
            ``"All"`` keeps every row.
        segment: Optional column name; when it exists in the frame, the five
            segments with the highest NCL rate are appended.

    Returns:
        A newline-joined string of headline metrics, followed by the segment
        breakdown when requested.
    """
    df = _filter_by_month(df, as_of_month)

    has_ids = "account_id" in df.columns
    has_balance = "balance" in df.columns
    has_dpd = "dpd" in df.columns
    # Accounts at 30+ days past due count as "bad"; missing dpd counts as 0.
    delinquent = df["dpd"].fillna(0) >= 30 if has_dpd else None

    total_accounts = int(df["account_id"].nunique()) if has_ids else 0
    if has_ids and has_balance:
        open_accounts = int(df.loc[df["balance"] > 0, "account_id"].nunique())
    else:
        open_accounts = total_accounts
    if has_ids and has_dpd:
        bad_accounts = int(df.loc[delinquent, "account_id"].nunique())
    else:
        bad_accounts = 0
    total_balance = float(df["balance"].sum(skipna=True)) if has_balance else 0.0

    # NOTE(review): the first column containing "ncl" is summed as if it held
    # loss amounts; confirm it is not already a rate.
    ncl_cols = [c for c in df.columns if "ncl" in str(c).lower()]
    if ncl_cols and total_balance > 0:
        overall_ncl_rate = df[ncl_cols[0]].sum(skipna=True) / total_balance * 100
    elif has_dpd and total_balance > 0:
        # Fallback proxy: share of balance sitting on delinquent rows.
        bad_balance = float(df.loc[delinquent, "balance"].sum())
        overall_ncl_rate = bad_balance / total_balance * 100
    else:
        overall_ncl_rate = None

    overall_ncl_rate_text = "N/A" if overall_ncl_rate is None else _fmt_pct(overall_ncl_rate)

    avg_fico = _average_fico(df)

    # NOTE(review): with as_of_month=None no month filter is applied, yet the
    # label reads "latest available"; confirm callers pre-filter the frame.
    as_of_month_text = (
        "all data" if as_of_month == "All" else (as_of_month or "latest available")
    )

    lines = [
        f"As of month: {as_of_month_text}",
        f"Total accounts: {total_accounts}",
        f"Open accounts: {open_accounts}",
        f"Bad accounts (dpd>=30): {bad_accounts}",
        f"Overall NCL rate: {overall_ncl_rate_text}",
        f"Average FICO: {avg_fico if avg_fico is not None else 'N/A'}",
    ]

    if segment and segment in df.columns:
        lines.extend(_segment_breakdown_lines(df, segment))

    return "\n".join(lines)
125
+
126
+
127
def _get_inference_client():
    """Build an ``InferenceClient`` authenticated with ``HF_TOKEN``.

    Raises:
        RuntimeError: when no token was configured in the environment.
    """
    if HF_TOKEN:
        print("Inference set up successful.")
        return InferenceClient(token=HF_TOKEN)

    raise RuntimeError(
        "HUGGINGFACE_API_TOKEN is required for AI assistant. "
        "Set it in your environment before running the app."
    )
136
+
137
+
138
def generate_ai_answer(question: str, df: pd.DataFrame, as_of_month: str | None = None, segment: str | None = None):
    """Answer a portfolio question with the configured chat model.

    A text summary of *df* is produced by ``build_portfolio_context`` and
    embedded, together with *question*, in a single user message sent through
    the client's chat-completions endpoint.

    Args:
        question: The user's free-text question.
        df: Portfolio data to summarise as context.
        as_of_month: Forwarded to ``build_portfolio_context``.
        segment: Forwarded to ``build_portfolio_context``.

    Returns:
        The content of the first returned choice. For a response without a
        ``choices`` attribute, the ``generated_text`` of the first list item,
        or the raw response itself.

    Raises:
        RuntimeError: propagated from ``_get_inference_client`` when no API
            token is configured.
    """
    import logging  # local import: the file's import block is left untouched

    logger = logging.getLogger(__name__)

    summary = build_portfolio_context(df, as_of_month=as_of_month, segment=segment)
    # The instruction block and the context are separated by a blank line;
    # previously "say so clearly." ran straight into "Context:".
    prompt = (
        "You are a senior risk manager assistant responding to portfolio analytics questions. "
        "Use the risk context below and answer the user clearly, describing what is happening, why it is happening, and what actions should be considered. "
        "If you cannot answer from the data, say so clearly.\n\n"
        f"Context:\n{summary}\n\n"
        f"Question: {question}\n\n"
        "Answer as a risk manager with practical, concise guidance."
    )

    client = _get_inference_client()
    messages = [{"role": "user", "content": prompt}]
    # Log only metadata: the prompt embeds portfolio figures that should not
    # be written to stdout or logs.
    logger.debug(
        "Requesting chat completion from %s (%d prompt characters)",
        HF_MODEL_ID,
        len(prompt),
    )

    response = client.chat.completions.create(
        model=HF_MODEL_ID,
        messages=messages,
        max_tokens=HF_MAX_TOKENS,
        temperature=HF_TEMPERATURE,
        top_p=0.95,
    )

    if hasattr(response, "choices"):
        return response.choices[0].message.content
    # Fallbacks for responses that are not chat-completion objects.
    if isinstance(response, list):
        return response[0].get("generated_text")
    return response