"""
Agent inference using Modal GPU endpoint, HuggingFace Inference API, or mock mode.
No llama.cpp dependency. Inference is handled by:
- "modal" -> remote Modal GPU endpoint (if MODAL_INFERENCE_URL set)
- "hf" -> HuggingFace Inference API (if HF_API_URL + HF_TOKEN set)
- "mock" -> deterministic test mode (MOCK_LLM=1 or fallback)
All features have deterministic fallbacks so the app works without any LLM.
"""
import json
import os
import re
from typing import Dict, List
from dotenv import load_dotenv
load_dotenv()
# Asset classes and NPC trader personas referenced throughout this module.
ASSETS = ["cash", "fd", "gov_bonds", "nifty_50", "nifty_it", "real_estate", "crypto", "gold"]
PERSONAS = ["whale", "retail", "permabull"]

# Remote backends are enabled purely through environment configuration.
MODAL_URL = os.getenv("MODAL_INFERENCE_URL", "").rstrip("/")
USE_MODAL = bool(MODAL_URL)

HF_API_URL = os.getenv("HF_API_URL", "").rstrip("/")
HF_TOKEN = os.getenv("HF_TOKEN", "")
USE_HF = bool(HF_API_URL) and bool(HF_TOKEN)

# Backend priority: explicit mock > Modal > HuggingFace > mock fallback.
# _llm_error carries a human-readable explanation whenever mock mode is used.
if os.getenv("MOCK_LLM") == "1":
    _llm_status, _llm_error = "mock", "MOCK_LLM=1 (test mode)"
elif USE_MODAL:
    _llm_status, _llm_error = "modal", ""
elif USE_HF:
    _llm_status, _llm_error = "hf", ""
else:
    _llm_status = "mock"
    _llm_error = "No inference backend configured (set MODAL_INFERENCE_URL or HF_API_URL+HF_TOKEN, or MOCK_LLM=1)"
def llm_status() -> str:
    """Return the module-level inference backend status string."""
    status = _llm_status
    return status
def llm_error() -> str:
    """Return the module-level backend error/explanation string (may be empty)."""
    message = _llm_error
    return message
def start_background_load() -> None:
    """Do nothing; this function has no work to perform and returns None."""
    return None
def strip_reasoning_narration(text: str) -> str:
    """Drop a model's leading self-talk and keep only the actual answer.

    Some models restate the instructions before answering, e.g.
    'User wants a single sentence... Output only the sentence. Hold cash.'

    The text is split into paragraphs (blank-line separated, falling back to
    single lines). If the first chunk looks like narration, the last chunk
    that does not look like narration is returned; if every chunk looks like
    narration the final chunk is returned. Text whose first chunk is not
    narration is returned unchanged. Text that is a single chunk is delegated
    to sentence-level filtering.
    """
    if not text:
        return text

    # Phrases the model uses when talking to itself. Each pattern is matched
    # against the lower-cased, stripped chunk.
    reasoning_markers = [
        r'^user\s+(wants|says|asks|is\s|needs|has|gave|provided)',
        r'^the\s+user\s',
        r'^(i\s+)?need\s+to\s',
        r'^(let|let\'s)\s+(me\s+|us\s+)?(think|analyze|consider|check|review|break|figure|process|reason)',
        r'^(we|i)\s+(need|should|must|have\s+to|want)\s',
        r'^we\s+need\s+(to\s+)?output\s+(one|a)\s+sentence',
        r'^output\s+only\s',
        r'^(this|it)\s+(is|seems|appears|looks)\s+(like|to\s+be)',
        r'^(okay|ok|so|alright|well|now|right|hmm|hmmm)[\s,]+',
        r'^the\s+(task|prompt|instruction|request|question)\s',
        r'^(based|given)\s+(on|the)\s',
        r'^respond\s+(with|to|as)\s',
        r'^reply\s+(with|to|as)\s',
        r'^(my|the)\s+(response|reply|answer|output)\s+(should|must|needs|will|is)\s',
        r'^starting\s+portfolio',
        r'^portfolio[\s:]+',
        r'^\d+%\s+cash',
        r'^(total|pnl|sharpe|drawdown)[\s:]+',
        r'^that\'?s\s+\d+\s+sentenc',
        r'^in\s+(ai|the)\s+(insight|chat|advisory)',
        r'^need\s+(to\s+)?be\s+under\s',
        r'^so\s+reply',
        r'^keep\s+in\s+character',
        r'^i\s+(am|will|would|can)\s+(now\s+)?(give|provide|output|share|generate)',
        r'^(here\s+is|here\'s)\s+(the|my|a|an)\s+(insight|response|answer|sentence)',
    ]

    def looks_like_reasoning(chunk: str) -> bool:
        lowered = chunk.lower().strip()
        return any(re.search(marker, lowered) for marker in reasoning_markers)

    # Prefer blank-line separated paragraphs; fall back to individual lines.
    chunks = [part.strip() for part in re.split(r'\n\s*\n', text) if part.strip()]
    if len(chunks) <= 1:
        chunks = [line.strip() for line in text.split('\n') if line.strip()]
        if len(chunks) <= 1:
            # Single block: try sentence-level extraction instead.
            return _strip_reasoning_sentences(text, reasoning_markers)

    # Only rewrite when the output *opens* with narration.
    if not looks_like_reasoning(chunks[0]):
        return text

    for chunk in reversed(chunks):
        if not looks_like_reasoning(chunk):
            return chunk.strip()
    return chunks[-1].strip()
def _strip_reasoning_sentences(text: str, reasoning_markers: list) -> str:
    """Filter narration out of single-paragraph text, sentence by sentence.

    The text is split on sentence-ending punctuation; if that yields a single
    piece, a looser split on '.', ',' or ';' followed by a capital letter is
    tried. Pieces matching any pattern in ``reasoning_markers`` (tested on the
    lower-cased, stripped piece) are dropped and the rest are joined with
    spaces. If everything matches, the last piece is returned. Text that
    cannot be split is returned unchanged.
    """
    pieces = re.split(r'(?<=[.!?])\s+', text)
    if len(pieces) <= 1:
        # Run-on model output: try splitting at clause boundaries instead.
        pieces = re.split(r'(?<=[.,;])\s+(?=[A-Z])', text)
        if len(pieces) <= 1:
            return text

    def is_narration(piece: str) -> bool:
        lowered = piece.lower().strip()
        return any(re.search(marker, lowered) for marker in reasoning_markers)

    kept = [piece for piece in pieces if not is_narration(piece)]
    if kept:
        return ' '.join(kept).strip()
    # Everything looked like narration; the model often ends with the answer.
    return pieces[-1].strip()
def _strip_prompt_echo(text: str, prompt: str = "", system: str = "") -> str:
    """Remove an echoed system and/or user prompt from the start of ``text``.

    Some backends return the prompt followed by the generated text. Each of
    ``system`` and ``prompt`` (in that order) is looked up by its first 60
    characters, case-insensitively. It counts as an echo when it sits at
    offset 0, or within the first 20 characters preceded only by a role label
    such as ``system`` or ``Assistant:``. The echo and any trailing
    whitespace or delimiter characters are then cut off.

    Both candidates are processed, so an output that echoes the system prompt
    followed by the user prompt has both removed.

    Args:
        text: Raw model output.
        prompt: User prompt that was sent (may be empty).
        system: System prompt that was sent (may be empty).

    Returns:
        ``text`` with any leading echo removed; unchanged when no echo is found.
    """
    if not text:
        return text

    # Labels are compared after strip() + lower(), so they carry no whitespace.
    role_labels = ("", "system", "system:", "user", "user:", "assistant", "assistant:")
    trailing_delims = " \n\r\t:,-."

    for raw in (system, prompt):
        cand = (raw or "").strip().rstrip('.')
        if not cand:
            continue
        idx = text.lower().find(cand.lower()[:60])
        if idx < 0 or idx >= 20:
            continue
        if idx > 0 and text[:idx].strip().lower() not in role_labels:
            continue
        # Found the echo at the start; cut right after it and also consume
        # trailing whitespace/newlines/delimiters.
        end = idx + len(cand)
        while end < len(text) and text[end] in trailing_delims:
            end += 1
        text = text[end:].strip()
    return text
def clean_text(text: str, prompt: str = "", system: str = "") -> str:
    """Strip model cruft: prompt echo, think blocks, narration, preambles, markdown.

    Steps, in order:
      1. Remove an echoed prompt/system message (when either is supplied).
      2. Remove ``<think>...</think>`` blocks; an unclosed block drops
         everything from the opening tag onward.
      3. Remove reasoning narration.
      4. Remove one common assistant preamble ("assistant:", "here is", ...).
      5. Remove markdown emphasis, inline code, heading and list/quote markers.
      6. Collapse runs of 3+ newlines to 2.
      7. If the result is a JSON object, unwrap the first non-empty string
         found under a well-known key.

    Args:
        text: Raw model output.
        prompt: User prompt that produced the output (used for echo removal).
        system: System prompt that produced the output (used for echo removal).

    Returns:
        The cleaned text, or "" when the input is empty or whitespace only.
    """
    if not text or not text.strip():
        return ""
    text = text.strip()

    # Strip echoed prompt (model repeating the instruction back).
    if prompt or system:
        text = _strip_prompt_echo(text, prompt, system)

    # Strip all <think>...</think> blocks (including nested/malformed).
    # Offsets come from case-insensitive searches on `text` itself, so they
    # stay valid even when lower-casing would change the string's length.
    open_tag = re.compile(r'<think', re.IGNORECASE)
    close_tag = re.compile(r'</think', re.IGNORECASE)
    while True:
        opener = open_tag.search(text)
        if opener is None:
            break
        start = opener.start()
        gt = text.find(">", start)
        tag_end = gt + 1 if gt != -1 else start + 7
        closer = close_tag.search(text, tag_end)
        if closer is None:
            # Unclosed block: everything after the opening tag is reasoning.
            text = text[:start].strip()
            break
        close_gt = text.find(">", closer.start())
        resume = (close_gt + 1) if close_gt != -1 else (closer.start() + 8)
        text = (text[:start] + text[resume:]).strip()

    # Strip reasoning narration (model talking to itself).
    text = strip_reasoning_narration(text).strip()

    # Remove one common AI preamble at the very start of the text.
    lowered = text.lower()
    for prefix in ("assistant:", "ai:", "bot:", "response:", "reply:",
                   "here is", "here's", "okay"):
        if not lowered.startswith(prefix):
            continue
        after = text[len(prefix):].strip()
        if after.startswith((':', ',', '-')):
            after = after[1:].strip()
        # Only accept the cut when something substantial is left.
        if len(after) > len(prefix):
            text = after
        break

    # Remove markdown formatting.
    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
    text = re.sub(r'\*(.+?)\*', r'\1', text)
    text = re.sub(r'`(.+?)`', r'\1', text)
    # Heading markers are always removed; list/quote markers only when they
    # stand alone (followed by whitespace or end of line), so a leading minus
    # sign on a number such as "-5%" is preserved.
    text = re.sub(r'^(?:#+|[\-\*>]+(?=\s|$))\s*', '', text, flags=re.MULTILINE)

    # Collapse multiple newlines into max 2.
    text = re.sub(r'\n{3,}', '\n\n', text)

    # Strip JSON wrapper if present.
    if text.startswith('{') and text.endswith('}'):
        try:
            data = json.loads(text)
        except (json.JSONDecodeError, TypeError):
            data = None
        if isinstance(data, dict):
            for key in ('insight', 'reply', 'text', 'content', 'response', 'message', 'output'):
                value = data.get(key)
                if isinstance(value, str) and value.strip():
                    text = value
                    break

    return text.strip()
def sanitize_for_display(text: str, max_chars: int = 500) -> str:
    """Final polish before showing text to the player: full clean, then truncate.

    Runs ``clean_text``, removes leftover ``<think>`` tag fragments, strips
    leading structured-output field labels ("insight:", "roast:", ...),
    collapses blank-line runs, capitalises the first character, and truncates
    to ``max_chars`` at a word boundary.

    Returns:
        The display-ready string, or "" when nothing is left after cleaning.
    """
    cleaned = clean_text(text)
    if not cleaned or not cleaned.strip():
        return ""
    cleaned = cleaned.strip()

    # Remove any remaining <think> fragments (case insensitive).
    cleaned = re.sub(r'</?think[^>]*>', '', cleaned, flags=re.IGNORECASE)

    # Strip field-name prefixes from structured output. Labels are tried once
    # each, in this order, against the progressively shortened text.
    field_names = (
        'insight', 'roast', 'lesson', 'suggestion', 'reply', 'response',
        'agent', 'action', 'reason', 'sentiment', 'headline', 'output',
        'text', 'content',
    )
    for name in field_names:
        label = name + ':'
        if cleaned.lower().startswith(label):
            cleaned = cleaned[len(label):].strip()

    # Collapse blank-line runs (lines holding only whitespace).
    cleaned = re.sub(r'\n\s*\n\s*\n', '\n\n', cleaned)

    # Ensure it starts with a capital letter.
    if cleaned[:1].islower():
        cleaned = cleaned[0].upper() + cleaned[1:]

    # Truncate to max_chars at a word boundary.
    if len(cleaned) > max_chars:
        clipped = cleaned[:max_chars]
        cleaned = clipped.rsplit(' ', 1)[0]

    return cleaned.strip()
def generate(prompt: str, system: str = "", max_tokens: int = 256, temperature: float = 0.7) -> str:
    """Generate text with the configured backend.

    Mock mode takes precedence; otherwise Modal is preferred over HuggingFace.
    Returns "" when no backend applies.
    """
    if _llm_status == "mock":
        return mock_generate(prompt, system)

    if USE_MODAL:
        backend = _modal_generate
    elif USE_HF:
        backend = _hf_generate
    else:
        return ""
    return backend(prompt, system, max_tokens, temperature)
def _modal_generate(prompt: str, system: str, max_tokens: int = 256, temperature: float = 0.7) -> str:
    """POST a chat request to ``{MODAL_URL}/chat`` and return the cleaned reply.

    Makes up to two attempts. A failed first attempt is followed by a
    2-second pause before retrying. Returns "" when httpx is missing or when
    no attempt produced non-empty string content.
    """
    import time
    try:
        import httpx
    except ImportError:
        print("httpx not installed. Install it: pip install httpx")
        return ""

    messages = [{"role": "user", "content": prompt}]
    if system:
        messages.insert(0, {"role": "system", "content": system})

    attempt = 0
    while attempt < 2:
        try:
            resp = httpx.post(
                f"{MODAL_URL}/chat",
                json={"messages": messages, "max_tokens": max_tokens, "temperature": temperature},
                timeout=180.0,
            )
            resp.raise_for_status()
            body = resp.json()
            content = body["choices"][0]["message"]["content"]
            if isinstance(content, str) and content.strip():
                return clean_text(content, prompt=prompt, system=system)
        except Exception as e:
            print(f"Modal inference attempt {attempt + 1} failed: {e}")
            # Pause only before the retry, not after the final attempt.
            if attempt == 0:
                time.sleep(2)
        attempt += 1

    print("Warning: Modal inference returned empty content after retries.")
    return ""
def _hf_generate(prompt: str, system: str, max_tokens: int = 256, temperature: float = 0.7) -> str:
    """POST the chat messages to ``HF_API_URL`` and return the cleaned reply.

    Three response shapes are recognised, checked in this order: a list whose
    first item has ``generated_text``, a dict with ``generated_text``, and a
    dict with a ``choices`` array. The first one yielding a non-empty string
    wins. Returns "" when httpx is missing, on any request/parsing error, or
    when no usable content is found.
    """
    try:
        import httpx
    except ImportError:
        print("httpx not installed. Install it: pip install httpx")
        return ""

    messages = [{"role": "user", "content": prompt}]
    if system:
        messages.insert(0, {"role": "system", "content": system})

    def usable(candidate) -> bool:
        return isinstance(candidate, str) and bool(candidate.strip())

    try:
        resp = httpx.post(
            HF_API_URL,
            json={
                "inputs": messages,
                "parameters": {"max_new_tokens": max_tokens, "temperature": temperature},
            },
            headers={"Authorization": f"Bearer {HF_TOKEN}"},
            timeout=120.0,
        )
        resp.raise_for_status()
        data = resp.json()

        # Handle various HF response formats.
        if isinstance(data, list) and data and "generated_text" in data[0]:
            content = data[0]["generated_text"]
            if usable(content):
                return clean_text(content, prompt=prompt, system=system)

        if isinstance(data, dict):
            if "generated_text" in data:
                content = data["generated_text"]
                if usable(content):
                    return clean_text(content, prompt=prompt, system=system)
            # Chat-format response (choices array).
            if "choices" in data:
                content = data["choices"][0].get("message", {}).get("content", "")
                if usable(content):
                    return clean_text(content, prompt=prompt, system=system)
    except Exception as e:
        print(f"HF inference failed: {e}")
    return ""
def mock_generate(prompt: str, system: str = "") -> str:
    """Return a canned, deterministic response chosen by keywords.

    Keywords are matched case-insensitively in ``prompt`` (and, for the
    commentator case, in ``system``). Checks run in a fixed order: agent
    personas, roast, insight/commentary, headline. Returns "" when nothing
    matches.
    """
    prompt_l = prompt.lower()
    system_l = system.lower()

    def mentions(*words: str) -> bool:
        return any(word in prompt_l for word in words)

    if "agent" in prompt_l:
        if "whale" in prompt_l:
            return "agent: whale\naction: buy gov_bonds 0.10\nreason: safety first\nsentiment: cautious"
        if "retail" in prompt_l:
            return "agent: retail\naction: sell nifty_it 0.10\nreason: panic selling\nsentiment: panic"
        return "agent: permabull\naction: buy crypto 0.10\nreason: buy the dip\nsentiment: bullish"

    if mentions("roast", "sharpe_ratio"):
        return "roast: diversify more\nsharpe_ratio: 0.5\nlesson: Sharpe ratio measures risk-adjusted return\nsuggestion: add bonds"

    if mentions("insight", "commentary") or "commentator" in system_l:
        return "insight: Markets are reacting to the headline. Watch for follow-through."

    if mentions("headline"):
        return "headline: RBI holds rates steady\nimpact: cash:0 fd:0 gov_bonds:0 nifty_50:0 nifty_it:0 real_estate:0 crypto:0 gold:0\nduration: 1"

    return ""
def parse_agent_response(response: str, persona: str) -> Dict:
    """Parse an NPC decision written as ``agent:/action:/reason:/sentiment:`` lines.

    Args:
        response: Raw model output; it is passed through ``clean_text`` first.
        persona: Fallback agent name used when no ``agent:`` line is found.

    Returns:
        A dict with ``agent``, a one-element ``actions`` list
        (``asset``, ``action``, ``amount_pct``, ``reason``) and ``sentiment``.
        When no action line is found, or parsing fails, a neutral
        "hold cash 0.0" decision is returned instead; this function does not
        raise on malformed model output.
    """
    response = clean_text(response)

    def hold(agent_name: str, reason: str) -> Dict:
        return {
            "agent": agent_name,
            "actions": [{"asset": "cash", "action": "hold", "amount_pct": 0.0, "reason": reason}],
            "sentiment": "neutral",
        }

    try:
        m_agent = re.search(r"agent:\s*(\w+)", response, re.IGNORECASE)
        agent = (m_agent.group(1).lower() if m_agent else persona) or persona
        m_action = re.search(r"action:\s*(buy|sell|hold)\s+(\w+)\s+([\d.%]+)", response, re.IGNORECASE)
        m_reason = re.search(r"reason:\s*(.+)", response, re.IGNORECASE)
        m_sent = re.search(r"sentiment:\s*(\w+)", response, re.IGNORECASE)
        if not m_action:
            return hold(agent, "no action")

        # The pattern admits a '%' sign, which float() rejects. An explicit
        # percentage is converted to a fraction.
        # NOTE(review): assumes amount_pct is a fraction (the mock backend
        # emits 0.10) - confirm against the consumer of this dict.
        raw_amount = m_action.group(3)
        if "%" in raw_amount:
            amount = float(raw_amount.replace("%", "")) / 100.0
        else:
            amount = float(raw_amount)

        return {
            "agent": agent,
            "actions": [{
                # Matching is case-insensitive, so normalise what was captured.
                "asset": m_action.group(2).lower(),
                "action": m_action.group(1).lower(),
                "amount_pct": amount,
                "reason": (m_reason.group(1).strip() if m_reason else ""),
            }],
            "sentiment": (m_sent.group(1).lower() if m_sent else "neutral"),
        }
    except Exception as e:
        # Best-effort parser: malformed output degrades to a hold decision.
        return hold(persona, f"parse error: {e}")
def parse_news_response(response: str) -> Dict:
    """Parse a news item written as ``headline:/impact:/duration:`` lines.

    The impact line is a whitespace-separated list of ``asset:value`` tokens.
    Tokens that lack a colon, have an empty key, or do not carry a numeric
    value are skipped individually, so one bad token does not discard the
    rest of the parse. Every asset in ``ASSETS`` missing from the impact map
    defaults to 0.0.

    Args:
        response: Raw model output; it is passed through ``clean_text`` first.

    Returns:
        A dict with ``headline``, ``impact`` (asset -> float) and
        ``duration_months``. On an unexpected error a neutral default is
        returned with an extra ``error`` key.
    """
    response = clean_text(response)
    try:
        m_head = re.search(r"headline:\s*(.+)", response, re.IGNORECASE)
        m_imp = re.search(r"impact:\s*(.+?)(?:\nduration:|$)", response, re.DOTALL | re.IGNORECASE)
        m_dur = re.search(r"duration:\s*(\d+)", response, re.IGNORECASE)

        headline = m_head.group(1).strip() if m_head else "Markets mixed"

        impact = {}
        if m_imp:
            for token in m_imp.group(1).split():
                # partition() never raises, unlike unpacking split(":") on a
                # token that contains more than one colon.
                key, sep, value = token.partition(":")
                if not sep or not key:
                    continue
                try:
                    # Tolerate comma/semicolon separated lists ("cash:0,").
                    impact[key] = float(value.rstrip(",;"))
                except ValueError:
                    continue
        for asset in ASSETS:
            impact.setdefault(asset, 0.0)

        duration = int(m_dur.group(1)) if m_dur else 1
        return {"headline": headline, "impact": impact, "duration_months": duration}
    except Exception as e:
        return {"headline": "Markets mixed", "impact": {a: 0.0 for a in ASSETS}, "duration_months": 1, "error": str(e)}
def decide_agent(persona: str, state: Dict) -> Dict:
    """Ask the backend for one persona's trading decision and parse it.

    Only ``month``, ``year``, ``cash`` and ``total_value`` from ``state`` are
    sent to the model. The reply is parsed by ``parse_agent_response``.
    """
    instructions = (
        f"You are an NPC trader in an Indian stock-market game. "
        f"Output the {persona}'s decision in EXACT format:\n"
        f"agent: {persona}\naction: <buy|sell|hold> <asset> <amount_pct>\n"
        f"reason: <short reason>\nsentiment: <bullish|bearish|neutral|panic|cautious>"
    )
    # Keep the prompt small: forward only the fields listed here.
    snapshot = {field: state.get(field) for field in ("month", "year", "cash", "total_value")}
    question = f"State: {json.dumps(snapshot)}. Persona: {persona}. Decide."
    raw = generate(question, system=instructions, max_tokens=150, temperature=0.6)
    return parse_agent_response(raw, persona)
def generate_news(event: Dict) -> Dict:
    """Normalise a market event dict into a news item (no LLM call is made).

    Missing fields get defaults, every asset in ``ASSETS`` is given an impact
    entry (0.0 when absent), and numeric fields are coerced to float/int.
    The caller's ``event`` and its ``impact`` dict are not modified.

    Args:
        event: Event dict; the keys read are ``headline``, ``regime``,
            ``impact``, ``duration_months``, ``year`` and ``month``.

    Returns:
        A new dict with ``headline``, ``regime``, ``impact``,
        ``duration_months``, ``year`` and ``month``.
    """
    # Build a fresh mapping so setdefault() below cannot leak defaults back
    # into the caller's event; `or {}` also tolerates an explicit None.
    impact = {k: float(v) for k, v in (event.get("impact") or {}).items()}
    for asset in ASSETS:
        impact.setdefault(asset, 0.0)

    return {
        "headline": event.get("headline", "Markets trade in tight range"),
        "regime": event.get("regime", "stagnation"),
        "impact": impact,
        "duration_months": int(event.get("duration_months", 1)),
        "year": int(event.get("year", 0)),
        "month": int(event.get("month", 0)),
    }
def generate_insight(event: Dict, state_snapshot: Dict) -> str:
    """Produce a one-sentence insight (at most 200 chars) for a market event.

    The backend is asked first; if it fails or yields nothing, a rule-based
    sentence is chosen from the player's P&L, cash share and the event regime.
    An empty ``event`` returns a fixed "markets are quiet" message.
    """
    if not event:
        return "Markets are quiet. Use the time to review your allocation."

    pnl = float(state_snapshot.get("unrealized_pnl", 0.0))
    cash = float(state_snapshot.get("cash", 0.0))
    total = float(state_snapshot.get("total_value", 0.0))
    # Guard against division by zero for an empty portfolio.
    cash_pct = (cash / total * 100.0) if total else 0.0
    regime = str(event.get("regime", "stagnation"))
    headline = str(event.get("headline", ""))

    system = (
        "You are a sharp Indian markets commentator. Given a market event "
        "and a player's portfolio snapshot, output ONE sentence (under 140 chars) "
        "of actionable insight. Reply ONLY with the insight text. "
        "No prefixes, no markdown, no thinking tags, no explanations."
    )
    prompt = (
        f"Event: {headline} (regime: {regime}). "
        f"Player P&L ₹{pnl:,.0f}, cash {cash_pct:.0f}%, total ₹{total:,.0f}. "
        f"One actionable sentence."
    )

    try:
        raw = generate(prompt, system=system, max_tokens=100, temperature=0.4).strip()
        insight = sanitize_for_display(raw, 200)
    except Exception:
        # Any backend failure falls through to the deterministic fallback.
        insight = ""

    if insight:
        return insight[:200]

    regime_label = regime.replace('_', ' ')
    if pnl < -50_000:
        insight = f"Cut losers in {regime_label} regimes and rotate into defensives."
    elif pnl > 50_000:
        insight = f"Book partial profits; {regime_label} trends rarely last."
    elif cash_pct > 60:
        insight = "Heavy cash drag. Deploy into bonds or Nifty on dips."
    else:
        insight = f"Hold the line through this {regime_label} phase."
    return insight[:200]
def chat_reply(user_message: str, state_snapshot: Dict) -> str:
    """Answer a player's chat message in 2-3 sentences (at most 500 chars).

    The backend is given a portfolio summary (up to 8 positions). If it fails
    or yields nothing, a canned reply is chosen from keywords in the message
    and the sign of the unrealized P&L.
    """
    pnl = float(state_snapshot.get("unrealized_pnl", 0.0))
    cash = float(state_snapshot.get("cash", 0.0))
    total = float(state_snapshot.get("total_value", 0.0))

    holdings = state_snapshot.get("positions", [])
    described = [
        f"{p['asset']} {p['qty']:.2f} @ ₹{p['price']:.0f}" for p in holdings[:8]
    ]
    pos_lines = ", ".join(described) or "no positions"

    system = (
        "You are Retro Alpha, a sharp Indian markets assistant in a 1990s "
        "stock-trading game. Be concise, witty, and grounded in the player's "
        "actual positions. Output ONLY 2-3 short sentences. "
        "No thinking tags, no markdown, no prefixes, no explanations."
    )
    prompt = (
        f"Portfolio: total ₹{total:,.0f}, cash ₹{cash:,.0f}, "
        f"unrealized P&L ₹{pnl:,.0f}. Positions: {pos_lines}.\n"
        f"Player: {user_message}\nReply in 2-3 short sentences."
    )

    try:
        raw = generate(prompt, system=system, max_tokens=140, temperature=0.5).strip()
        reply = sanitize_for_display(raw, 500)
    except Exception:
        # Any backend failure falls through to the deterministic fallback.
        reply = ""

    if reply:
        return reply[:500]

    asked = user_message.lower()
    if "buy" in asked or "should i" in asked:
        reply = f"With cash at ₹{cash:,.0f} and P&L ₹{pnl:,.0f}, I'd wait for a confirmed trend before adding. Check the chart for support levels."
    elif "sell" in asked:
        reply = "Selling into strength is a discipline. If your position is >20% of portfolio, trim 10% and rebalance."
    elif pnl < 0:
        reply = f"You're down ₹{abs(pnl):,.0f}. Don't add to losers. Rotate into bonds or gold until the regime clarifies."
    else:
        reply = f"Up ₹{pnl:,.0f} — not bad. Lock in some gains into FDs so the win isn't just on paper."
    return reply[:500]
def all_agents_decide(state: Dict) -> List[Dict]:
    """Collect one decision per persona in ``PERSONAS``, in list order."""
    decisions = []
    for persona in PERSONAS:
        decisions.append(decide_agent(persona, state))
    return decisions
|