RayMelius Claude Sonnet 4.6 committed on
Commit
c04eb58
·
1 Parent(s): e9af92b

Add dynamic LLM provider/model selection to AI Analyst

Browse files

- ai_analyst: add Groq support, _active_provider/_active_model globals,
refactored call_llm routing (Ollama → Groq → HF auto fallback),
handle Kafka 'set_llm' control message to switch provider at runtime
- dashboard: /ai/select now publishes set_llm to Kafka control topic so
the AI Analyst service picks up provider changes immediately
- dashboard + index.html: sync LLM selector UI (provider/model dropdowns,
badge, SSE llm_config event) to latest working version

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

ai_analyst/ai_analyst.py CHANGED
@@ -12,8 +12,15 @@ OLLAMA_HOST = os.getenv("OLLAMA_HOST", "") # e.g. http://host.docker
12
  OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
13
  HF_TOKEN = os.getenv("HF_TOKEN", "")
14
  HF_MODEL = os.getenv("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct-1M")
 
 
 
15
  ANALYSIS_INTERVAL = int(os.getenv("ANALYSIS_INTERVAL", "1800")) # 30 min default
16
 
 
 
 
 
17
  # ── Rolling market data buffers ────────────────────────────────────────────────
18
  recent_trades = deque(maxlen=200)
19
  latest_snapshots = {} # symbol -> snapshot dict
@@ -25,69 +32,104 @@ _suspended = False
25
  # ── LLM call ──────────────────────────────────────────────────────────────────
26
 
27
  def call_llm(prompt: str) -> str | None:
28
- """Try Ollama first, fall back to HuggingFace Inference API."""
29
 
30
- # 1. Ollama (local)
31
- if OLLAMA_HOST:
 
 
32
  try:
33
  resp = requests.post(
34
  f"{OLLAMA_HOST}/api/chat",
35
- json={
36
- "model": OLLAMA_MODEL,
37
- "messages": [{"role": "user", "content": prompt}],
38
- "stream": False,
39
- },
40
  timeout=90,
41
  )
42
  if resp.status_code == 200:
43
  text = resp.json().get("message", {}).get("content", "").strip()
44
  if text:
45
- print(f"[AI-Analyst] Insight via Ollama ({OLLAMA_MODEL})")
46
  return text
47
- else:
48
- print(f"[AI-Analyst] Ollama HTTP {resp.status_code}: {resp.text[:200]}")
49
  except Exception as e:
50
- print(f"[AI-Analyst] Ollama unreachable: {e}")
 
51
 
52
- # 2. HuggingFace Inference API β€” router.huggingface.co (OpenAI-compatible)
53
- if HF_TOKEN:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  url = "https://router.huggingface.co/v1/chat/completions"
55
- print(f"[AI-Analyst] Calling HF router: model={HF_MODEL}")
56
  for attempt in range(3):
57
  try:
58
  resp = requests.post(
59
  url,
60
- headers={
61
- "Authorization": f"Bearer {HF_TOKEN}",
62
- "Content-Type": "application/json",
63
- },
64
- json={
65
- "model": HF_MODEL,
66
- "messages": [{"role": "user", "content": prompt}],
67
- "max_tokens": 220,
68
- "temperature": 0.7,
69
- },
70
  timeout=60,
71
  )
72
  print(f"[AI-Analyst] HF response status: {resp.status_code}")
73
  if resp.status_code == 200:
74
  text = resp.json()["choices"][0]["message"]["content"].strip()
75
  if text:
76
- print(f"[AI-Analyst] Insight via HuggingFace ({HF_MODEL})")
77
  return text
78
  elif resp.status_code == 503:
79
  body = resp.json() if resp.content else {}
80
- wait = body.get("estimated_time", 20)
81
  print(f"[AI-Analyst] HF model loading, waiting {wait:.0f}s (attempt {attempt+1}/3)")
82
- time.sleep(min(float(wait), 30))
83
  else:
84
  print(f"[AI-Analyst] HF HTTP {resp.status_code}: {resp.text[:400]}")
85
  break
86
  except Exception as e:
87
  print(f"[AI-Analyst] HF API error (attempt {attempt+1}/3): {e}")
88
  break
 
 
 
 
89
 
90
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
 
93
  # ── Prompt builder ─────────────────────────────────────────────────────────────
@@ -166,7 +208,7 @@ def run_immediate_analysis(producer):
166
  # ── Kafka consumer (market data) ──────────────────────────────────────────────
167
 
168
  def consume_market_data(producer):
169
- global _running, _suspended
170
  consumer = create_consumer(
171
  topics=[
172
  Config.TRADES_TOPIC,
@@ -199,6 +241,11 @@ def consume_market_data(producer):
199
  _suspended = False
200
  elif action == "generate_insight":
201
  threading.Thread(target=run_immediate_analysis, args=(producer,), daemon=True).start()
 
 
 
 
 
202
 
203
 
204
  # ── Analysis loop ──────────────────────────────────────────────────────────────
@@ -207,10 +254,13 @@ def analysis_loop(producer):
207
  print(f"[AI-Analyst] Analysis loop started (interval={ANALYSIS_INTERVAL}s)")
208
  if OLLAMA_HOST:
209
  print(f"[AI-Analyst] Ollama: {OLLAMA_HOST} model: {OLLAMA_MODEL}")
 
 
210
  if HF_TOKEN:
211
  print(f"[AI-Analyst] HuggingFace fallback: model={HF_MODEL}")
212
- if not OLLAMA_HOST and not HF_TOKEN:
213
- print("[AI-Analyst] WARNING: neither OLLAMA_HOST nor HF_TOKEN configured β€” no insights will be generated")
 
214
 
215
  while True:
216
  time.sleep(ANALYSIS_INTERVAL)
 
12
  OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
13
  HF_TOKEN = os.getenv("HF_TOKEN", "")
14
  HF_MODEL = os.getenv("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct-1M")
15
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
16
+ GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
17
+ GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
18
  ANALYSIS_INTERVAL = int(os.getenv("ANALYSIS_INTERVAL", "1800")) # 30 min default
19
 
20
+ # ── Runtime LLM selection (updated via Kafka "set_llm" control messages) ───────
21
+ _active_provider = "auto" # "auto" | "ollama" | "groq" | "hf"
22
+ _active_model = None # None = use env-var default for chosen provider
23
+
24
  # ── Rolling market data buffers ────────────────────────────────────────────────
25
  recent_trades = deque(maxlen=200)
26
  latest_snapshots = {} # symbol -> snapshot dict
 
32
  # ── LLM call ──────────────────────────────────────────────────────────────────
33
 
34
  def call_llm(prompt: str) -> str | None:
35
+ """Route to the active provider (or auto-fallback chain: Ollama β†’ Groq β†’ HF)."""
36
 
37
+ def _try_ollama(model):
38
+ if not OLLAMA_HOST:
39
+ return None
40
+ m = model or OLLAMA_MODEL
41
  try:
42
  resp = requests.post(
43
  f"{OLLAMA_HOST}/api/chat",
44
+ json={"model": m, "messages": [{"role": "user", "content": prompt}], "stream": False},
 
 
 
 
45
  timeout=90,
46
  )
47
  if resp.status_code == 200:
48
  text = resp.json().get("message", {}).get("content", "").strip()
49
  if text:
50
+ print(f"[AI-Analyst] Insight via Ollama ({m})")
51
  return text
52
+ print(f"[AI-Analyst] Ollama HTTP {resp.status_code}: {resp.text[:200]}")
 
53
  except Exception as e:
54
+ print(f"[AI-Analyst] Ollama error: {e}")
55
+ return None
56
 
57
+ def _try_groq(model):
58
+ if not GROQ_API_KEY:
59
+ return None
60
+ m = model or GROQ_MODEL
61
+ try:
62
+ resp = requests.post(
63
+ GROQ_URL,
64
+ headers={"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"},
65
+ json={"model": m, "messages": [{"role": "user", "content": prompt}],
66
+ "max_tokens": 300, "temperature": 0.7},
67
+ timeout=30,
68
+ )
69
+ if resp.status_code == 200:
70
+ text = resp.json()["choices"][0]["message"]["content"].strip()
71
+ if text:
72
+ print(f"[AI-Analyst] Insight via Groq ({m})")
73
+ return text
74
+ print(f"[AI-Analyst] Groq HTTP {resp.status_code}: {resp.text[:200]}")
75
+ except Exception as e:
76
+ print(f"[AI-Analyst] Groq error: {e}")
77
+ return None
78
+
79
+ def _try_hf(model):
80
+ if not HF_TOKEN:
81
+ return None
82
+ m = model or HF_MODEL
83
  url = "https://router.huggingface.co/v1/chat/completions"
84
+ print(f"[AI-Analyst] Calling HF router: model={m}")
85
  for attempt in range(3):
86
  try:
87
  resp = requests.post(
88
  url,
89
+ headers={"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"},
90
+ json={"model": m, "messages": [{"role": "user", "content": prompt}],
91
+ "max_tokens": 220, "temperature": 0.7},
 
 
 
 
 
 
 
92
  timeout=60,
93
  )
94
  print(f"[AI-Analyst] HF response status: {resp.status_code}")
95
  if resp.status_code == 200:
96
  text = resp.json()["choices"][0]["message"]["content"].strip()
97
  if text:
98
+ print(f"[AI-Analyst] Insight via HuggingFace ({m})")
99
  return text
100
  elif resp.status_code == 503:
101
  body = resp.json() if resp.content else {}
102
+ wait = min(float(body.get("estimated_time", 20)), 30)
103
  print(f"[AI-Analyst] HF model loading, waiting {wait:.0f}s (attempt {attempt+1}/3)")
104
+ time.sleep(wait)
105
  else:
106
  print(f"[AI-Analyst] HF HTTP {resp.status_code}: {resp.text[:400]}")
107
  break
108
  except Exception as e:
109
  print(f"[AI-Analyst] HF API error (attempt {attempt+1}/3): {e}")
110
  break
111
+ return None
112
+
113
+ provider = _active_provider
114
+ model = _active_model
115
 
116
+ if provider == "ollama":
117
+ return _try_ollama(model)
118
+ if provider == "groq":
119
+ return _try_groq(model)
120
+ if provider == "hf":
121
+ return _try_hf(model)
122
+
123
+ # Auto fallback chain
124
+ if OLLAMA_HOST:
125
+ text = _try_ollama(model)
126
+ if text:
127
+ return text
128
+ if GROQ_API_KEY:
129
+ text = _try_groq(model)
130
+ if text:
131
+ return text
132
+ return _try_hf(model)
133
 
134
 
135
  # ── Prompt builder ─────────────────────────────────────────────────────────────
 
208
  # ── Kafka consumer (market data) ──────────────────────────────────────────────
209
 
210
  def consume_market_data(producer):
211
+ global _running, _suspended, _active_provider, _active_model
212
  consumer = create_consumer(
213
  topics=[
214
  Config.TRADES_TOPIC,
 
241
  _suspended = False
242
  elif action == "generate_insight":
243
  threading.Thread(target=run_immediate_analysis, args=(producer,), daemon=True).start()
244
+ elif action == "set_llm":
245
+ _active_provider = msg.value.get("provider", "auto")
246
+ _active_model = msg.value.get("model") or None
247
+ label = f"{_active_provider}/{_active_model}" if _active_model else _active_provider
248
+ print(f"[AI-Analyst] LLM switched to: {label}")
249
 
250
 
251
  # ── Analysis loop ──────────────────────────────────────────────────────────────
 
254
  print(f"[AI-Analyst] Analysis loop started (interval={ANALYSIS_INTERVAL}s)")
255
  if OLLAMA_HOST:
256
  print(f"[AI-Analyst] Ollama: {OLLAMA_HOST} model: {OLLAMA_MODEL}")
257
+ if GROQ_API_KEY:
258
+ print(f"[AI-Analyst] Groq model: {GROQ_MODEL}")
259
  if HF_TOKEN:
260
  print(f"[AI-Analyst] HuggingFace fallback: model={HF_MODEL}")
261
+ if not OLLAMA_HOST and not GROQ_API_KEY and not HF_TOKEN:
262
+ print("[AI-Analyst] WARNING: no LLM configured β€” no insights will be generated")
263
+ print(f"[AI-Analyst] Active provider: {_active_provider} (send Kafka 'set_llm' to change)")
264
 
265
  while True:
266
  time.sleep(ANALYSIS_INTERVAL)
dashboard/dashboard.py CHANGED
@@ -28,7 +28,7 @@ FRONTEND_URL = os.getenv("FRONTEND_URL", "")
28
 
29
  # ── AI Analyst (inline LLM for on-demand generation) ───────────────────────────
30
  HF_TOKEN = os.getenv("HF_TOKEN", "")
31
- HF_MODEL = os.getenv("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct")
32
  HF_URL = "https://router.huggingface.co/v1/chat/completions"
33
  GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
34
  GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
@@ -36,6 +36,25 @@ GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
36
  OLLAMA_HOST = os.getenv("OLLAMA_HOST", "")
37
  OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  def _build_market_prompt():
41
  with lock:
@@ -76,91 +95,111 @@ def _build_market_prompt():
76
  f"Plain prose, no headers, no bullet points.")
77
 
78
 
79
- def _call_llm(prompt):
80
- """Try Ollama first, then HuggingFace router. Returns (text, source) or (None, error_msg)."""
81
- # 1. Ollama
82
- if OLLAMA_HOST:
 
 
 
 
 
 
 
 
83
  try:
84
  r = requests.post(f"{OLLAMA_HOST}/api/chat",
85
- json={"model": OLLAMA_MODEL,
86
- "messages": [{"role": "user", "content": prompt}],
87
  "stream": False},
88
  timeout=90)
89
  if r.status_code == 200:
90
  text = r.json().get("message", {}).get("content", "").strip()
91
  if text:
92
- return text, "Ollama"
93
- print(f"[Dashboard/LLM] Ollama {r.status_code}: {r.text[:200]}")
94
  except Exception as e:
95
- print(f"[Dashboard/LLM] Ollama error: {e}")
96
 
97
- # 2. Groq (free, fast)
98
- if GROQ_API_KEY:
 
 
99
  try:
100
  r = requests.post(GROQ_URL,
101
  headers={"Authorization": f"Bearer {GROQ_API_KEY}",
102
  "Content-Type": "application/json"},
103
- json={"model": GROQ_MODEL,
104
- "messages": [{"role": "user", "content": prompt}],
105
- "max_tokens": 180,
106
- "temperature": 0.7},
107
  timeout=30)
108
  print(f"[Dashboard/LLM] Groq status {r.status_code}")
109
  if r.status_code == 200:
110
  text = r.json()["choices"][0]["message"]["content"].strip()
111
  if text:
112
- return text, f"Groq/{GROQ_MODEL}"
113
- else:
114
- print(f"[Dashboard/LLM] Groq error: {r.text[:200]}")
115
- except Exception as e:
116
- print(f"[Dashboard/LLM] Groq exception: {e}")
117
-
118
- # 3. HuggingFace router
119
- if not HF_TOKEN:
120
- return None, "No LLM configured. Set GROQ_API_KEY (free at console.groq.com) or HF_TOKEN."
121
- print(f"[Dashboard/LLM] Calling HF router ({HF_MODEL})…")
122
- for attempt in range(3):
123
- try:
124
- r = requests.post(HF_URL,
125
- headers={"Authorization": f"Bearer {HF_TOKEN}",
126
- "Content-Type": "application/json"},
127
- json={"model": HF_MODEL,
128
- "messages": [{"role": "user", "content": prompt}],
129
- "max_tokens": 180,
130
- "temperature": 0.7},
131
- timeout=90)
132
- print(f"[Dashboard/LLM] HF status {r.status_code} (attempt {attempt+1})")
133
- if r.status_code == 200:
134
- text = r.json()["choices"][0]["message"]["content"].strip()
135
- if text:
136
- return text, HF_MODEL
137
- elif r.status_code == 503:
138
- body = {}
139
- try: body = r.json()
140
- except: pass
141
- wait = min(float(body.get("estimated_time", 20)), 30)
142
- print(f"[Dashboard/LLM] Model loading, waiting {wait:.0f}s…")
143
- time.sleep(wait)
144
- else:
145
- print(f"[Dashboard/LLM] HF error body: {r.text[:400]}")
146
- try:
147
- err_code = r.json().get("error", {}).get("code", "")
148
- except Exception:
149
- err_code = ""
150
- if r.status_code == 402 or "credit" in r.text.lower() or "depleted" in r.text.lower():
151
- return None, ("HF credit balance depleted. Add GROQ_API_KEY secret instead "
152
- "(free at console.groq.com β€” 14,400 req/day).")
153
- if err_code == "model_not_supported" or "provider" in r.text.lower():
154
- return None, (f"Model '{HF_MODEL}' not available on any enabled provider. "
155
- "Set HF_MODEL secret to a supported model (e.g. Qwen/Qwen2.5-7B-Instruct).")
156
- return None, f"HF HTTP {r.status_code}: {r.text[:120]}"
157
- except requests.exceptions.Timeout:
158
- print(f"[Dashboard/LLM] HF timeout (attempt {attempt+1})")
159
- return None, "HF request timed out after 90s"
160
  except Exception as e:
161
- print(f"[Dashboard/LLM] HF exception: {e}")
162
- return None, str(e)
163
- return None, "HF: max retries exceeded"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
 
166
  def _generate_and_broadcast():
@@ -171,7 +210,7 @@ def _generate_and_broadcast():
171
  return
172
 
173
  prompt = _build_market_prompt()
174
- text, source = _call_llm(prompt)
175
  if text:
176
  insight = {"text": text, "source": source, "timestamp": time.time()}
177
  with lock:
@@ -615,6 +654,51 @@ def trigger_ai_insight():
615
  return jsonify({"status": "ok", "message": "Insight generation started"})
616
 
617
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
618
  @app.route("/ai/debug")
619
  def ai_debug():
620
  """Synchronous LLM test β€” returns raw API result for debugging."""
 
28
 
29
  # ── AI Analyst (inline LLM for on-demand generation) ───────────────────────────
30
  HF_TOKEN = os.getenv("HF_TOKEN", "")
31
+ HF_MODEL = os.getenv("HF_MODEL", "RayMelius/stockex-analyst")
32
  HF_URL = "https://router.huggingface.co/v1/chat/completions"
33
  GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
34
  GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
 
36
  OLLAMA_HOST = os.getenv("OLLAMA_HOST", "")
37
  OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
38
 
39
+ # Known model lists for the dynamic selector UI
40
+ GROQ_MODELS = [
41
+ "llama-3.1-8b-instant",
42
+ "llama-3.3-70b-versatile",
43
+ "llama-3.1-70b-versatile",
44
+ "mixtral-8x7b-32768",
45
+ "gemma2-9b-it",
46
+ ]
47
+ HF_MODELS = [
48
+ "RayMelius/stockex-analyst",
49
+ "Qwen/Qwen2.5-7B-Instruct-1M",
50
+ "meta-llama/Llama-3.1-8B-Instruct",
51
+ "mistralai/Mistral-7B-Instruct-v0.3",
52
+ ]
53
+
54
+ # Runtime LLM selection (overrides env var defaults when set via /ai/select)
55
+ _active_provider = "auto" # "auto" | "ollama" | "groq" | "hf"
56
+ _active_model = None # str override or None = use env var default
57
+
58
 
59
  def _build_market_prompt():
60
  with lock:
 
95
  f"Plain prose, no headers, no bullet points.")
96
 
97
 
def _call_llm(prompt, force_provider=None, force_model=None):
    """Call an LLM and return ``(text, source)`` or ``(None, error_msg)``.

    Args:
        prompt: Text sent as the single user chat message.
        force_provider: ``"auto"``, ``"ollama"``, ``"groq"``, ``"hf"`` or
            ``None``. A named provider is the only one tried. ``"auto"``,
            ``None`` or any other value walks the fallback chain
            Ollama -> Groq -> HF over the configured providers.
        force_model: Model name replacing the env-var default of every
            provider that is tried.

    Returns:
        ``(text, source)`` on success, where ``source`` names the provider
        and/or model that answered; otherwise ``(None, error_msg)``. In auto
        mode the error is that of the last configured provider that failed.
    """
    provider = force_provider or "auto"
    messages = [{"role": "user", "content": prompt}]

    def _try_ollama(model):
        """Query the Ollama server configured via OLLAMA_HOST."""
        if not OLLAMA_HOST:
            return None, "Ollama not configured (OLLAMA_HOST not set)"
        m = model or OLLAMA_MODEL
        try:
            r = requests.post(f"{OLLAMA_HOST}/api/chat",
                              json={"model": m, "messages": messages,
                                    "stream": False},
                              timeout=90)
            if r.status_code == 200:
                text = r.json().get("message", {}).get("content", "").strip()
                if text:
                    return text, f"Ollama/{m}"
            return None, f"Ollama HTTP {r.status_code}: {r.text[:200]}"
        except Exception as e:
            return None, f"Ollama error: {e}"

    def _try_groq(model):
        """Query Groq's OpenAI-compatible chat completions endpoint."""
        if not GROQ_API_KEY:
            return None, "Groq not configured (GROQ_API_KEY not set)"
        m = model or GROQ_MODEL
        try:
            r = requests.post(GROQ_URL,
                              headers={"Authorization": f"Bearer {GROQ_API_KEY}",
                                       "Content-Type": "application/json"},
                              json={"model": m, "messages": messages,
                                    "max_tokens": 300, "temperature": 0.7},
                              timeout=30)
            print(f"[Dashboard/LLM] Groq status {r.status_code}")
            if r.status_code == 200:
                text = r.json()["choices"][0]["message"]["content"].strip()
                if text:
                    return text, f"Groq/{m}"
            return None, f"Groq HTTP {r.status_code}: {r.text[:200]}"
        except Exception as e:
            return None, f"Groq error: {e}"

    def _hf_retry_delay(r):
        """Return the seconds to wait after a 503, clamped to [0, 30].

        Uses 20 s when the body is not JSON, not an object, or has a
        non-numeric ``estimated_time``.
        """
        try:
            return max(0.0, min(float(r.json().get("estimated_time", 20)), 30.0))
        except (ValueError, TypeError, AttributeError):
            return 20.0

    def _try_hf(model):
        """Query HuggingFace, retrying while the model reports 503."""
        if not HF_TOKEN:
            return None, "HuggingFace not configured (HF_TOKEN not set)"
        m = model or HF_MODEL
        # Use direct inference API for custom models, router for known public models.
        # (The former second clause, '"/" in m.split("/")[0]', could never be
        # true, so the prefix test is the whole condition.)
        if m.startswith("RayMelius/"):
            url = f"https://api-inference.huggingface.co/models/{m}/v1/chat/completions"
        else:
            url = HF_URL
        max_attempts = 3
        print(f"[Dashboard/LLM] Calling HF ({m})...")
        for attempt in range(max_attempts):
            try:
                r = requests.post(url,
                                  headers={"Authorization": f"Bearer {HF_TOKEN}",
                                           "Content-Type": "application/json"},
                                  json={"model": m,
                                        "messages": messages,
                                        "max_tokens": 300, "temperature": 0.7},
                                  timeout=90)
                print(f"[Dashboard/LLM] HF status {r.status_code} (attempt {attempt+1})")
                if r.status_code == 200:
                    text = r.json()["choices"][0]["message"]["content"].strip()
                    if text:
                        return text, m
                    # Empty completion: fall through and try again.
                elif r.status_code == 503:
                    if attempt + 1 == max_attempts:
                        # No request follows the last attempt; do not sleep.
                        break
                    wait = _hf_retry_delay(r)
                    print(f"[Dashboard/LLM] Model loading, waiting {wait:.0f}s...")
                    time.sleep(wait)
                else:
                    print(f"[Dashboard/LLM] HF error: {r.text[:400]}")
                    if r.status_code == 402 or "credit" in r.text.lower() or "depleted" in r.text.lower():
                        return None, "HF credit depleted. Add GROQ_API_KEY (free at console.groq.com)."
                    return None, f"HF HTTP {r.status_code}: {r.text[:120]}"
            except requests.exceptions.Timeout:
                return None, "HF request timed out after 90s"
            except Exception as e:
                return None, f"HF error: {e}"
        return None, "HF: max retries exceeded"

    # Route to chosen provider or auto-fallback chain
    if provider == "ollama":
        return _try_ollama(force_model)
    if provider == "groq":
        return _try_groq(force_model)
    if provider == "hf":
        return _try_hf(force_model)

    # Auto: Ollama -> Groq -> HF, skipping providers that are not configured.
    chain = (
        (OLLAMA_HOST, _try_ollama),
        (GROQ_API_KEY, _try_groq),
        (HF_TOKEN, _try_hf),
    )
    if not any(configured for configured, _ in chain):
        return None, ("No LLM configured. Set OLLAMA_HOST, GROQ_API_KEY "
                      "(free at console.groq.com) or HF_TOKEN.")
    last_error = None
    for configured, attempt_provider in chain:
        if not configured:
            continue
        text, info = attempt_provider(force_model)
        if text:
            return text, info
        # Keep the failure visible: log it and remember it so the caller gets
        # the real reason rather than "next provider not configured".
        print(f"[Dashboard/LLM] {info}")
        last_error = info
    return None, last_error
203
 
204
 
205
  def _generate_and_broadcast():
 
210
  return
211
 
212
  prompt = _build_market_prompt()
213
+ text, source = _call_llm(prompt, force_provider=_active_provider, force_model=_active_model)
214
  if text:
215
  insight = {"text": text, "source": source, "timestamp": time.time()}
216
  with lock:
 
654
  return jsonify({"status": "ok", "message": "Insight generation started"})
655
 
656
 
@app.route("/ai/config")
def ai_config():
    """Describe the selectable LLM providers and the current selection.

    The JSON payload holds ``active_provider``, ``active_model`` and a
    ``providers`` mapping. Each provider entry has a display ``label`` and a
    ``models`` list; every entry except ``auto`` also has an ``available``
    flag, which is true when that provider's credential or host is set.
    """
    ollama_enabled = bool(OLLAMA_HOST)
    # Ollama advertises its single configured model, and only when a host is set.
    ollama_models = [OLLAMA_MODEL] if ollama_enabled else []

    providers = {
        "auto": {"label": "Auto (fallback chain)", "models": []},
        "groq": {
            "label": "Groq",
            "models": GROQ_MODELS,
            "available": bool(GROQ_API_KEY),
        },
        "hf": {
            "label": "HuggingFace",
            "models": HF_MODELS,
            "available": bool(HF_TOKEN),
        },
        "ollama": {
            "label": "Ollama (local)",
            "models": ollama_models,
            "available": ollama_enabled,
        },
    }
    payload = {
        "active_provider": _active_provider,
        "active_model": _active_model,
        "providers": providers,
    }
    return jsonify(payload)
673
+
674
+
@app.route("/ai/select", methods=["POST"])
def ai_select():
    """Dynamically switch the LLM provider/model used for AI insights.

    Expects a JSON object with optional ``provider`` (default ``"auto"``) and
    ``model`` keys. A missing, unparsable or non-object body is treated as an
    empty object. On success the module-level selection is updated, an
    ``llm_config`` event is broadcast, and a ``set_llm`` message is published
    to the Kafka control topic; a Kafka failure is logged but does not fail
    the request.

    Returns:
        ``{"status": "ok", ...}`` with the new selection, or a 400 response
        with ``{"status": "error", ...}`` for an unknown provider or a
        non-string model.
    """
    global _active_provider, _active_model
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        # A JSON array/scalar has no .get(); handle it like a missing body.
        data = {}
    provider = data.get("provider", "auto")
    model = data.get("model") or None

    allowed = {"auto", "groq", "hf", "ollama"}
    # The isinstance check comes first: an unhashable value (list/dict) would
    # raise TypeError in the set-membership test and surface as a 500.
    if not isinstance(provider, str) or provider not in allowed:
        return jsonify({"status": "error", "error": f"Unknown provider '{provider}'"}), 400
    if model is not None and not isinstance(model, str):
        return jsonify({"status": "error", "error": "'model' must be a string"}), 400

    _active_provider = provider
    _active_model = model
    label = f"{provider}/{model}" if model else provider
    print(f"[Dashboard/LLM] Provider switched to: {label}")
    broadcast_event("llm_config", {"provider": _active_provider, "model": _active_model})
    # Propagate selection to ai_analyst service via Kafka control topic
    try:
        p = get_producer()
        p.send(Config.CONTROL_TOPIC, {"action": "set_llm", "provider": provider, "model": model})
        p.flush()
    except Exception as e:
        print(f"[Dashboard/LLM] Could not publish set_llm to Kafka: {e}")
    return jsonify({"status": "ok", "provider": _active_provider, "model": _active_model})
700
+
701
+
702
  @app.route("/ai/debug")
703
  def ai_debug():
704
  """Synchronous LLM test β€” returns raw API result for debugging."""
dashboard/templates/index.html CHANGED
@@ -379,14 +379,30 @@
379
 
380
  <!-- AI Analyst panel (full width) -->
381
  <div class="ai-panel">
382
- <h2 style="margin:0 0 8px; font-size:15px; display:flex; align-items:center; gap:10px;">
383
  AI Analyst
384
  <button id="ai-generate-btn" onclick="triggerAIInsight()"
385
  style="padding:5px 14px; background:#5c6bc0; color:#fff; border:none; border-radius:12px;
386
  font-size:12px; font-weight:bold; cursor:pointer; flex-shrink:0;">
387
  ✨ Generate Now
388
  </button>
389
- <span id="ai-status" style="font-size:11px; color:#999; font-weight:normal;">waiting for first insight…</span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
  </h2>
391
  <div id="ai-insights-list" style="max-height:220px; overflow-y:auto;">
392
  <div class="insight-card" style="color:#bbb; border-left-color:#ddd; background:#fafafa;" id="ai-placeholder">
@@ -433,6 +449,80 @@
433
  // Selected order state
434
  let selectedOrder = null;
435
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436
  async function triggerAIInsight() {
437
  const btn = document.getElementById("ai-generate-btn");
438
  const status = document.getElementById("ai-status");
@@ -472,7 +562,10 @@
472
  div.className = "insight-card insight-new";
473
  if (isErr) div.style.cssText = "border-left-color:#e53935; background:#fff5f5;";
474
  const t = new Date(insight.timestamp * 1000).toLocaleTimeString();
475
- div.innerHTML = `<div class="insight-time">${t}</div><div>${insight.text}</div>`;
 
 
 
476
  list.prepend(div);
477
  while (list.children.length > 10) list.removeChild(list.lastChild);
478
  }
@@ -1128,6 +1221,15 @@
1128
  insights.forEach(addInsight);
1129
  });
1130
 
 
 
 
 
 
 
 
 
 
1131
  eventSource.addEventListener("ai_insight", (e) => {
1132
  const insight = JSON.parse(e.data);
1133
  addInsight(insight);
@@ -1512,6 +1614,7 @@
1512
  async function init() {
1513
  await fetchData();
1514
  connectSSE();
 
1515
 
1516
  // Refresh order book panel every 3 seconds
1517
  setInterval(() => {
 
379
 
380
  <!-- AI Analyst panel (full width) -->
381
  <div class="ai-panel">
382
+ <h2 style="margin:0 0 8px; font-size:15px; display:flex; align-items:center; gap:8px; flex-wrap:wrap;">
383
  AI Analyst
384
  <button id="ai-generate-btn" onclick="triggerAIInsight()"
385
  style="padding:5px 14px; background:#5c6bc0; color:#fff; border:none; border-radius:12px;
386
  font-size:12px; font-weight:bold; cursor:pointer; flex-shrink:0;">
387
  ✨ Generate Now
388
  </button>
389
+ <!-- LLM selector -->
390
+ <select id="ai-provider-select" onchange="onProviderChange()"
391
+ style="padding:4px 8px; border:1px solid #c5cae9; border-radius:8px; font-size:12px;
392
+ background:#f8f9ff; color:#3949ab; cursor:pointer; flex-shrink:0;">
393
+ <option value="auto">Auto</option>
394
+ <option value="groq">Groq</option>
395
+ <option value="hf">HuggingFace</option>
396
+ <option value="ollama">Ollama</option>
397
+ </select>
398
+ <select id="ai-model-select" onchange="onModelChange()"
399
+ style="padding:4px 8px; border:1px solid #c5cae9; border-radius:8px; font-size:12px;
400
+ background:#f8f9ff; color:#3949ab; cursor:pointer; flex-shrink:0; max-width:220px;">
401
+ <option value="">β€” model β€”</option>
402
+ </select>
403
+ <span id="ai-llm-badge" style="font-size:10px; color:#7986cb; background:#e8eaf6; padding:2px 8px;
404
+ border-radius:10px; white-space:nowrap; flex-shrink:0;"></span>
405
+ <span id="ai-status" style="font-size:11px; color:#999; font-weight:normal; margin-left:auto;">waiting for first insight…</span>
406
  </h2>
407
  <div id="ai-insights-list" style="max-height:220px; overflow-y:auto;">
408
  <div class="insight-card" style="color:#bbb; border-left-color:#ddd; background:#fafafa;" id="ai-placeholder">
 
449
  // Selected order state
450
  let selectedOrder = null;
451
 
452
+ // ── LLM selector ────────────────────────────────────────────────────────────
453
+ const LLM_MODELS = { auto: [], groq: [], hf: [], ollama: [] };
454
+
// Fetch /ai/config, cache each provider's non-empty model list in LLM_MODELS,
// and set the provider select, model dropdown and badge to the active
// selection. Any failure is logged as a warning and otherwise ignored.
async function loadAIConfig() {
  try {
    const response = await fetch("/ai/config");
    const config = await response.json();

    // Populate model lists from server; empty lists leave the cache untouched.
    Object.entries(config.providers || {}).forEach(([name, details]) => {
      if (details.models && details.models.length) {
        LLM_MODELS[name] = details.models;
      }
    });

    // Restore active selection
    const activeProvider = config.active_provider || "auto";
    const activeModel = config.active_model;
    document.getElementById("ai-provider-select").value = activeProvider;
    populateModelDropdown(activeProvider, activeModel);
    updateLLMBadge(activeProvider, activeModel);
  } catch (err) {
    console.warn("Could not load AI config:", err);
  }
}
470
+
// Rebuild the model dropdown for `provider` and select `selectedModel`.
// The dropdown is hidden for "auto" and for providers with no known models.
// When `selectedModel` is set but missing from the known list, it is appended
// as an extra option so the dropdown agrees with the badge instead of
// silently showing the first known model.
function populateModelDropdown(provider, selectedModel) {
  const sel = document.getElementById("ai-model-select");
  const known = LLM_MODELS[provider] || [];
  sel.innerHTML = "";
  if (provider === "auto" || known.length === 0) {
    sel.style.display = "none";
    return;
  }
  sel.style.display = "";
  const models = selectedModel && !known.includes(selectedModel)
    ? [...known, selectedModel]
    : known;
  for (const m of models) {
    const opt = document.createElement("option");
    opt.value = m;
    opt.textContent = m.split("/").pop(); // show only model name, not org prefix
    if (m === selectedModel) opt.selected = true;
    sel.appendChild(opt);
  }
  if (!selectedModel) sel.selectedIndex = 0;
}
489
+
// Write the current selection into the badge: "auto" for the fallback chain,
// otherwise "<provider> / <model name without org prefix>", with "default"
// standing in when no model is given.
function updateLLMBadge(provider, model) {
  const badgeEl = document.getElementById("ai-llm-badge");
  let label = "auto";
  if (provider !== "auto") {
    const shortName = model ? model.split("/").pop() : "default";
    label = `${provider} / ${shortName}`;
  }
  badgeEl.textContent = label;
}
499
+
// POST the chosen provider/model to /ai/select. fetch() resolves even for
// HTTP error statuses, so a rejected selection is detected via `ok`; in that
// case a warning is logged and the selector is reloaded from the server so
// the UI does not keep showing a selection the backend refused.
async function sendLLMSelection(provider, model) {
  try {
    const r = await fetch("/ai/select", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ provider, model: model || null }),
    });
    if (!r.ok) {
      console.warn("Could not update LLM selection:", `HTTP ${r.status}`);
      loadAIConfig();
    }
  } catch(e) { console.warn("Could not update LLM selection:", e); }
}
509
+
// Change handler for the provider select: rebuild the model dropdown, then
// show and submit the provider's first known model (null when it has none).
function onProviderChange() {
  const chosen = document.getElementById("ai-provider-select").value;
  const knownModels = LLM_MODELS[chosen];
  const firstModel = (knownModels && knownModels[0]) || null;
  populateModelDropdown(chosen, null);
  updateLLMBadge(chosen, firstModel);
  sendLLMSelection(chosen, firstModel);
}
517
+
// Change handler for the model dropdown: show and submit the picked model
// for the currently selected provider (an empty value is sent as null).
function onModelChange() {
  const providerSel = document.getElementById("ai-provider-select");
  const modelSel = document.getElementById("ai-model-select");
  const picked = modelSel.value ? modelSel.value : null;
  updateLLMBadge(providerSel.value, picked);
  sendLLMSelection(providerSel.value, picked);
}
524
+ // ────────────────────────────────────────────────────────────────────────────
525
+
526
  async function triggerAIInsight() {
527
  const btn = document.getElementById("ai-generate-btn");
528
  const status = document.getElementById("ai-status");
 
562
  div.className = "insight-card insight-new";
563
  if (isErr) div.style.cssText = "border-left-color:#e53935; background:#fff5f5;";
564
  const t = new Date(insight.timestamp * 1000).toLocaleTimeString();
565
+ const srcBadge = insight.source && insight.source !== "error" && insight.source !== "config"
566
+ ? `<span style="font-size:10px; background:#e8eaf6; color:#5c6bc0; padding:1px 6px; border-radius:8px; margin-left:6px;">${insight.source}</span>`
567
+ : "";
568
+ div.innerHTML = `<div class="insight-time">${t}${srcBadge}</div><div>${insight.text}</div>`;
569
  list.prepend(div);
570
  while (list.children.length > 10) list.removeChild(list.lastChild);
571
  }
 
1221
  insights.forEach(addInsight);
1222
  });
1223
 
1224
+ eventSource.addEventListener("llm_config", (e) => {
1225
+ const cfg = JSON.parse(e.data);
1226
+ const prov = cfg.provider || "auto";
1227
+ const model = cfg.model || null;
1228
+ document.getElementById("ai-provider-select").value = prov;
1229
+ populateModelDropdown(prov, model);
1230
+ updateLLMBadge(prov, model);
1231
+ });
1232
+
1233
  eventSource.addEventListener("ai_insight", (e) => {
1234
  const insight = JSON.parse(e.data);
1235
  addInsight(insight);
 
1614
  async function init() {
1615
  await fetchData();
1616
  connectSSE();
1617
+ loadAIConfig();
1618
 
1619
  // Refresh order book panel every 3 seconds
1620
  setInterval(() => {