RayMelius Claude Sonnet 4.6 committed on
Commit
ff275e2
·
1 Parent(s): c04eb58

Restore SYSTEM_PROMPT and finetuned model default for AI Analyst

Browse files

- HF_MODEL default: RayMelius/stockex-analyst (our Round 2 finetuned model)
- Add SYSTEM_PROMPT for correct finetuned model behaviour
- _try_hf: use direct inference API for RayMelius/ models + system prompt

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. ai_analyst/ai_analyst.py +20 -5
ai_analyst/ai_analyst.py CHANGED
@@ -11,7 +11,7 @@ from shared.kafka_utils import create_producer, create_consumer
11
  OLLAMA_HOST = os.getenv("OLLAMA_HOST", "") # e.g. http://host.docker.internal:11434
12
  OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
13
  HF_TOKEN = os.getenv("HF_TOKEN", "")
14
- HF_MODEL = os.getenv("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct-1M")
15
  GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
16
  GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
17
  GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
@@ -21,6 +21,16 @@ ANALYSIS_INTERVAL = int(os.getenv("ANALYSIS_INTERVAL", "1800")) # 30 min defaul
21
  _active_provider = "auto" # "auto" | "ollama" | "groq" | "hf"
22
  _active_model = None # None = use env-var default for chosen provider
23
 
 
 
 
 
 
 
 
 
 
 
24
  # ── Rolling market data buffers ────────────────────────────────────────────────
25
  recent_trades = deque(maxlen=200)
26
  latest_snapshots = {} # symbol -> snapshot dict
@@ -80,15 +90,20 @@ def call_llm(prompt: str) -> str | None:
80
  if not HF_TOKEN:
81
  return None
82
  m = model or HF_MODEL
83
- url = "https://router.huggingface.co/v1/chat/completions"
84
- print(f"[AI-Analyst] Calling HF router: model={m}")
 
 
 
85
  for attempt in range(3):
86
  try:
87
  resp = requests.post(
88
  url,
89
  headers={"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"},
90
- json={"model": m, "messages": [{"role": "user", "content": prompt}],
91
- "max_tokens": 220, "temperature": 0.7},
 
 
92
  timeout=60,
93
  )
94
  print(f"[AI-Analyst] HF response status: {resp.status_code}")
 
11
  OLLAMA_HOST = os.getenv("OLLAMA_HOST", "") # e.g. http://host.docker.internal:11434
12
  OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
13
  HF_TOKEN = os.getenv("HF_TOKEN", "")
14
+ HF_MODEL = os.getenv("HF_MODEL", "RayMelius/stockex-analyst")
15
  GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
16
  GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
17
  GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
 
21
  _active_provider = "auto" # "auto" | "ollama" | "groq" | "hf"
22
  _active_model = None # None = use env-var default for chosen provider
23
 
24
+ # System prompt matching the finetuned model's training
25
+ SYSTEM_PROMPT = (
26
+ "You are StockEx AI Analyst, an expert in stock market microstructure, "
27
+ "order book dynamics, and real-time trading analysis for the Athens Stock Exchange. "
28
+ "When given market data, respond with a single flowing paragraph of natural market "
29
+ "commentary. Mention specific stocks, prices, trade counts, and volumes where relevant. "
30
+ "Assess sentiment (bullish/bearish/cautious/neutral) and give a forward-looking observation. "
31
+ "Do not use bullet points, headers, or JSON. Write like a professional market analyst."
32
+ )
33
+
34
  # ── Rolling market data buffers ────────────────────────────────────────────────
35
  recent_trades = deque(maxlen=200)
36
  latest_snapshots = {} # symbol -> snapshot dict
 
90
  if not HF_TOKEN:
91
  return None
92
  m = model or HF_MODEL
93
+ if m.startswith("RayMelius/") or "/" in m:
94
+ url = f"https://api-inference.huggingface.co/models/{m}/v1/chat/completions"
95
+ else:
96
+ url = "https://router.huggingface.co/v1/chat/completions"
97
+ print(f"[AI-Analyst] Calling HF: model={m}")
98
  for attempt in range(3):
99
  try:
100
  resp = requests.post(
101
  url,
102
  headers={"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"},
103
+ json={"model": m,
104
+ "messages": [{"role": "system", "content": SYSTEM_PROMPT},
105
+ {"role": "user", "content": prompt}],
106
+ "max_tokens": 300, "temperature": 0.7},
107
  timeout=60,
108
  )
109
  print(f"[AI-Analyst] HF response status: {resp.status_code}")