Restore SYSTEM_PROMPT and finetuned model default for AI Analyst
Browse files
- HF_MODEL default: RayMelius/stockex-analyst (our Round 2 finetuned model)
- Add SYSTEM_PROMPT for correct finetuned model behaviour
- _try_hf: use direct inference API for RayMelius/ models + system prompt
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- ai_analyst/ai_analyst.py +20 -5
ai_analyst/ai_analyst.py
CHANGED
|
@@ -11,7 +11,7 @@ from shared.kafka_utils import create_producer, create_consumer
|
|
| 11 |
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "") # e.g. http://host.docker.internal:11434
|
| 12 |
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
|
| 13 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 14 |
-
HF_MODEL = os.getenv("HF_MODEL", "
|
| 15 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
|
| 16 |
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
|
| 17 |
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
|
|
@@ -21,6 +21,16 @@ ANALYSIS_INTERVAL = int(os.getenv("ANALYSIS_INTERVAL", "1800")) # 30 min defaul
|
|
| 21 |
_active_provider = "auto" # "auto" | "ollama" | "groq" | "hf"
|
| 22 |
_active_model = None # None = use env-var default for chosen provider
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
# ── Rolling market data buffers ────────────────────────────────────────────────
|
| 25 |
recent_trades = deque(maxlen=200)
|
| 26 |
latest_snapshots = {} # symbol -> snapshot dict
|
|
@@ -80,15 +90,20 @@ def call_llm(prompt: str) -> str | None:
|
|
| 80 |
if not HF_TOKEN:
|
| 81 |
return None
|
| 82 |
m = model or HF_MODEL
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
| 85 |
for attempt in range(3):
|
| 86 |
try:
|
| 87 |
resp = requests.post(
|
| 88 |
url,
|
| 89 |
headers={"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"},
|
| 90 |
-
json={"model": m,
|
| 91 |
-
"
|
|
|
|
|
|
|
| 92 |
timeout=60,
|
| 93 |
)
|
| 94 |
print(f"[AI-Analyst] HF response status: {resp.status_code}")
|
|
|
|
| 11 |
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "") # e.g. http://host.docker.internal:11434
|
| 12 |
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.1:8b")
|
| 13 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 14 |
+
HF_MODEL = os.getenv("HF_MODEL", "RayMelius/stockex-analyst")
|
| 15 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
|
| 16 |
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
|
| 17 |
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
|
|
|
|
| 21 |
_active_provider = "auto" # "auto" | "ollama" | "groq" | "hf"
|
| 22 |
_active_model = None # None = use env-var default for chosen provider
|
| 23 |
|
| 24 |
+
# System prompt matching the finetuned model's training
|
| 25 |
+
SYSTEM_PROMPT = (
|
| 26 |
+
"You are StockEx AI Analyst, an expert in stock market microstructure, "
|
| 27 |
+
"order book dynamics, and real-time trading analysis for the Athens Stock Exchange. "
|
| 28 |
+
"When given market data, respond with a single flowing paragraph of natural market "
|
| 29 |
+
"commentary. Mention specific stocks, prices, trade counts, and volumes where relevant. "
|
| 30 |
+
"Assess sentiment (bullish/bearish/cautious/neutral) and give a forward-looking observation. "
|
| 31 |
+
"Do not use bullet points, headers, or JSON. Write like a professional market analyst."
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
# ── Rolling market data buffers ────────────────────────────────────────────────
|
| 35 |
recent_trades = deque(maxlen=200)
|
| 36 |
latest_snapshots = {} # symbol -> snapshot dict
|
|
|
|
| 90 |
if not HF_TOKEN:
|
| 91 |
return None
|
| 92 |
m = model or HF_MODEL
|
| 93 |
+
if m.startswith("RayMelius/") or "/" in m:
|
| 94 |
+
url = f"https://api-inference.huggingface.co/models/{m}/v1/chat/completions"
|
| 95 |
+
else:
|
| 96 |
+
url = "https://router.huggingface.co/v1/chat/completions"
|
| 97 |
+
print(f"[AI-Analyst] Calling HF: model={m}")
|
| 98 |
for attempt in range(3):
|
| 99 |
try:
|
| 100 |
resp = requests.post(
|
| 101 |
url,
|
| 102 |
headers={"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"},
|
| 103 |
+
json={"model": m,
|
| 104 |
+
"messages": [{"role": "system", "content": SYSTEM_PROMPT},
|
| 105 |
+
{"role": "user", "content": prompt}],
|
| 106 |
+
"max_tokens": 300, "temperature": 0.7},
|
| 107 |
timeout=60,
|
| 108 |
)
|
| 109 |
print(f"[AI-Analyst] HF response status: {resp.status_code}")
|