File size: 12,233 Bytes
d73c442 22af747 d73c442 22af747 d73c442 22af747 d73c442 22af747 51503b5 22af747 51503b5 22af747 51503b5 22af747 51503b5 22af747 d73c442 22af747 d73c442 22af747 d73c442 22af747 d73c442 51503b5 22af747 51503b5 22af747 51503b5 22af747 51503b5 22af747 51503b5 22af747 51503b5 22af747 d73c442 22af747 51503b5 22af747 51503b5 22af747 d73c442 22af747 d73c442 22af747 d73c442 22af747 d73c442 22af747 d73c442 9cb2415 d73c442 9cb2415 d73c442 51503b5 22af747 d73c442 22af747 d73c442 22af747 d73c442 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 | """
TwoCentsHustler AI Space.
Primary backend: Google Gemini API (Gemma 4 31B via AI Studio).
- Gemma 4 31B is a thinking model — the response has a thought part plus a final answer part.
- 500 errors are transient (Google-side); retried with exponential backoff (3x).
- Falls through to the Gemini fallback model, then to llama-cpp, on persistent failure.
Fallback backend: local llama-cpp (gemma-4-E4B GGUF) — slow (20-40s).
POST /api/ai { "operation": "analyze"|"summarize"|"cluster", "payload": {...} }
"""
import os
import json
import re
import time
import gradio as gr
from fastapi import Request
from fastapi.responses import JSONResponse
# ── Gemini setup (optional — only when GEMINI_API_KEY is set) ─────────────────
# Gemini configuration is read from the environment. An empty API key leaves
# both model handles as None, which callers treat as "Gemini disabled".
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemma-4-31b-it")
GEMINI_FALLBACK_MODEL = os.getenv("GEMINI_FALLBACK_MODEL", "gemma-4-26b-a4b-it")
GEMINI_RETRIES = 3  # attempts per model before giving up on it

_gemini_primary = _gemini_fallback = None

if GEMINI_API_KEY:
    try:
        # Imported lazily so the SDK is only required when a key is configured.
        import google.generativeai as genai

        genai.configure(api_key=GEMINI_API_KEY)
        _gemini_primary = genai.GenerativeModel(GEMINI_MODEL)
        _gemini_fallback = genai.GenerativeModel(GEMINI_FALLBACK_MODEL)
        print(f"Gemini backend ready: primary={GEMINI_MODEL}, fallback={GEMINI_FALLBACK_MODEL}")
    except Exception as init_err:
        # Best effort: any init failure disables Gemini entirely rather than
        # leaving a half-configured pair of handles behind.
        print(f"Gemini init failed ({init_err}), will use llama-cpp")
        _gemini_primary = None
        _gemini_fallback = None
# ── llama-cpp setup (always loaded as fallback) ───────────────────────────────
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Model location and runtime sizing, all overridable through the environment.
REPO_ID = os.getenv("GGUF_REPO", "unsloth/gemma-4-E4B-it-GGUF")
GGUF_FILE = os.getenv("GGUF_FILE", "gemma-4-E4B-it-Q4_K_M.gguf")
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HF_ACCESS_TOKEN")
N_CTX = int(os.getenv("N_CTX", "4096"))
N_THREADS = int(os.getenv("N_THREADS", "2"))

# The GGUF file is fetched and loaded at import time, so the local model is
# ready before the first request arrives.
print(f"Downloading {REPO_ID}/{GGUF_FILE} β¦")
model_path = hf_hub_download(repo_id=REPO_ID, filename=GGUF_FILE, token=HF_TOKEN)

print(f"Loading llama-cpp model from {model_path} β¦")
_llm = Llama(
    model_path=model_path,
    n_ctx=N_CTX,
    n_threads=N_THREADS,
    n_gpu_layers=0,  # no layers offloaded to a GPU
    verbose=False,
)
print("llama-cpp model ready.")
# ── Inference ─────────────────────────────────────────────────────────────────
def _call_gemini_model(model, model_name: str, prompt: str) -> str:
    """Call one Gemini model, retrying transient 5xx failures with backoff.

    The answer is assembled from every response part whose ``thought``
    attribute is missing or falsy, so that downstream JSON extraction sees the
    final answer rather than the reasoning chain. If that yields an empty
    string, ``response.text`` is returned instead.

    Args:
        model: Object exposing ``generate_content(prompt, generation_config=...)``.
        model_name: Human-readable model name, used only in log output.
        prompt: Full prompt text sent to the model.

    Returns:
        The model's answer text.

    Raises:
        Exception: The last error from the model call, once a non-5xx error
            occurs or the retry budget is used up.
        RuntimeError: If ``GEMINI_RETRIES`` allows no attempt at all.
    """
    from google.generativeai.types import GenerationConfig
    import google.api_core.exceptions as gapi_exc

    transient_types = (gapi_exc.InternalServerError, gapi_exc.ServiceUnavailable)
    # Match 500/503 only as standalone numbers: a bare substring test would
    # also treat messages mentioning e.g. "1500" or "5003" as server errors.
    status_pattern = re.compile(r"(?<!\d)(?:500|503)(?!\d)")
    config = GenerationConfig(temperature=0.0)
    final_attempt = GEMINI_RETRIES - 1

    for attempt in range(GEMINI_RETRIES):
        try:
            response = model.generate_content(prompt, generation_config=config)
            parts = response.candidates[0].content.parts
            answer_text = "".join(
                p.text for p in parts if not getattr(p, "thought", False)
            )
            return answer_text or response.text
        except Exception as e:
            is_5xx = isinstance(e, transient_types) or bool(
                status_pattern.search(str(e))
            )
            if not is_5xx or attempt >= final_attempt:
                raise
            wait = 2 ** attempt  # 1s, 2s, 4s, ...
            print(f"{model_name} 5xx (attempt {attempt+1}/{GEMINI_RETRIES}), retry in {wait}s: {e}")
            time.sleep(wait)

    # Only reachable when GEMINI_RETRIES <= 0; fail loudly instead of
    # returning None from a function annotated to return str.
    raise RuntimeError(f"{model_name}: no attempts made (GEMINI_RETRIES={GEMINI_RETRIES})")
def _generate_gemini(prompt: str) -> str:
    """Run the prompt on the primary Gemini model, then on the fallback model.

    Any exception from the primary call (after its own retries) triggers one
    call to the fallback model; an exception from that call propagates.
    """
    try:
        answer = _call_gemini_model(_gemini_primary, GEMINI_MODEL, prompt)
    except Exception as primary_err:
        print(f"{GEMINI_MODEL} exhausted retries ({primary_err}), trying {GEMINI_FALLBACK_MODEL}")
        answer = _call_gemini_model(_gemini_fallback, GEMINI_FALLBACK_MODEL, prompt)
    return answer
def _generate_llama(prompt: str) -> str:
    """Run the prompt as a single user message on the local llama-cpp model.

    Decoding uses temperature 0.0, at most 1024 new tokens, and requests a
    JSON-object response format. Returns the content of the first choice.
    """
    chat_messages = [{"role": "user", "content": prompt}]
    completion = _llm.create_chat_completion(
        messages=chat_messages,
        max_tokens=1024,
        temperature=0.0,
        response_format={"type": "json_object"},
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]
def _generate(prompt: str) -> str:
    """Produce raw model text for a prompt, preferring Gemini over llama-cpp.

    When Gemini is not configured the local model is used directly. Otherwise
    the Gemini chain is tried first and any exception from it is logged and
    followed by a local llama-cpp call.
    """
    if _gemini_primary is None:
        return _generate_llama(prompt)

    try:
        return _generate_gemini(prompt)
    except Exception as gemini_err:
        print(f"Gemini chain exhausted ({gemini_err}), falling back to llama-cpp")

    return _generate_llama(prompt)
# ── Prompt builders (mirrors lib/ai/prompts.ts) ───────────────────────────────
# Output contract appended verbatim to the analysis prompt: the JSON shape the
# model must return, followed by the HIGH/MEDIUM/LOW impact criteria.
# This text is sent to the model as-is, so any edit changes model input.
_ANALYSIS_SCHEMA = """\
Respond ONLY with valid JSON:
{
"sentiment": "positive"|"negative"|"neutral"|"mixed",
"sentimentScore": integer -100..100,
"marketRelevance": integer 0..100,
"impactReasoning": string <=200 chars,
"impactOverride": "HIGH"|"MEDIUM"|"LOW",
"entities": [{"entityType":"ticker"|"company"|"person"|"place"|"commodity"|"currency"|"central_bank","value":string,"normalized":string|null,"confidence":integer 0..100}]
}
Impact classification criteria:
HIGH β market-moving: central bank decisions, major earnings beats/misses, geopolitical crisis,
large corporate bankruptcies/acquisitions, systemic financial events
MEDIUM β earnings reports, regulatory updates, M&A rumours, sector policy changes, key executive moves
LOW β routine company updates, minor personnel changes, informational/educational content,
press releases with no immediate market consequence"""
def _build_analysis_prompt(p: dict) -> str:
    """Build the single-article analysis prompt.

    Reads ``category`` (default ``'unknown'``), ``headline`` (default ``''``)
    and, when truthy, ``summary`` from *p*, then appends the extraction
    instructions and the shared JSON schema text.
    """
    category = p.get("category", "unknown")
    headline = p.get("headline", "")
    summary = p.get("summary")

    header = [
        "You are a financial news analyst. Analyze the article and output structured JSON.",
        "",
        f"ARTICLE CATEGORY: {category}",
        f"HEADLINE: {headline}",
    ]
    # The summary line is only emitted when a non-empty summary was supplied.
    optional = [f"SUMMARY: {summary}"] if summary else []
    instructions = [
        "",
        "Extract: market sentiment, market relevance (0-100), impact level, impact reasoning,",
        "and all named entities. Prefer normalized ticker symbols (e.g. 'AAPL') in normalized field.",
        "impactOverride is REQUIRED β always classify as HIGH, MEDIUM, or LOW.",
        "",
        _ANALYSIS_SCHEMA,
    ]
    return "\n".join(header + optional + instructions)
def _build_summary_prompt(p: dict) -> str:
    """Build the market-brief prompt from a list of articles.

    Reads from *p*:
        items: list of article dicts (``category``, ``impact``,
            ``publishedAt``, ``headline``, optional ``summary``). Missing or
            ``None`` is treated as an empty list; only the first 60 are used.
        maxBullets: upper bound on highlight bullets (default 6).
        scope: brief scope inserted into the first line (default ``'daily'``).

    Returns:
        The prompt text. The article count it states is the number of
        articles actually listed, not the number supplied.
    """
    max_items = 60
    # Truncate first so the stated count and the listing always agree, and
    # tolerate an explicit null for "items".
    items = (p.get("items") or [])[:max_items]
    max_bullets = p.get("maxBullets", 6)
    scope = p.get("scope", "daily")

    article_lines = "\n".join(
        f"{idx}. [{it.get('category','?')}|{it.get('impact','?')}|{it.get('publishedAt','')}] "
        f"{it.get('headline','')}"
        # Summaries are clipped to 200 characters to bound prompt size.
        + (f" β {it.get('summary','')[:200]}" if it.get("summary") else "")
        for idx, it in enumerate(items, start=1)
    )
    return "\n".join([
        f"You are writing a {scope} market brief for active traders.",
        f"Synthesize the following {len(items)} articles into a concise brief.",
        "",
        article_lines,
        "",
        f'Output JSON: {{"content": string (markdown <=400 words), "highlights": string[] (<={max_bullets} bullets each <=120 chars)}}',
    ])
def _fmt_entity(e: dict) -> str:
    """Render an entity dict as ``type:label``.

    The label is the ``normalized`` value when truthy, otherwise ``value``;
    a missing type or value is rendered as ``'?'``.
    """
    kind = e.get("entityType", "?")
    label = e.get("normalized") or e.get("value", "?")
    return "{}:{}".format(kind, label)
def _build_cluster_prompt(p: dict) -> str:
    """Build the event-clustering prompt from ``p['items']``.

    At most the first 40 items are listed, one numbered line each, carrying
    the item's id, category, headline and formatted entities (``'none'`` when
    the item has no entities).
    """
    numbered = []
    for position, article in enumerate(p.get("items", [])[:40], start=1):
        entity_text = ", ".join(
            _fmt_entity(ent) for ent in article.get("entities", [])
        ) or "none"
        numbered.append(
            f"{position}. [id:{article.get('id','?')}|{article.get('category','?')}] "
            f"{article.get('headline','')} (entities: {entity_text})"
        )

    sections = [
        "Cluster these financial news articles into market events.",
        "Group into 0..N events where each is a coherent story thread.",
        "Skip articles that don't belong to any multi-article event.",
        "",
        "\n".join(numbered),
        "",
        'Output JSON: [{"title":string<=80,"description":string|null,"category":"MACRO"|"STOCKS"|"CRYPTO"|"FOREX"|"COMMODITIES","itemIds":string[]>=2,"keyEntities":string[],"relevanceScores":{itemId:0..100}}]',
    ]
    return "\n".join(sections)
# ── JSON extractor ────────────────────────────────────────────────────────────
def _extract_json(text: str):
    """Parse the JSON value contained in raw model output.

    Three strategies are tried in order:
      1. parse the stripped text as-is;
      2. parse it after removing a leading/trailing Markdown code fence;
      3. decode a JSON object/array embedded in surrounding prose.

    For step 3 every ``{`` or ``[`` outside an already-decoded value is tried
    as a starting point, and the longest value that decodes wins. This keeps
    trailing prose containing stray brackets from breaking the parse, and
    keeps a short bracketed aside (e.g. ``[1]``) from shadowing the payload.

    Args:
        text: Raw text returned by a model.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If no JSON value can be decoded from *text*.
    """
    text = text.strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # Drop a ```json ... ``` (or bare ```) fence wrapped around the payload.
    text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE)
    text = re.sub(r"\s*```$", "", text)
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    decoder = json.JSONDecoder()
    best = None  # (decoded length, value) of the longest match so far
    consumed_up_to = 0
    for opener in re.finditer(r"[{\[]", text):
        start = opener.start()
        if start < consumed_up_to:
            # This bracket is nested inside a value that already decoded.
            continue
        try:
            value, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            continue
        consumed_up_to = end
        if best is None or end - start > best[0]:
            best = (end - start, value)

    if best is not None:
        return best[1]
    raise ValueError(f"No JSON found: {text[:200]}")
# ── Dispatcher ────────────────────────────────────────────────────────────────
def _dispatch(operation: str, payload: dict):
    """Build the prompt for *operation*, run the model and parse its JSON.

    Args:
        operation: One of ``"analyze"``, ``"summarize"`` or ``"cluster"``.
        payload: Operation-specific input; must be a dict.

    Returns:
        The JSON value decoded from the model output.

    Raises:
        ValueError: If *payload* is not a dict, *operation* is unknown, or
            the model output contains no parseable JSON.
    """
    # Reject malformed payloads up front: otherwise a list/null payload fails
    # deep inside a prompt builder with AttributeError, which callers report
    # as a server error rather than a bad request.
    if not isinstance(payload, dict):
        raise ValueError(
            f"payload must be a JSON object, got {type(payload).__name__}"
        )

    if operation == "analyze":
        build_prompt = _build_analysis_prompt
    elif operation == "summarize":
        build_prompt = _build_summary_prompt
    elif operation == "cluster":
        build_prompt = _build_cluster_prompt
    else:
        raise ValueError(f"Unknown operation: {operation!r}")

    return _extract_json(_generate(build_prompt(payload)))
# ── Gradio UI ─────────────────────────────────────────────────────────────────
# Header label: the Gemini model pair when Gemini initialised, otherwise the
# local GGUF file name.
_backend_label = f"Gemini ({GEMINI_MODEL} β {GEMINI_FALLBACK_MODEL})" if _gemini_primary else f"llama-cpp ({GGUF_FILE})"
# Minimal manual test console: choose an operation, edit the JSON payload, run.
# NOTE(review): the nesting below (which widgets sit inside gr.Row) is
# reconstructed — confirm against the deployed layout.
with gr.Blocks(title="TwoCentsHustler AI") as demo:
    gr.Markdown(
        f"## TwoCentsHustler AI\n"
        f"Primary: `{_backend_label}` Β· Fallback: `{GGUF_FILE}` (llama-cpp)"
    )
    with gr.Row():
        op = gr.Dropdown(["analyze", "summarize", "cluster"], value="analyze", label="Operation")
        payload_box = gr.Code(
            value='{"headline":"Fed raises rates by 25bps","category":"MACRO"}',
            language="json",
            label="Payload",
        )
    out = gr.JSON(label="Result")
    btn = gr.Button("Run")

    def _gradio_run(operation: str, payload_str: str):
        """Run one operation from the UI; any failure becomes {"error": ...}."""
        try:
            # An empty payload box is treated as an empty JSON object.
            return _dispatch(operation, json.loads(payload_str or "{}"))
        except Exception as e:
            return {"error": str(e)}

    btn.click(_gradio_run, inputs=[op, payload_box], outputs=out)
# ── REST route ────────────────────────────────────────────────────────────────
# The REST route is registered on the application object exposed by the
# Gradio Blocks instance.
# NOTE(review): confirm `demo.app` is available before `launch()` in the
# pinned Gradio version.
app = demo.app


@app.post("/api/ai")
async def ai_endpoint(request: Request):
    """Handle ``POST /api/ai``.

    Expects a JSON object ``{"operation": str, "payload": {...}}`` and returns
    the dispatcher result as JSON.

    Status codes:
        400: the body is not valid JSON or not a JSON object, the operation is
            unknown, or any other ``ValueError`` raised while dispatching.
        500: any other failure.
    """
    try:
        body = await request.json()
        # A JSON array/scalar body has no .get(); report it as a client error
        # instead of letting AttributeError surface as a 500.
        if not isinstance(body, dict):
            raise ValueError("Request body must be a JSON object")
        payload = body.get("payload")
        if payload is None:
            # Treat a missing or explicit null payload as an empty object.
            payload = {}
        result = _dispatch(body.get("operation", ""), payload)
        return JSONResponse(content=result)
    except ValueError as exc:
        return JSONResponse(content={"error": str(exc)}, status_code=400)
    except Exception as exc:
        return JSONResponse(content={"error": str(exc)}, status_code=500)
if __name__ == "__main__":
    # Script entry point: serve on all interfaces at port 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
|