Spaces:

Kasher13
/

tch-ai

Sleeping

App Files Files Community

KitTran1307 committed on Apr 20

Commit

d73c442

0 Parent(s):

fix(dockerfile): use llama-cpp-python==0.3.20 (0.3.9 does not exist on PyPI)

Browse files

Files changed (5) hide show

Dockerfile +24 -0
README.md +35 -0
app.py +209 -0
packages.txt +2 -0
requirements.txt +4 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,24 @@

+FROM python:3.11-slim
+WORKDIR /app
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential cmake && \
+    rm -rf /var/lib/apt/lists/*
+# Single-threaded build + disable BLAS → ~3GB peak RAM (fits cpu-basic)
+ENV CMAKE_BUILD_PARALLEL_LEVEL=1 \
+    CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_NATIVE=OFF" \
+    PYTHONUNBUFFERED=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_SERVER_PORT=7860
+# Install Python dependencies as root so they land in the system site-packages.
+RUN pip install --no-cache-dir llama-cpp-python==0.3.20
+RUN pip install --no-cache-dir "gradio>=5.0.0" "huggingface_hub>=0.23.0"
+# Hugging Face Docker Spaces run the container as UID 1000. Create that user
+# with a writable home so app.py can write the model download cache at startup
+# instead of failing with a permission error.
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    HF_HOME=/home/user/.cache/huggingface
+COPY --chown=user app.py .
+EXPOSE 7860
+CMD ["python", "app.py"]

README.md ADDED Viewed

	@@ -0,0 +1,35 @@

+---
+title: TwoCentsHustler AI
+emoji: 📈
+colorFrom: blue
+colorTo: indigo
+sdk: docker
+pinned: false
+license: apache-2.0
+---
+# TwoCentsHustler AI Space
+Local inference on **cpu-basic** (free, unlimited).
+Runs `gemma-4-E4B-it-Q4_K_M.gguf` (~2.7 GB) via `llama-cpp-python`.
+Fallback provider for the TwoCentsHustler financial news platform.
+## Endpoint
+`POST /api/ai` — `{ "operation": "analyze"|"summarize"|"cluster", "payload": {...} }`
+## Environment Variables
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `GGUF_REPO` | `unsloth/gemma-4-E4B-it-GGUF` | HF repo containing the GGUF file |
+| `GGUF_FILE` | `gemma-4-E4B-it-Q4_K_M.gguf` | Quantization variant to load |
+| `N_THREADS` | `2` | CPU threads for inference |
+| `N_CTX` | `4096` | Context window size |
+| `HF_TOKEN` | — | Optional: for gated models (`HF_ACCESS_TOKEN` is used as a fallback when `HF_TOKEN` is unset) |
+## Hardware
+`cpu-basic` — 2 vCPU, 16 GB RAM.
+Inference: ~20-40s per call.

app.py ADDED Viewed

	@@ -0,0 +1,209 @@

+"""
+TwoCentsHustler AI Space — local inference edition.
+Runs google/gemma-4-E4B-it Q4_K_M via llama-cpp on cpu-basic (free, unlimited).
+Model: ~2.7 GB GGUF, fits in 16 GB RAM.
+Inference: ~20-40s on 2 vCPU — acceptable as Gemini fallback.
+POST /api/ai  { "operation": "analyze"|"summarize"|"cluster", "payload": {...} }
+"""
+import os
+import json
+import re
+import gradio as gr
+from fastapi import Request
+from fastapi.responses import JSONResponse
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+REPO_ID = os.environ.get("GGUF_REPO", "unsloth/gemma-4-E4B-it-GGUF")  # repo passed to hf_hub_download
+GGUF_FILE = os.environ.get("GGUF_FILE", "gemma-4-E4B-it-Q4_K_M.gguf")  # file name requested from that repo
+HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HF_ACCESS_TOKEN")  # HF_ACCESS_TOKEN is the fallback; None when neither is set
+N_CTX = int(os.environ.get("N_CTX", "4096"))  # forwarded to Llama as n_ctx
+N_THREADS = int(os.environ.get("N_THREADS", "2"))  # forwarded to Llama as n_threads
+print(f"Downloading {REPO_ID}/{GGUF_FILE} …")
+model_path = hf_hub_download(  # runs at import time; the result is handed to Llama as model_path
+    repo_id=REPO_ID,
+    filename=GGUF_FILE,
+    token=HF_TOKEN,
+)
+print(f"Loading model from {model_path} …")
+llm = Llama(  # single module-level model instance used by _generate
+    model_path=model_path,
+    n_ctx=N_CTX,
+    n_threads=N_THREADS,
+    n_gpu_layers=0,       # CPU-only
+    verbose=False,
+)
+print("Model ready.")
+# ── Inference ─────────────────────────────────────────────────────────────────
+def _generate(prompt: str) -> str:
+    """Send ``prompt`` to the model as one user message and return the reply text.
+
+    The request uses temperature 0.0, allows at most 1024 new tokens and asks
+    for the ``json_object`` response format.
+    """
+    conversation = [{"role": "user", "content": prompt}]
+    completion = llm.create_chat_completion(
+        messages=conversation,
+        response_format={"type": "json_object"},
+        temperature=0.0,
+        max_tokens=1024,
+    )
+    first_choice = completion["choices"][0]
+    return first_choice["message"]["content"]
+# ── Prompt builders (mirrors lib/ai/prompts.ts) ───────────────────────────────
+# JSON contract appended verbatim to every analysis prompt.
+_ANALYSIS_SCHEMA = """\
+Respond ONLY with valid JSON:
+{
+  "sentiment": "positive"|"negative"|"neutral"|"mixed",
+  "sentimentScore": integer -100..100,
+  "marketRelevance": integer 0..100,
+  "impactReasoning": string <=200 chars,
+  "impactOverride": "HIGH"|"MEDIUM"|"LOW"|null,
+  "entities": [{"entityType":"ticker"|"company"|"person"|"place"|"commodity"|"currency"|"central_bank","value":string,"normalized":string|null,"confidence":integer 0..100}]
+}"""
+def _build_analysis_prompt(p: dict) -> str:
+    """Build the single-article analysis prompt from payload ``p``.
+
+    Reads the optional keys ``category``, ``headline``, ``summary`` and
+    ``ruleImpact``. The SUMMARY line is emitted only when ``summary`` is
+    truthy. The prompt always ends with ``_ANALYSIS_SCHEMA``.
+    """
+    category = p.get("category", "unknown")
+    headline = p.get("headline", "")
+    rule_impact = p.get("ruleImpact", "MEDIUM")
+    summary = p.get("summary")
+    header = [
+        "You are a financial news analyst. Analyze one article and output structured JSON.",
+        "",
+        f"ARTICLE CATEGORY: {category}",
+        f"HEADLINE: {headline}",
+    ]
+    summary_part = [f"SUMMARY: {summary}"] if summary else []
+    footer = [
+        f"RULE-BASED IMPACT: {rule_impact} (override only if clearly wrong)",
+        "",
+        "Extract: market sentiment, market relevance, impact reasoning, and all named entities.",
+        "Prefer normalized ticker symbols (e.g. 'AAPL') in the normalized field.",
+        "",
+        _ANALYSIS_SCHEMA,
+    ]
+    return "\n".join([*header, *summary_part, *footer])
+def _build_summary_prompt(p: dict) -> str:
+    """Build the market-brief prompt from payload ``p``.
+
+    Reads ``items`` (list of article dicts), ``maxBullets`` (default 6) and
+    ``scope`` (default ``"daily"``). At most the first 60 items are listed,
+    and the article count stated in the prompt is the number actually listed,
+    so the model is never told about articles it cannot see. A missing or
+    null ``items`` is treated as an empty list.
+    """
+    items = (p.get("items") or [])[:60]
+    max_bullets = p.get("maxBullets", 6)
+    scope = p.get("scope", "daily")
+    article_lines = "\n".join(
+        f"{i}. [{it.get('category','?')}|{it.get('impact','?')}|{it.get('publishedAt','')}] "
+        f"{it.get('headline','')}"
+        # Summaries are optional and clipped to 200 characters.
+        + (f" — {it['summary'][:200]}" if it.get("summary") else "")
+        for i, it in enumerate(items, start=1)
+    )
+    return "\n".join([
+        f"You are writing a {scope} market brief for active traders.",
+        f"Synthesize the following {len(items)} articles into a concise brief.",
+        "",
+        article_lines,
+        "",
+        f'Output JSON: {{"content": string (markdown <=400 words), "highlights": string[] (<={max_bullets} bullets each <=120 chars)}}',
+    ])
+def _format_cluster_entities(item: dict) -> str:
+    """Render an article's entities as ``type:name, ...`` or ``none`` when empty."""
+    tags = [
+        # Prefer the normalized form (e.g. a ticker) and fall back to the raw value.
+        f"{e.get('entityType', '?')}:{e.get('normalized') or e.get('value', '?')}"
+        for e in item.get("entities", [])
+    ]
+    return ", ".join(tags) or "none"
+def _build_cluster_prompt(p: dict) -> str:
+    """Build the event-clustering prompt from payload ``p``.
+
+    Reads ``items`` (list of article dicts with optional ``id``, ``category``,
+    ``headline`` and ``entities``); at most the first 40 items are listed.
+
+    The entity list is formatted by a helper rather than a nested f-string:
+    escaped quotes inside an f-string expression are a SyntaxError, which
+    prevented the module from being imported at all.
+    """
+    items = p.get("items", [])
+    article_lines = "\n".join(
+        f"{i}. [id:{it.get('id', '?')}|{it.get('category', '?')}] {it.get('headline', '')} "
+        f"(entities: {_format_cluster_entities(it)})"
+        for i, it in enumerate(items[:40], start=1)
+    )
+    return "\n".join([
+        "Cluster these financial news articles into market events.",
+        "Group into 0..N events where each is a coherent story thread.",
+        "Skip articles that don't belong to any multi-article event.",
+        "",
+        article_lines,
+        "",
+        'Output JSON: [{"title":string<=80,"description":string|null,"category":"MACRO"|"STOCKS"|"CRYPTO"|"FOREX"|"COMMODITIES","itemIds":string[]>=2,"keyEntities":string[],"relevanceScores":{itemId:0..100}}]',
+    ])
+# ── JSON extractor ────────────────────────────────────────────────────────────
+def _extract_json(text: str):
+    """Parse the JSON value contained in a model reply.
+
+    Tries, in order: the stripped text as-is; the text with a leading
+    and trailing Markdown code fence removed; and finally the longest JSON
+    object or array embedded anywhere in the text.
+
+    The last step decodes from each ``{`` / ``[`` with ``raw_decode`` instead
+    of slicing from the first opening bracket to the last closing bracket of
+    either kind, so prose containing stray brackets after (or before) the JSON
+    no longer breaks parsing.
+
+    Raises:
+        ValueError: if no JSON object or array can be decoded.
+    """
+    text = text.strip()
+    try:
+        return json.loads(text)
+    except json.JSONDecodeError:
+        pass
+    text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE)
+    text = re.sub(r"\s*```$", "", text)
+    try:
+        return json.loads(text)
+    except json.JSONDecodeError:
+        pass
+    decoder = json.JSONDecoder()
+    best = None  # (decoded length, value) of the longest candidate so far
+    resume_at = 0
+    for opener in re.finditer(r"[{\[]", text):
+        start = opener.start()
+        if start < resume_at:
+            continue  # nested inside a value that already decoded
+        try:
+            value, end = decoder.raw_decode(text, start)
+        except json.JSONDecodeError:
+            continue
+        resume_at = end
+        if best is None or end - start > best[0]:
+            best = (end - start, value)
+    if best is not None:
+        return best[1]
+    raise ValueError(f"No JSON found: {text[:200]}")
+# ── Dispatcher ────────────────────────────────────────────────────────────────
+def _dispatch(operation: str, payload: dict):
+    """Run one AI operation and return the parsed JSON reply.
+
+    ``operation`` selects the prompt builder (``analyze``, ``summarize`` or
+    ``cluster``); the built prompt goes through ``_generate`` and the reply
+    through ``_extract_json``.
+
+    Raises:
+        ValueError: if ``operation`` is not one of the three known names.
+    """
+    builders = (
+        ("analyze", _build_analysis_prompt),
+        ("summarize", _build_summary_prompt),
+        ("cluster", _build_cluster_prompt),
+    )
+    # Compare with == rather than a dict lookup so any value of `operation`
+    # is matched the same way an if/elif chain would match it.
+    build_prompt = next((fn for name, fn in builders if operation == name), None)
+    if build_prompt is None:
+        raise ValueError(f"Unknown operation: {operation!r}")
+    raw_reply = _generate(build_prompt(payload))
+    return _extract_json(raw_reply)
+# ── Gradio UI ─────────────────────────────────────────────────────────────────
+with gr.Blocks(title="TwoCentsHustler AI") as demo:  # `demo` is reused below for the REST route and launch()
+    gr.Markdown(
+        f"## TwoCentsHustler AI\n"
+        f"`{GGUF_FILE}` · cpu-basic · free & unlimited"
+    )
+    with gr.Row():
+        op = gr.Dropdown(["analyze", "summarize", "cluster"], value="analyze", label="Operation")  # the three operation names _dispatch accepts
+        payload_box = gr.Code(
+            value='{"headline":"Fed raises rates by 25bps","category":"MACRO","ruleImpact":"HIGH"}',  # sample payload for the default "analyze" operation
+            language="json",
+            label="Payload",
+        )
+    out = gr.JSON(label="Result")
+    btn = gr.Button("Run")
+    def _gradio_run(operation: str, payload_str: str):  # click handler: parse the payload text, then dispatch
+        try:
+            return _dispatch(operation, json.loads(payload_str or "{}"))  # an empty editor is treated as {}
+        except Exception as e:
+            return {"error": str(e)}  # any failure is returned to the Result component instead of raising
+    btn.click(_gradio_run, inputs=[op, payload_box], outputs=out)
+# ── REST route ────────────────────────────────────────────────────────────────
+app = demo.app  # NOTE(review): confirm the installed Gradio exposes `.app` before launch() and keeps this route
+@app.post("/api/ai")
+async def ai_endpoint(request: Request):
+    """Handle ``POST /api/ai``.
+
+    Expects a JSON object ``{"operation": ..., "payload": {...}}`` and
+    returns the value produced by ``_dispatch`` as the JSON response.
+
+    Any ``ValueError`` becomes a 400 response with ``{"error": ...}``: the
+    body or ``payload`` not being a JSON object, or whatever ``_dispatch``
+    raises as ``ValueError`` (e.g. an unknown operation). Every other
+    exception becomes a 500 response.
+    """
+    try:
+        body = await request.json()
+        # Reject wrong shapes up front so they are reported as client errors
+        # rather than surfacing as AttributeError (500) further down.
+        if not isinstance(body, dict):
+            raise ValueError("Request body must be a JSON object")
+        payload = body.get("payload", {})
+        if not isinstance(payload, dict):
+            raise ValueError("'payload' must be a JSON object")
+        # _dispatch is synchronous; it runs inline in this coroutine.
+        result = _dispatch(body.get("operation", ""), payload)
+        return JSONResponse(content=result)
+    except ValueError as exc:
+        return JSONResponse(content={"error": str(exc)}, status_code=400)
+    except Exception as exc:
+        return JSONResponse(content={"error": str(exc)}, status_code=500)
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)  # same bind address and port the Dockerfile sets via GRADIO_SERVER_NAME / GRADIO_SERVER_PORT

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ cmake
2	+ libopenblas-dev

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+--prefer-binary
+gradio>=5.0.0
+huggingface_hub>=0.23.0
+llama-cpp-python