KitTran1307 committed on
Commit
d73c442
·
0 Parent(s):

fix(dockerfile): use llama-cpp-python==0.3.20 (0.3.9 does not exist on PyPI)

Browse files
Files changed (5) hide show
  1. Dockerfile +24 -0
  2. README.md +35 -0
  3. app.py +209 -0
  4. packages.txt +2 -0
  5. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

WORKDIR /app

# Toolchain needed to compile llama-cpp-python from source.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential cmake && \
    rm -rf /var/lib/apt/lists/*

# Single-threaded build + disable BLAS → ~3GB peak RAM (fits cpu-basic)
ENV CMAKE_BUILD_PARALLEL_LEVEL=1 \
    CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_NATIVE=OFF" \
    PYTHONUNBUFFERED=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

# Kept as its own layer so the slow native build is cached independently of
# the pure-Python dependencies below.
RUN pip install --no-cache-dir llama-cpp-python==0.3.20

RUN pip install --no-cache-dir "gradio>=5.0.0" "huggingface_hub>=0.23.0"

# Docker Spaces run the container as UID 1000. app.py downloads the model into
# the Hugging Face cache under $HOME at startup, so that user needs a real,
# writable home directory.
RUN useradd --create-home --uid 1000 user
USER user
ENV HOME=/home/user

COPY --chown=user app.py .

EXPOSE 7860

CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: TwoCentsHustler AI
3
+ emoji: 📈
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: docker
7
+ pinned: false
8
+ license: apache-2.0
9
+ ---
10
+
11
+ # TwoCentsHustler AI Space
12
+
13
+ Local inference on **cpu-basic** (free, unlimited).
14
+ Runs `gemma-4-E4B-it-Q4_K_M.gguf` (~2.7 GB) via `llama-cpp-python`.
15
+
16
+ Fallback provider for the TwoCentsHustler financial news platform.
17
+
18
+ ## Endpoint
19
+
20
+ `POST /api/ai` — `{ "operation": "analyze"|"summarize"|"cluster", "payload": {...} }`
21
+
22
+ ## Environment Variables
23
+
24
+ | Variable | Default | Description |
25
+ |----------|---------|-------------|
26
+ | `GGUF_REPO` | `unsloth/gemma-4-E4B-it-GGUF` | HF repo containing the GGUF file |
27
+ | `GGUF_FILE` | `gemma-4-E4B-it-Q4_K_M.gguf` | Quantization variant to load |
28
+ | `N_THREADS` | `2` | CPU threads for inference |
29
+ | `N_CTX` | `4096` | Context window size |
30
+ | `HF_TOKEN` | — | Optional: for gated models |
31
+
32
+ ## Hardware
33
+
34
+ `cpu-basic` β€” 2 vCPU, 16 GB RAM.
35
+ Inference: ~20-40s per call.
app.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TwoCentsHustler AI Space — local inference edition.
3
+ Runs google/gemma-4-E4B-it Q4_K_M via llama-cpp on cpu-basic (free, unlimited).
4
+
5
+ Model: ~2.7 GB GGUF, fits in 16 GB RAM.
6
+ Inference: ~20-40s on 2 vCPU — acceptable as Gemini fallback.
7
+
8
+ POST /api/ai { "operation": "analyze"|"summarize"|"cluster", "payload": {...} }
9
+ """
10
+
11
+ import os
12
+ import json
13
+ import re
14
+ import gradio as gr
15
+ from fastapi import Request
16
+ from fastapi.responses import JSONResponse
17
+ from huggingface_hub import hf_hub_download
18
+ from llama_cpp import Llama
19
+
20
# Runtime configuration. Each value can be overridden through an environment
# variable; an unset *or empty* variable falls back to the default, so a blank
# setting cannot crash int() while the module is being imported.
REPO_ID = os.environ.get("GGUF_REPO") or "unsloth/gemma-4-E4B-it-GGUF"
GGUF_FILE = os.environ.get("GGUF_FILE") or "gemma-4-E4B-it-Q4_K_M.gguf"
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HF_ACCESS_TOKEN")
N_CTX = int(os.environ.get("N_CTX") or "4096")
N_THREADS = int(os.environ.get("N_THREADS") or "2")

# The download and the model load both happen at import time, so the rest of
# the module can rely on the module-level `llm` being available.
print(f"Downloading {REPO_ID}/{GGUF_FILE} …")
model_path = hf_hub_download(
    repo_id=REPO_ID,
    filename=GGUF_FILE,
    token=HF_TOKEN,
)
print(f"Loading model from {model_path} …")
llm = Llama(
    model_path=model_path,
    n_ctx=N_CTX,
    n_threads=N_THREADS,
    n_gpu_layers=0,  # CPU-only
    verbose=False,
)
print("Model ready.")
41
+
42
+
43
+ # ── Inference ─────────────────────────────────────────────────────────────────
44
+
45
def _generate(prompt: str) -> str:
    """Run a single chat completion on the module-level model.

    The prompt is sent as one user message with temperature 0.0, a limit of
    1024 generated tokens, and a ``json_object`` response format.

    Args:
        prompt: Full prompt text for the user turn.

    Returns:
        The message content of the first returned choice.
    """
    conversation = [{"role": "user", "content": prompt}]
    completion = llm.create_chat_completion(
        messages=conversation,
        max_tokens=1024,
        temperature=0.0,
        response_format={"type": "json_object"},
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]
53
+
54
+
55
+ # ── Prompt builders (mirrors lib/ai/prompts.ts) ───────────────────────────────
56
+
57
+ _ANALYSIS_SCHEMA = """\
58
+ Respond ONLY with valid JSON:
59
+ {
60
+ "sentiment": "positive"|"negative"|"neutral"|"mixed",
61
+ "sentimentScore": integer -100..100,
62
+ "marketRelevance": integer 0..100,
63
+ "impactReasoning": string <=200 chars,
64
+ "impactOverride": "HIGH"|"MEDIUM"|"LOW"|null,
65
+ "entities": [{"entityType":"ticker"|"company"|"person"|"place"|"commodity"|"currency"|"central_bank","value":string,"normalized":string|null,"confidence":integer 0..100}]
66
+ }"""
67
+
68
+
69
def _build_analysis_prompt(p: dict) -> str:
    """Build the single-article analysis prompt.

    Args:
        p: Article payload. Reads ``category`` (default ``"unknown"``),
            ``headline`` (default ``""``), ``summary`` (optional) and
            ``ruleImpact`` (default ``"MEDIUM"``).

    Returns:
        The newline-joined prompt, ending with the JSON output schema.
    """
    category = p.get("category", "unknown")
    headline = p.get("headline", "")
    rule_impact = p.get("ruleImpact", "MEDIUM")

    parts = [
        "You are a financial news analyst. Analyze one article and output structured JSON.",
        "",
        f"ARTICLE CATEGORY: {category}",
        f"HEADLINE: {headline}",
    ]

    # The summary line is emitted only when a non-empty summary was supplied.
    summary = p.get("summary")
    if summary:
        parts.append(f"SUMMARY: {summary}")

    parts.extend(
        [
            f"RULE-BASED IMPACT: {rule_impact} (override only if clearly wrong)",
            "",
            "Extract: market sentiment, market relevance, impact reasoning, and all named entities.",
            "Prefer normalized ticker symbols (e.g. 'AAPL') in the normalized field.",
            "",
            _ANALYSIS_SCHEMA,
        ]
    )
    return "\n".join(parts)
87
+
88
+
89
+ def _build_summary_prompt(p: dict) -> str:
90
+ items = p.get("items", [])
91
+ max_bullets = p.get("maxBullets", 6)
92
+ scope = p.get("scope", "daily")
93
+ article_lines = "\n".join(
94
+ f"{i+1}. [{it.get('category','?')}|{it.get('impact','?')}|{it.get('publishedAt','')}] "
95
+ f"{it.get('headline','')}"
96
+ + (f" β€” {it.get('summary','')[:200]}" if it.get("summary") else "")
97
+ for i, it in enumerate(items[:60])
98
+ )
99
+ return "\n".join([
100
+ f"You are writing a {scope} market brief for active traders.",
101
+ f"Synthesize the following {len(items)} articles into a concise brief.",
102
+ "",
103
+ article_lines,
104
+ "",
105
+ f'Output JSON: {{"content": string (markdown <=400 words), "highlights": string[] (<={max_bullets} bullets each <=120 chars)}}',
106
+ ])
107
+
108
+
109
+ def _build_cluster_prompt(p: dict) -> str:
110
+ items = p.get("items", [])
111
+ article_lines = "\n".join(
112
+ f"{i+1}. [id:{it.get('id','?')}|{it.get('category','?')}] {it.get('headline','')} "
113
+ f"(entities: {', '.join(f\"{e.get('entityType','?')}:{e.get('normalized') or e.get('value','?')}\" for e in it.get('entities', [])) or 'none'})"
114
+ for i, it in enumerate(items[:40])
115
+ )
116
+ return "\n".join([
117
+ "Cluster these financial news articles into market events.",
118
+ "Group into 0..N events where each is a coherent story thread.",
119
+ "Skip articles that don't belong to any multi-article event.",
120
+ "",
121
+ article_lines,
122
+ "",
123
+ 'Output JSON: [{"title":string<=80,"description":string|null,"category":"MACRO"|"STOCKS"|"CRYPTO"|"FOREX"|"COMMODITIES","itemIds":string[]>=2,"keyEntities":string[],"relevanceScores":{itemId:0..100}}]',
124
+ ])
125
+
126
+
127
+ # ── JSON extractor ────────────────────────────────────────────────────────────
128
+
129
+ def _extract_json(text: str):
130
+ text = text.strip()
131
+ try:
132
+ return json.loads(text)
133
+ except json.JSONDecodeError:
134
+ pass
135
+ text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE)
136
+ text = re.sub(r"\s*```$", "", text)
137
+ try:
138
+ return json.loads(text)
139
+ except json.JSONDecodeError:
140
+ pass
141
+ candidates = [(text.find("{"), "}"), (text.find("["), "]")]
142
+ candidates = [(i, c) for i, c in candidates if i != -1]
143
+ if candidates:
144
+ first = min(candidates, key=lambda x: x[0])[0]
145
+ last = max(text.rfind("}"), text.rfind("]"))
146
+ if last > first:
147
+ return json.loads(text[first : last + 1])
148
+ raise ValueError(f"No JSON found: {text[:200]}")
149
+
150
+
151
+ # ── Dispatcher ────────────────────────────────────────────────────────────────
152
+
153
def _dispatch(operation: str, payload: dict):
    """Build the prompt for an operation, run the model, and parse the reply.

    Args:
        operation: One of ``"analyze"``, ``"summarize"`` or ``"cluster"``.
        payload: Operation-specific payload handed to the prompt builder.

    Returns:
        The JSON value extracted from the model output.

    Raises:
        ValueError: If ``operation`` is not one of the supported names.
    """
    builders = (
        ("analyze", _build_analysis_prompt),
        ("summarize", _build_summary_prompt),
        ("cluster", _build_cluster_prompt),
    )
    # Compared with == rather than looked up in a dict, so an unhashable
    # operation value still ends in the ValueError below.
    for name, build_prompt in builders:
        if operation == name:
            return _extract_json(_generate(build_prompt(payload)))
    raise ValueError(f"Unknown operation: {operation!r}")
163
+
164
+
165
+ # ── Gradio UI ─────────────────────────────────────────────────────────────────
166
+
167
# Manual test console: choose an operation, edit the JSON payload, run it.
# NOTE(review): indentation was not recoverable from the source view; the two
# input widgets are assumed to share the row — confirm against the live file.
with gr.Blocks(title="TwoCentsHustler AI") as demo:
    gr.Markdown(
        "## TwoCentsHustler AI\n"
        f"`{GGUF_FILE}` · cpu-basic · free & unlimited"
    )
    with gr.Row():
        op = gr.Dropdown(["analyze", "summarize", "cluster"], value="analyze", label="Operation")
        payload_box = gr.Code(
            value='{"headline":"Fed raises rates by 25bps","category":"MACRO","ruleImpact":"HIGH"}',
            language="json",
            label="Payload",
        )
    out = gr.JSON(label="Result")
    btn = gr.Button("Run")

    def _gradio_run(operation: str, payload_str: str):
        """Run one operation from the UI.

        An empty payload box is treated as ``{}``. Any failure is returned as
        ``{"error": message}`` so it is displayed in the result panel instead
        of being raised.
        """
        try:
            return _dispatch(operation, json.loads(payload_str or "{}"))
        except Exception as e:
            return {"error": str(e)}

    btn.click(_gradio_run, inputs=[op, payload_box], outputs=out)
189
+
190
+
191
+ # ── REST route ────────────────────────────────────────────────────────────────
192
+
193
# NOTE(review): demo.app is read here at import time, before demo.launch() has
# been called — confirm the installed Gradio version exposes the FastAPI app
# this early (gr.mount_gradio_app is the documented way to add custom routes).
app = demo.app


@app.post("/api/ai")
async def ai_endpoint(request: Request):
    """Handle ``POST /api/ai``.

    Expects a JSON object of the form
    ``{"operation": "analyze"|"summarize"|"cluster", "payload": {...}}``.

    Returns:
        The dispatched result as JSON. A ``ValueError`` (malformed JSON,
        wrongly shaped body, unknown operation, or model output with no
        parseable JSON) yields status 400 with ``{"error": message}``; any
        other exception yields status 500 in the same shape.
    """
    try:
        body = await request.json()
        # Validate shapes up front: calling .get() on a list or null would
        # otherwise surface as a 500 even though the request is at fault.
        if not isinstance(body, dict):
            raise ValueError("Request body must be a JSON object")
        payload = body.get("payload", {})
        if not isinstance(payload, dict):
            raise ValueError("'payload' must be a JSON object")
        result = _dispatch(body.get("operation", ""), payload)
        return JSONResponse(content=result)
    except ValueError as exc:
        return JSONResponse(content={"error": str(exc)}, status_code=400)
    except Exception as exc:
        return JSONResponse(content={"error": str(exc)}, status_code=500)
206
+
207
+
208
if __name__ == "__main__":
    # Script entry point: serve on every interface, port 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ cmake
2
+ libopenblas-dev
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ --prefer-binary
2
+ gradio>=5.0.0
3
+ huggingface_hub>=0.23.0
4
+ llama-cpp-python