cyberkyne committed on
Commit
6a6d9aa
·
verified ·
1 Parent(s): 5a3ee96

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -482
app.py DELETED
@@ -1,482 +0,0 @@
1
- """
2
- app.py — HuggingFace Spaces entry point.
3
-
4
- Architecture:
5
- Python : Gradio UI, Claude API calls, HF I/O, PDF processing
6
- Julia : Indicators, BacktestEngine, WalkForwardOptimizer, SignalCompiler
7
-
8
- Python NEVER does numerical computation. It only:
9
- 1. Calls Claude API (extraction + strategy code generation)
10
- 2. Calls Julia via juliacall for all math
11
- 3. Reads/writes HuggingFace datasets
12
- 4. Renders Gradio UI
13
- """
14
-
15
- import io, json, zipfile, tempfile
16
- from pathlib import Path
17
- from datetime import datetime
18
-
19
- import gradio as gr
20
- from loguru import logger
21
-
22
- import utils.config as cfg
23
- import utils.hf_io as hf
24
- from pipeline.pdf_processor import PDFProcessor
25
- from pipeline.extractor import AIExtractor, Deduplicator
26
- from pipeline.julia_bridge import full_backtest_pipeline, julia_available
27
- from pipeline.exporter import (
28
- slugify, strategy_md, formula_md,
29
- backtest_report_md, optimal_json, mt5_set,
30
- julia_config, index_md,
31
- )
32
-
33
- # ── Lazy KB ───────────────────────────────────────────
34
# Module-level cache for the knowledge base; stays None until first requested.
_kb = None


def get_kb():
    """Return the cached knowledge base, loading it with ``hf.kb_load()`` on first use."""
    global _kb
    if _kb is not None:
        return _kb
    _kb = hf.kb_load()
    return _kb
39
def reset_kb():
    """Replace the cached knowledge base with a fresh ``hf.kb_load()`` result."""
    global _kb
    _kb = hf.kb_load()
41
-
42
-
43
- # ═══════════════════════════════════════════════════
44
- # TAB 1 — UPLOAD & EXTRACT
45
- # ═══════════════════════════════════════════════════
46
-
47
def run_extraction(pdf_files, progress=gr.Progress()):
    """Extract knowledge from uploaded PDFs and merge it into the knowledge base.

    Each PDF is split into chunks by ``PDFProcessor.process``; every chunk is
    sent through ``AIExtractor.extract`` and merged into the knowledge base by
    ``Deduplicator.process``. Afterwards the knowledge base is stored with
    ``hf.kb_save`` and, when ``cfg.HF_TOKEN`` is set, one Markdown file per
    strategy and per formula is pushed with ``hf.push_batch``.

    Args:
        pdf_files: Uploaded files from the Gradio ``File`` component. Entries
            may be plain path strings or objects exposing the path as ``.name``.
        progress: Gradio progress tracker.

    Returns:
        A ``(summary, log)`` tuple of strings. ``log`` contains at most the
        last 40 log entries. When nothing was uploaded or a required secret is
        missing, ``summary`` is the error message and ``log`` is empty.
    """
    if not pdf_files:
        return "⚠️ No PDFs uploaded.", ""
    if not cfg.ANTHROPIC_API_KEY:
        return "❌ ANTHROPIC_API_KEY secret not set.", ""
    if not cfg.HF_DATASET_REPO:
        return "❌ HF_DATASET_REPO secret not set.", ""

    kinds = ("strategies", "formulas", "systems")
    actions = ("added", "merged", "skipped")

    proc = PDFProcessor()
    ai = AIExtractor()
    dedup = Deduplicator()
    kb = get_kb()
    log = []
    totals = {kind: dict.fromkeys(actions, 0) for kind in kinds}
    n_files = len(pdf_files)

    for i, pdf_file in enumerate(pdf_files):
        # Depending on the Gradio version / File ``type``, an upload arrives
        # either as a path string or as a wrapper carrying the path in
        # ``.name``; accept both instead of failing on plain strings.
        path = Path(getattr(pdf_file, "name", pdf_file))
        progress(i / n_files, desc=f"{path.name}")
        log.append(f"\n📖 [{i+1}/{n_files}] {path.name}")
        try:
            chunks = list(proc.process(path))
        except Exception as e:
            # A PDF that cannot be processed is reported and skipped so the
            # remaining uploads are still handled.
            log.append(f" ❌ {e}")
            continue
        log.append(f" → {len(chunks)} chunks")

        for chunk in chunks:
            extracted = ai.extract(chunk)
            stats = dedup.process(extracted, kb)
            for kind in kinds:
                for act in actions:
                    totals[kind][act] += stats[kind][act]

        # Totals are cumulative over all PDFs handled so far.
        log.append(f" → New: {totals['strategies']['added']} strats, {totals['formulas']['added']} formulas")
        if cfg.HF_TOKEN:
            hf.pdf_upload(path)

    # One Markdown export per strategy and per formula currently in the KB.
    hf_files = [
        (f"extracted/strategies/{slugify(rec.get('name', ''))}.md", strategy_md(rec).encode())
        for rec in kb["strategies"].values()
    ]
    hf_files += [
        (f"extracted/formulas/{slugify(rec.get('name', ''))}.md", formula_md(rec).encode())
        for rec in kb["formulas"].values()
    ]

    progress(0.9, desc="Saving to HuggingFace…")
    hf.kb_save(kb)
    if hf_files and cfg.HF_TOKEN:
        pushed = hf.push_batch(hf_files, "Update extracted knowledge")
        log.append(f"\n☁️ Pushed {pushed} files to HuggingFace")
    reset_kb()

    counts = {k: len(kb[k]) for k in kb}
    summary = "\n".join([
        "✅ Extraction Complete",
        "",
        f"PDFs processed: {len(pdf_files)}",
        f"Strategies — added: {totals['strategies']['added']} merged: {totals['strategies']['merged']} skipped: {totals['strategies']['skipped']}",
        f"Formulas — added: {totals['formulas']['added']} merged: {totals['formulas']['merged']} skipped: {totals['formulas']['skipped']}",
        f"Systems — added: {totals['systems']['added']} merged: {totals['systems']['merged']} skipped: {totals['systems']['skipped']}",
        "",
        f"KB totals: {counts['strategies']} strategies · {counts['formulas']} formulas · {counts['systems']} systems",
        f"Tokens used: {ai.tokens_used:,}",
    ])
    return summary, "\n".join(log[-40:])
105
-
106
-
107
- # ═══════════════════════════════════════════════════
108
- # TAB 2 — BROWSE KB
109
- # ═══════════════════════════════════════════════════
110
-
111
def search_strategies(query, category):
    """Filter knowledge-base strategies by category and free-text query.

    Args:
        query: Case-insensitive substring matched against each strategy's
            name and description; a falsy value disables text filtering.
        category: Exact category to keep; a falsy value or ``"All"`` keeps
            every category.

    Returns:
        ``(rows, label)`` where ``rows`` has one list per match (first 100
        only) and ``label`` reports the total number of matches.
    """
    matches = list(get_kb()["strategies"].values())

    if category and category != "All":
        matches = [rec for rec in matches if rec.get("category") == category]

    if query:
        needle = query.lower()
        matches = [
            rec for rec in matches
            if needle in rec.get("name", "").lower()
            or needle in rec.get("description", "").lower()
        ]

    table = []
    for rec in matches[:100]:
        table.append([
            rec.get("name", "")[:50],
            rec.get("category", ""),
            rec.get("description", "")[:100],
            ", ".join(rec.get("sources", []))[:40],
            len(rec.get("layers", [])),
        ])
    return table, f"{len(matches)} strategies"
123
-
124
def search_formulas(query):
    """Return table rows for knowledge-base formulas matching *query*.

    Args:
        query: Case-insensitive substring matched against each formula's name
            and purpose; a falsy value returns all formulas.

    Returns:
        A list with one row per match, limited to the first 100 matches.
    """
    matches = list(get_kb()["formulas"].values())

    if query:
        needle = query.lower()
        matches = [
            rec for rec in matches
            if needle in rec.get("name", "").lower()
            or needle in rec.get("purpose", "").lower()
        ]

    table = []
    for rec in matches[:100]:
        latex_flag = "✅" if rec.get("latex") else "—"
        table.append([
            rec.get("name", "")[:50],
            rec.get("category", ""),
            rec.get("purpose", "")[:80],
            latex_flag,
            ", ".join(rec.get("sources", []))[:40],
        ])
    return table
133
-
134
def dl_strategy(name):
    """Write the Markdown export of the strategy called *name* to a temp file.

    The lookup ignores case and surrounding whitespace in *name*; the first
    matching strategy wins.

    Args:
        name: Strategy name as typed by the user; ``None`` is treated as empty.

    Returns:
        Path of the created ``.md`` file, or ``None`` when no strategy in the
        knowledge base has that name.
    """
    wanted = (name or "").strip().lower()
    for rec in get_kb()["strategies"].values():
        if rec.get("name", "").lower() != wanted:
            continue
        # NamedTemporaryFile creates the file itself, unlike the deprecated
        # tempfile.mktemp, which only returns a name another process can claim
        # first. delete=False keeps the file for the caller to serve.
        with tempfile.NamedTemporaryFile(
            "w", suffix=".md", delete=False, encoding="utf-8"
        ) as fh:
            fh.write(strategy_md(rec))
        return fh.name
    return None
142
-
143
def dl_all_strategies_zip(category):
    """Bundle the Markdown exports of all strategies in *category* into a ZIP.

    Args:
        category: Exact category to include; a falsy value or ``"All"``
            includes every strategy.

    Returns:
        Path of the created ``.zip`` file (possibly an empty archive).
    """
    selected = list(get_kb()["strategies"].values())
    if category and category != "All":
        selected = [rec for rec in selected if rec.get("category") == category]

    # NamedTemporaryFile creates the file itself, unlike the deprecated
    # tempfile.mktemp, which only returns a name another process can claim
    # first. delete=False keeps the archive for the caller to serve.
    with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as fh:
        with zipfile.ZipFile(fh, "w", zipfile.ZIP_DEFLATED) as zf:
            for rec in selected:
                zf.writestr(f"{slugify(rec.get('name', 'unknown'))}.md", strategy_md(rec))
    return fh.name
152
-
153
-
154
- # ═══════════════════════════════════════════════════
155
- # TAB 3 — BACKTEST (Julia Engine)
156
- # ═══════════════════════════════════════════════════
157
-
158
def load_symbols():
    """Build a Gradio update listing the tick symbols and preselecting up to two."""
    available = hf.tick_list_symbols()
    # Slicing copes with fewer than two entries, so no length check is needed.
    preselected = available[:2]
    return gr.update(choices=available, value=preselected)
161
-
162
-
163
def run_backtests(selected_symbols, selected_timeframes,
                  strategy_filter, max_strategies, viable_only,
                  progress=gr.Progress()):
    """Generate Julia code for KB strategies and backtest them per symbol/timeframe.

    For every selected strategy, ``AIExtractor.compile_strategy_code`` produces
    the Julia signal code, which ``full_backtest_pipeline`` then runs on each
    symbol/timeframe pair whose data has at least 200 rows. Result files and a
    master index are pushed through ``hf`` when the relevant secrets are set.

    Args:
        selected_symbols: Symbols to test; when empty, the first two symbols
            from ``hf.tick_list_symbols()`` are used.
        selected_timeframes: Timeframes to test; defaults to ``["1h"]``.
        strategy_filter: Optional case-insensitive substring of strategy names.
        max_strategies: Upper bound on strategies to run; values <= 0 mean all.
        viable_only: When true, only results flagged ``is_viable`` are pushed.
        progress: Gradio progress tracker.

    Returns:
        A ``(summary, log)`` tuple of strings; ``log`` holds at most the last
        60 log entries. On a failed precondition ``summary`` is the error
        message and ``log`` is empty.
    """
    if not cfg.HF_TICK_REPO:
        return "❌ HF_TICK_REPO not set.", ""
    if not cfg.ANTHROPIC_API_KEY:
        return "❌ ANTHROPIC_API_KEY not set.", ""
    if not julia_available():
        return "❌ Julia runtime not available. Check build logs.", ""

    ai = AIExtractor()
    kb = get_kb()
    strats = list(kb["strategies"].values())
    if strategy_filter:
        strats = [s for s in strats if strategy_filter.lower() in s.get("name", "").lower()]
    if max_strategies > 0:
        strats = strats[:int(max_strategies)]
    if not strats:
        return "⚠️ No strategies. Run extraction first.", ""

    symbols = selected_symbols or hf.tick_list_symbols()[:2]
    timeframes = selected_timeframes or ["1h"]

    log = []
    all_results = []
    viable_count = 0
    n_strats = len(strats)

    for idx, rec in enumerate(strats):
        name = rec.get("name", "?")
        progress(idx / n_strats, desc=f"[{idx+1}/{n_strats}] {name[:35]}")

        # Step 1: Julia signal code for this strategy, generated via Claude.
        jl_code = ai.compile_strategy_code(rec)
        if not jl_code:
            log.append(f"❌ Code gen failed: {name[:40]}")
            continue
        log.append(f"✅ Julia code generated: {name[:40]}")

        for sym in symbols:
            for tf in timeframes:
                df = hf.tick_load(sym, tf)
                if df is None or len(df) < 200:
                    log.append(f" ⚠️ {sym} {tf}: no data")
                    continue

                # Step 2: full Julia pipeline (compile → optimize → backtest).
                result = full_backtest_pipeline(
                    strategy_code=jl_code,
                    strategy_name=name,
                    open_p=df["open"].values,
                    high=df["high"].values,
                    low=df["low"].values,
                    close=df["close"].values,
                    volume=df["volume"].values,
                    timeframe=tf,
                    symbol=sym,
                    n_windows=cfg.WF_WINDOWS,
                    is_ratio=cfg.WF_IS_RATIO,
                    min_trades=cfg.MIN_TRADES,
                    min_sharpe=cfg.MIN_SHARPE,
                    max_combos=cfg.MAX_PARAM_COMBOS,
                    initial_equity=cfg.INITIAL_EQUITY,
                    commission_pct=cfg.COMMISSION_PCT,
                    risk_per_trade=cfg.RISK_PER_TRADE,
                )
                all_results.append(result)

                # Step 3: build and push the per-result output files.
                should_push = (
                    cfg.HF_TOKEN
                    and cfg.HF_DATASET_REPO
                    and (not viable_only or result.get("is_viable"))
                )
                if should_push:
                    hf.push_result(
                        name, sym, tf,
                        backtest_report_md(result, rec),
                        optimal_json(result, rec),
                        mt5_set(result, rec),
                        julia_config(result),
                    )

                if result.get("is_viable"):
                    status = "✅"
                else:
                    status = "❌"
                log.append(
                    f" {status} {sym} {tf}: "
                    f"Sharpe={result.get('oos_sharpe_mean',0):.2f} "
                    f"DD={result.get('oos_max_dd',0):.1f}% "
                    f"Score={result.get('robustness',0):.0f}")
                if result.get("is_viable"):
                    viable_count += 1

    # Step 4: push the master index covering every result of this run.
    if all_results and cfg.HF_TOKEN:
        index_meta = {
            "generated": datetime.now().isoformat(),
            "engine": "Julia 1.10",
            "total_strategies": len(all_results),
            "viable_count": viable_count,
            "strategies": all_results,
        }
        hf.push_index(index_md(all_results), index_meta)

    summary = "\n".join([
        "🏁 Julia Backtest Complete",
        "",
        "Engine: Julia 1.10 BacktestEngine.jl",
        f"Strategies compiled: {len(strats)}",
        f"Combinations tested: {len(all_results)}",
        f"Viable strategies: {viable_count}",
        f"Pass rate: {viable_count/max(len(all_results),1)*100:.1f}%",
        "",
        "Results on HuggingFace:",
        f"{cfg.HF_DATASET_REPO}/optimal_sets/BACKTEST_INDEX.md",
    ])
    return summary, "\n".join(log[-60:])
263
-
264
-
265
- # ═══════════════════════════════════════════════════
266
- # TAB 4 — RESULTS
267
- # ═══════════════════════════════════════════════════
268
-
269
def load_results():
    """Fetch the backtest index and tabulate its viable strategies.

    Returns:
        ``(rows, status)``. ``rows`` has one list of display strings per
        viable entry, ordered by ``oos_sharpe_mean`` descending; ``status`` is
        a one-line summary. When the index is missing or empty the result is
        ``([], "No results yet.")``.
    """
    data = hf.fetch_index()
    if not data:
        return [], "No results yet."

    strats = data.get("strategies", [])
    viable = [entry for entry in strats if entry.get("is_viable")]
    viable.sort(key=lambda entry: entry.get("oos_sharpe_mean", 0), reverse=True)

    rows = []
    for entry in viable:
        rows.append([
            entry.get("strategy", "")[:45],
            entry.get("symbol", ""),
            entry.get("timeframe", ""),
            f'{entry.get("oos_sharpe_mean",0):.2f}',
            f'{entry.get("oos_max_dd",0):.1f}%',
            f'{entry.get("oos_win_rate",0):.1f}%',
            f'{entry.get("oos_pf_mean",0):.2f}',
            f'{entry.get("robustness",0):.0f}',
        ])

    generated = data.get('generated', '')[:16]
    count = f"✅ {len(viable)} viable / {len(strats)} tested | Engine: Julia | {generated}"
    return rows, count
282
-
283
def dl_result_file(name, symbol, tf, ftype):
    """Download one result artefact from HuggingFace into a temp file.

    Args:
        name: Strategy name; slugified to build the remote path.
        symbol: Symbol; upper-cased and stripped before use.
        tf: Timeframe, used verbatim in the remote path.
        ftype: One of ``"MT5 .set file"``, ``"Optimal JSON"``,
            ``"Julia config"`` or ``"Full report"``.

    Returns:
        Path of the local copy, or ``None`` when *ftype* is unknown or
        ``hf.fetch_file`` returns nothing for the remote path.
    """
    sl = slugify(name)
    sym = symbol.upper().strip()
    pre = f"{sl}_{sym}_{tf}"
    ext_map = {
        "MT5 .set file": f"optimal_sets/{pre}.set",
        "Optimal JSON": f"optimal_sets/{pre}_optimal.json",
        "Julia config": f"optimal_sets/{pre}_config.jl",
        "Full report": f"backtests/{sl}/{pre}_report.md",
    }
    remote = ext_map.get(ftype, "")
    if not remote:
        return None
    data = hf.fetch_file(remote)
    if not data:
        return None
    # NamedTemporaryFile creates the file itself, unlike the deprecated
    # tempfile.mktemp, which only returns a name another process can claim
    # first. delete=False keeps the file for the caller to serve.
    with tempfile.NamedTemporaryFile(suffix=Path(remote).suffix, delete=False) as fh:
        fh.write(data)
    return fh.name
297
-
298
def dl_all_sets():
    """Collect the ``.set`` files of all viable strategies into one ZIP.

    Entries of the backtest index without a truthy ``is_viable`` flag are
    skipped, as are entries whose ``.set`` file cannot be fetched.

    Returns:
        Path of the created ``.zip`` file, or ``None`` when
        ``hf.fetch_index()`` returns nothing.
    """
    index = hf.fetch_index()
    if not index:
        return None

    # NamedTemporaryFile creates the file itself, unlike the deprecated
    # tempfile.mktemp, which only returns a name another process can claim
    # first. delete=False keeps the archive for the caller to serve.
    with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as fh:
        with zipfile.ZipFile(fh, "w", zipfile.ZIP_DEFLATED) as zf:
            for entry in index.get("strategies", []):
                if not entry.get("is_viable"):
                    continue
                stem = f"{slugify(entry['strategy'])}_{entry['symbol']}_{entry['timeframe']}"
                content = hf.fetch_file(f"optimal_sets/{stem}.set")
                if content:
                    zf.writestr(f"{stem}.set", content)
    return fh.name
309
-
310
-
311
- # ═══════════════════════════════════════════════════
312
- # TAB 5 — SETUP
313
- # ═══════════════════════════════════════════════════
314
-
315
def check_config():
    """Render a Markdown status report of secrets, Julia, data and settings.

    Returns:
        A Markdown string with four sections: configuration status (one line
        per secret), Julia engine availability, data status (tick symbols and
        knowledge-base sizes) and the backtest settings read from ``cfg``.
    """
    checks = [
        ("ANTHROPIC_API_KEY", cfg.ANTHROPIC_API_KEY, "Claude API"),
        ("HF_TOKEN", cfg.HF_TOKEN, "HF write access"),
        ("HF_DATASET_REPO", cfg.HF_DATASET_REPO, "Results storage"),
        ("HF_TICK_REPO", cfg.HF_TICK_REPO, "Tick data source"),
    ]
    kb = get_kb()
    # Symbols are only listed when a tick repository is configured.
    symbols = hf.tick_list_symbols() if cfg.HF_TICK_REPO else []
    jl_ok = julia_available()

    report = ["## Configuration Status", ""]
    for name, val, desc in checks:
        if val:
            icon = "✅"
        else:
            icon = "❌"
        report.append(f"{icon} `{name}` — {desc}")

    if jl_ok:
        jl_icon, jl_state = "✅", "available"
    else:
        jl_icon, jl_state = "❌", "not available (check build logs)"

    report.extend([
        "",
        "## Julia Engine",
        "",
        f"{jl_icon} Julia runtime: {jl_state}",
        "",
        "## Data Status",
        "",
        f"- Tick symbols: **{len(symbols)}** — {', '.join(symbols[:8])}",
        f"- Strategies in KB: **{len(kb['strategies'])}**",
        f"- Formulas in KB: **{len(kb['formulas'])}**",
        "",
        "## Backtest Settings",
        "",
        f"- WF Windows: `{cfg.WF_WINDOWS}` · IS Ratio: `{cfg.WF_IS_RATIO}`",
        f"- Min Trades: `{cfg.MIN_TRADES}` · Min Sharpe: `{cfg.MIN_SHARPE}`",
        f"- Commission: `{cfg.COMMISSION_PCT*100:.3f}%` · Risk/trade: `{cfg.RISK_PER_TRADE*100:.1f}%`",
        f"- Timeframes: `{', '.join(cfg.BACKTEST_TFS)}`",
    ])
    return "\n".join(report)
343
-
344
-
345
- # ═══════════════════════════════════════════════════
346
- # BUILD APP
347
- # ═══════════════════════════════════════════════════
348
-
349
# Category choices for the strategy browser; "All" disables category filtering.
CATS = ["All"] + cfg.CATEGORIES

with gr.Blocks(
    title="Quant Knowledge Extractor — Julia Engine",
    theme=gr.themes.Base(primary_hue="green", neutral_hue="gray"),
    css=".status-box{font-family:monospace;font-size:.82em}",
) as demo:

    # Page header.
    gr.HTML(
        "\n"
        '<div style="text-align:center;padding:1.2em 0 .3em">\n'
        '<h1 style="font-size:2em;color:#16a34a;margin:0">📊 Quant Knowledge Extractor</h1>\n'
        '<p style="color:#6b7280;margin:.4em 0 0">\n'
        "Julia 1.10 Engine · BacktestEngine.jl · WalkForward Optimizer · MT5 .set Output\n"
        "</p>\n"
        "</div>"
    )

    with gr.Tabs():

        # Tab 1 — upload PDFs and run extraction.
        with gr.Tab("📤 Upload & Extract"):
            gr.Markdown("### Upload algorithmic trading PDFs — OCR applied automatically")
            with gr.Row():
                with gr.Column(scale=2):
                    pdf_in = gr.File(
                        label="Drop PDFs here",
                        file_count="multiple",
                        file_types=[".pdf"],
                    )
                    ext_btn = gr.Button("🚀 Extract Knowledge", variant="primary", size="lg")
                with gr.Column(scale=1):
                    ext_out = gr.Textbox(
                        label="Result", lines=14, interactive=False,
                        elem_classes=["status-box"],
                    )
                    ext_log = gr.Textbox(
                        label="Log", lines=8, interactive=False,
                        elem_classes=["status-box"],
                    )
            ext_btn.click(fn=run_extraction, inputs=[pdf_in], outputs=[ext_out, ext_log])

        # Tab 2 — browse the knowledge base.
        with gr.Tab("📚 Knowledge Base"):
            with gr.Tabs():
                with gr.Tab("📈 Strategies"):
                    with gr.Row():
                        sq = gr.Textbox(label="Search", placeholder="RSI, breakout, Kelly…")
                        sc = gr.Dropdown(choices=CATS, value="All", label="Category")
                        sb = gr.Button("🔍 Search", variant="primary")
                    st = gr.Dataframe(
                        headers=["Name", "Category", "Description", "Sources", "Variants"],
                        datatype=["str"] * 4 + ["number"],
                        interactive=False,
                    )
                    sn = gr.Markdown("")
                    with gr.Row():
                        sni = gr.Textbox(label="Name to download")
                        sdb = gr.Button("⬇️ Download MD")
                        sdf = gr.File(label="")
                        szb = gr.Button("📦 Category ZIP")
                        szf = gr.File(label="")
                    sb.click(fn=search_strategies, inputs=[sq, sc], outputs=[st, sn])
                    sdb.click(fn=dl_strategy, inputs=[sni], outputs=[sdf])
                    szb.click(fn=dl_all_strategies_zip, inputs=[sc], outputs=[szf])
                with gr.Tab("∑ Formulas"):
                    with gr.Row():
                        fq = gr.Textbox(label="Search", placeholder="Sharpe, Kelly, ATR…")
                        fb = gr.Button("🔍 Search", variant="primary")
                    ft = gr.Dataframe(
                        headers=["Name", "Category", "Purpose", "LaTeX", "Sources"],
                        datatype=["str"] * 5,
                        interactive=False,
                    )
                    fb.click(fn=search_formulas, inputs=[fq], outputs=[ft])

        # Tab 3 — run walk-forward backtests.
        with gr.Tab("🔬 Julia Backtest"):
            gr.Markdown(
                "### Walk-Forward Backtest — Julia Engine\n"
                "Claude generates Julia signal code → Julia compiles + optimizes → "
                "MT5 `.set` files pushed to HuggingFace."
            )
            with gr.Row():
                with gr.Column(scale=2):
                    bt_load = gr.Button("🔄 Load Symbols from HF")
                    bt_syms = gr.CheckboxGroup(label="Symbols", choices=[], value=[])
                    bt_tfs = gr.CheckboxGroup(
                        label="Timeframes",
                        value=["1h", "4h"],
                        choices=["1m", "5m", "15m", "30m", "1h", "4h", "1d"],
                    )
                    bt_filt = gr.Textbox(label="Strategy filter (optional)")
                    bt_max = gr.Slider(0, 500, value=0, step=10, label="Max strategies (0=all)")
                    bt_viable = gr.Checkbox(label="Push only VIABLE to HuggingFace", value=True)
                    bt_run = gr.Button("🚀 Run Julia Backtests", variant="primary", size="lg")
                with gr.Column(scale=1):
                    bt_out = gr.Textbox(
                        label="Summary", lines=12, interactive=False,
                        elem_classes=["status-box"],
                    )
                    bt_log = gr.Textbox(
                        label="Log", lines=12, interactive=False,
                        elem_classes=["status-box"],
                    )
            bt_load.click(fn=load_symbols, outputs=[bt_syms])
            bt_run.click(
                fn=run_backtests,
                inputs=[bt_syms, bt_tfs, bt_filt, bt_max, bt_viable],
                outputs=[bt_out, bt_log],
            )

        # Tab 4 — view and download results.
        with gr.Tab("🏆 Results"):
            gr.Markdown("### Viable Strategies — Download MT5 `.set` & Julia Configs")
            res_ref = gr.Button("🔄 Refresh from HuggingFace", variant="primary")
            res_tbl = gr.Dataframe(
                headers=["Strategy", "Symbol", "TF", "Sharpe", "Max DD", "Win%", "PF", "Score"],
                datatype=["str"] * 8,
                interactive=False,
            )
            res_cnt = gr.Markdown("")
            gr.Markdown("#### Download individual file")
            with gr.Row():
                rn = gr.Textbox(label="Strategy name")
                rs = gr.Textbox(label="Symbol")
                rt = gr.Textbox(label="Timeframe")
                rf = gr.Dropdown(
                    choices=["MT5 .set file", "Optimal JSON", "Julia config", "Full report"],
                    value="MT5 .set file",
                    label="File type",
                )
                rdb = gr.Button("⬇️ Download", variant="primary")
                rdf = gr.File(label="")
            gr.Markdown("#### Batch download all viable strategies")
            with gr.Row():
                rsb = gr.Button("🎯 All MT5 .set (ZIP)")
                rsf = gr.File(label="")
            res_ref.click(fn=load_results, outputs=[res_tbl, res_cnt])
            rdb.click(fn=dl_result_file, inputs=[rn, rs, rt, rf], outputs=[rdf])
            rsb.click(fn=dl_all_sets, outputs=[rsf])
            # Populate the results table when the page is first loaded.
            demo.load(fn=load_results, outputs=[res_tbl, res_cnt])

        # Tab 5 — setup instructions and live status.
        with gr.Tab("⚙️ Setup & Status"):
            gr.Markdown("\n".join([
                "### Required Secrets (Space Settings → Variables and Secrets)",
                "",
                "| Secret | Description |",
                "|--------|-------------|",
                "| `ANTHROPIC_API_KEY` | Claude API key |",
                "| `HF_TOKEN` | HuggingFace write token |",
                "| `HF_DATASET_REPO` | `your-username/quant-knowledge-base` |",
                "| `HF_TICK_REPO` | `your-username/tick-data` |",
                "",
                "### Tick Data Format",
                "Upload to your `tick-data` dataset:",
                "```",
                "EURUSD/ticks.parquet (columns: timestamp, bid, ask OR open,high,low,close,volume)",
                "BTCUSDT/1h.parquet (pre-built OHLCV — faster)",
                "```",
                "",
            ]))
            cfg_ref = gr.Button("🔄 Check Status")
            # The initial status text is computed once while the UI is built.
            cfg_out = gr.Markdown(check_config())
            cfg_ref.click(fn=check_config, outputs=[cfg_out])

    # Page footer.
    gr.HTML(
        '<div style="text-align:center;padding:.8em;color:#9ca3af;font-size:.75em">\n'
        "Quant Knowledge Extractor · Julia 1.10 Engine · HuggingFace Spaces\n"
        "</div>"
    )

if __name__ == "__main__":
    demo.launch()