cyberkyne committed on
Commit
094a5f6
·
verified ·
1 Parent(s): bbe52bd

Upload 22 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ # ── System dependencies ───────────────────────────────
4
+ RUN apt-get update && apt-get install -y \
5
+ curl wget git ca-certificates \
6
+ tesseract-ocr tesseract-ocr-eng \
7
+ poppler-utils \
8
+ libgl1 libglib2.0-0 libsm6 libxext6 \
9
+ build-essential gfortran \
10
+ && rm -rf /var/lib/apt/lists/*
11
+
12
+ # ── Install Julia 1.10 LTS ────────────────────────────
13
+ ENV JULIA_VERSION=1.10.7
14
+ RUN wget -q https://julialang-s3.julialang.org/bin/linux/x64/1.10/julia-${JULIA_VERSION}-linux-x86_64.tar.gz \
15
+ && tar -xzf julia-${JULIA_VERSION}-linux-x86_64.tar.gz \
16
+ && mv julia-${JULIA_VERSION} /usr/local/julia \
17
+ && ln -s /usr/local/julia/bin/julia /usr/local/bin/julia \
18
+ && rm julia-${JULIA_VERSION}-linux-x86_64.tar.gz \
19
+ && julia --version
20
+
21
+ # ── Julia / app environment ───────────────────────────
22
+ ENV JULIA_DEPOT_PATH=/app/.julia
23
+ ENV JULIA_NUM_THREADS=4
24
+ ENV JULIA_PROJECT=/app/src
25
+
26
+ WORKDIR /app
27
+
28
+ # ── Python dependencies ───────────────────────────────
29
+ COPY requirements.txt /tmp/requirements.txt
30
+ RUN pip install --no-cache-dir -r /tmp/requirements.txt
31
+
32
+ # ── Copy project files ────────────────────────────────
33
+ COPY . .
34
+
35
+ # ── Step 1: Resolve + precompile Julia packages ───────
36
+ # Pipe the Julia code to `julia` on stdin (via printf) so Docker does not
37
+ # misread Julia keywords (using/import) as Dockerfile instructions.
38
+ RUN printf 'import Pkg\nPkg.instantiate()\nPkg.precompile()\nprintln("Julia packages resolved")\n' \
39
+ | julia --project=/app/src
40
+
41
+ # ── Step 2: Warmup — JIT-compile all hot paths ────────
42
+ RUN julia --project=/app/src /app/src/warmup.jl
43
+
44
+ # ── Step 3: Pre-warm juliacall Python↔Julia bridge ────
45
+ RUN python3 /app/src/warmup_bridge.py
46
+
47
+ # ── Runtime ───────────────────────────────────────────
48
+ ENV GRADIO_SERVER_NAME=0.0.0.0
49
+ ENV GRADIO_SERVER_PORT=7860
50
+ ENV JULIA_PROJECT=/app/src
51
+ ENV JULIA_DEPOT_PATH=/app/.julia
52
+
53
+ EXPOSE 7860
54
+ CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Quant Knowledge Extractor
3
+ emoji: 📊
4
+ colorFrom: green
5
+ colorTo: gray
6
+ sdk: docker
7
+ pinned: true
8
+ license: mit
9
+ ---
10
+
11
+ # 📊 Quant Knowledge Extractor — Julia Engine
12
+
13
+ Upload PDFs → Extract strategies → Backtest with Julia → Download MT5 `.set` files.
14
+
15
+ **Julia handles all computation** (indicators, backtest engine, walk-forward optimizer).
16
+ Python handles only UI (Gradio) and API calls (Claude, HuggingFace).
17
+
18
+ ## Setup — Add these Secrets in Space Settings → Variables and Secrets
19
+
20
+ | Secret | Description |
21
+ |--------|-------------|
22
+ | `ANTHROPIC_API_KEY` | Claude API key |
23
+ | `HF_TOKEN` | HuggingFace write token |
24
+ | `HF_DATASET_REPO` | `your-username/quant-knowledge-base` |
25
+ | `HF_TICK_REPO` | `your-username/tick-data` |
app.py ADDED
@@ -0,0 +1,482 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app.py — HuggingFace Spaces entry point.
3
+
4
+ Architecture:
5
+ Python : Gradio UI, Claude API calls, HF I/O, PDF processing
6
+ Julia : Indicators, BacktestEngine, WalkForwardOptimizer, SignalCompiler
7
+
8
+ Python NEVER does numerical computation. It only:
9
+ 1. Calls Claude API (extraction + strategy code generation)
10
+ 2. Calls Julia via juliacall for all math
11
+ 3. Reads/writes HuggingFace datasets
12
+ 4. Renders Gradio UI
13
+ """
14
+
15
+ import io, json, zipfile, tempfile
16
+ from pathlib import Path
17
+ from datetime import datetime
18
+
19
+ import gradio as gr
20
+ from loguru import logger
21
+
22
+ import utils.config as cfg
23
+ import utils.hf_io as hf
24
+ from pipeline.pdf_processor import PDFProcessor
25
+ from pipeline.extractor import AIExtractor, Deduplicator
26
+ from pipeline.julia_bridge import full_backtest_pipeline, julia_available
27
+ from pipeline.exporter import (
28
+ slugify, strategy_md, formula_md,
29
+ backtest_report_md, optimal_json, mt5_set,
30
+ julia_config, index_md,
31
+ )
32
+
33
+ # ── Lazy KB ───────────────────────────────────────────
34
+ _kb = None
35
def get_kb():
    """Return the cached knowledge base, loading it with ``hf.kb_load()`` on first use."""
    global _kb
    if _kb is not None:
        return _kb
    _kb = hf.kb_load()
    return _kb
39
def reset_kb():
    """Replace the cached knowledge base with a fresh ``hf.kb_load()`` result."""
    global _kb
    reloaded = hf.kb_load()
    _kb = reloaded
41
+
42
+
43
+ # ═══════════════════════════════════════════════════
44
+ # TAB 1 β€” UPLOAD & EXTRACT
45
+ # ═══════════════════════════════════════════════════
46
+
47
+ def run_extraction(pdf_files, progress=gr.Progress()):
48
+ if not pdf_files: return "⚠️ No PDFs uploaded.", ""
49
+ if not cfg.ANTHROPIC_API_KEY: return "❌ ANTHROPIC_API_KEY secret not set.", ""
50
+ if not cfg.HF_DATASET_REPO: return "❌ HF_DATASET_REPO secret not set.", ""
51
+
52
+ proc = PDFProcessor()
53
+ ai = AIExtractor()
54
+ dedup = Deduplicator()
55
+ kb = get_kb()
56
+ log = []
57
+ totals = {k:{"added":0,"merged":0,"skipped":0} for k in ("strategies","formulas","systems")}
58
+ hf_files = []
59
+
60
+ for i, pdf_file in enumerate(pdf_files):
61
+ path = Path(pdf_file.name)
62
+ progress(i/len(pdf_files), desc=f"{path.name}")
63
+ log.append(f"\nπŸ“– [{i+1}/{len(pdf_files)}] {path.name}")
64
+ try:
65
+ chunks = list(proc.process(path))
66
+ log.append(f" β†’ {len(chunks)} chunks")
67
+ except Exception as e:
68
+ log.append(f" ❌ {e}"); continue
69
+
70
+ for chunk in chunks:
71
+ extracted = ai.extract(chunk)
72
+ stats = dedup.process(extracted, kb)
73
+ for kind in ("strategies","formulas","systems"):
74
+ for act in ("added","merged","skipped"):
75
+ totals[kind][act] += stats[kind][act]
76
+
77
+ log.append(f" β†’ New: {totals['strategies']['added']} strats, {totals['formulas']['added']} formulas")
78
+ if cfg.HF_TOKEN: hf.pdf_upload(path)
79
+
80
+ for cid, rec in kb["strategies"].items():
81
+ hf_files.append((f"extracted/strategies/{slugify(rec.get('name',''))}.md",
82
+ strategy_md(rec).encode()))
83
+ for cid, rec in kb["formulas"].items():
84
+ hf_files.append((f"extracted/formulas/{slugify(rec.get('name',''))}.md",
85
+ formula_md(rec).encode()))
86
+
87
+ progress(0.9, desc="Saving to HuggingFace…")
88
+ hf.kb_save(kb)
89
+ if hf_files and cfg.HF_TOKEN:
90
+ pushed = hf.push_batch(hf_files, "Update extracted knowledge")
91
+ log.append(f"\n☁️ Pushed {pushed} files to HuggingFace")
92
+ reset_kb()
93
+
94
+ counts = {k: len(kb[k]) for k in kb}
95
+ summary = f"""βœ… Extraction Complete
96
+
97
+ PDFs processed: {len(pdf_files)}
98
+ Strategies β€” added: {totals['strategies']['added']} merged: {totals['strategies']['merged']} skipped: {totals['strategies']['skipped']}
99
+ Formulas β€” added: {totals['formulas']['added']} merged: {totals['formulas']['merged']} skipped: {totals['formulas']['skipped']}
100
+ Systems β€” added: {totals['systems']['added']} merged: {totals['systems']['merged']} skipped: {totals['systems']['skipped']}
101
+
102
+ KB totals: {counts['strategies']} strategies Β· {counts['formulas']} formulas Β· {counts['systems']} systems
103
+ Tokens used: {ai.tokens_used:,}"""
104
+ return summary, "\n".join(log[-40:])
105
+
106
+
107
+ # ═══════════════════════════════════════════════════
108
+ # TAB 2 β€” BROWSE KB
109
+ # ═══════════════════════════════════════════════════
110
+
111
def search_strategies(query, category):
    """Filter knowledge-base strategies for the browse table.

    Args:
        query: Case-insensitive substring matched against each record's
            name and description; a falsy value disables the text filter.
        category: Exact category to keep; falsy or ``"All"`` keeps everything.

    Returns:
        ``(rows, label)`` where ``rows`` holds at most 100 entries of
        ``[name, category, description, sources, layer count]`` and ``label``
        is ``"<n> strategies"`` counting every match, not just the rows shown.
    """
    def text(rec, key):
        # A key that is present but None would otherwise crash .lower()/slicing.
        return rec.get(key) or ""

    items = list(get_kb()["strategies"].values())
    if category and category != "All":
        items = [x for x in items if x.get("category") == category]
    if query:
        q = query.lower()
        items = [
            x for x in items
            if q in text(x, "name").lower() or q in text(x, "description").lower()
        ]
    rows = [
        [
            text(x, "name")[:50],
            x.get("category", ""),
            text(x, "description")[:100],
            ", ".join(x.get("sources") or [])[:40],
            len(x.get("layers") or []),
        ]
        for x in items[:100]
    ]
    return rows, f"{len(items)} strategies"
123
+
124
def search_formulas(query):
    """Filter knowledge-base formulas for the browse table.

    Args:
        query: Case-insensitive substring matched against each record's
            name and purpose; a falsy value returns every formula.

    Returns:
        At most 100 rows of ``[name, category, purpose, latex flag, sources]``.
        The flag is a check mark when the record has a truthy ``latex`` field
        and an em dash otherwise.
    """
    def text(rec, key):
        # A key that is present but None would otherwise crash .lower()/slicing.
        return rec.get(key) or ""

    items = list(get_kb()["formulas"].values())
    if query:
        q = query.lower()
        items = [
            x for x in items
            if q in text(x, "name").lower() or q in text(x, "purpose").lower()
        ]
    return [
        [
            text(x, "name")[:50],
            x.get("category", ""),
            text(x, "purpose")[:80],
            "✅" if x.get("latex") else "—",
            ", ".join(x.get("sources") or [])[:40],
        ]
        for x in items[:100]
    ]
133
+
134
def dl_strategy(name):
    """Write the Markdown for the strategy called *name* to a temp file.

    Args:
        name: Strategy name; compared case-insensitively after stripping
            surrounding whitespace. ``None`` is treated as an empty name.

    Returns:
        Path of the created ``.md`` file, or ``None`` when no strategy matches.
    """
    wanted = (name or "").strip().lower()
    for rec in get_kb()["strategies"].values():
        if rec.get("name", "").lower() == wanted:
            # NamedTemporaryFile creates the file atomically; tempfile.mktemp
            # only returns a name and is deprecated because of the race.
            with tempfile.NamedTemporaryFile(
                "w", suffix=".md", encoding="utf-8", delete=False
            ) as fh:
                fh.write(strategy_md(rec))
            return fh.name
    return None
142
+
143
def dl_all_strategies_zip(category):
    """Bundle the Markdown of every strategy in *category* into a ZIP file.

    Args:
        category: Exact category to include; falsy or ``"All"`` includes
            every strategy in the knowledge base.

    Returns:
        Path of the created ``.zip`` file (empty archive if nothing matches).
    """
    items = list(get_kb()["strategies"].values())
    if category and category != "All":
        items = [x for x in items if x.get("category") == category]
    # Create the file atomically instead of using the deprecated, race-prone
    # tempfile.mktemp; delete=False keeps it on disk for the download.
    with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as fh:
        with zipfile.ZipFile(fh, "w", zipfile.ZIP_DEFLATED) as zf:
            for rec in items:
                zf.writestr(f"{slugify(rec.get('name','unknown'))}.md", strategy_md(rec))
    return fh.name
152
+
153
+
154
+ # ═══════════════════════════════════════════════════
155
+ # TAB 3 β€” BACKTEST (Julia Engine)
156
+ # ═══════════════════════════════════════════════════
157
+
158
def load_symbols():
    """Build a Gradio update listing the tick symbols, pre-selecting up to two."""
    available = hf.tick_list_symbols()
    if len(available) >= 2:
        preselected = available[:2]
    else:
        preselected = available
    return gr.update(choices=available, value=preselected)
161
+
162
+
163
+ def run_backtests(selected_symbols, selected_timeframes,
164
+ strategy_filter, max_strategies, viable_only,
165
+ progress=gr.Progress()):
166
+
167
+ if not cfg.HF_TICK_REPO: return "❌ HF_TICK_REPO not set.", ""
168
+ if not cfg.ANTHROPIC_API_KEY: return "❌ ANTHROPIC_API_KEY not set.", ""
169
+ if not julia_available(): return "❌ Julia runtime not available. Check build logs.", ""
170
+
171
+ ai = AIExtractor()
172
+ kb = get_kb()
173
+ strats = list(kb["strategies"].values())
174
+ if strategy_filter:
175
+ strats = [s for s in strats if strategy_filter.lower() in s.get("name","").lower()]
176
+ if max_strategies > 0:
177
+ strats = strats[:int(max_strategies)]
178
+ if not strats: return "⚠️ No strategies. Run extraction first.", ""
179
+
180
+ symbols = selected_symbols or hf.tick_list_symbols()[:2]
181
+ timeframes = selected_timeframes or ["1h"]
182
+
183
+ log, all_results, viable_count = [], [], 0
184
+
185
+ for si, rec in enumerate(strats):
186
+ name = rec.get("name","?")
187
+ progress(si/len(strats), desc=f"[{si+1}/{len(strats)}] {name[:35]}")
188
+
189
+ # 1. Generate Julia signal code via Claude
190
+ jl_code = ai.compile_strategy_code(rec)
191
+ if not jl_code:
192
+ log.append(f"❌ Code gen failed: {name[:40]}"); continue
193
+ log.append(f"βœ… Julia code generated: {name[:40]}")
194
+
195
+ for sym in symbols:
196
+ for tf in timeframes:
197
+ df = hf.tick_load(sym, tf)
198
+ if df is None or len(df) < 200:
199
+ log.append(f" ⚠️ {sym} {tf}: no data"); continue
200
+
201
+ # 2. Full Julia pipeline (compile β†’ optimize β†’ backtest)
202
+ result = full_backtest_pipeline(
203
+ strategy_code = jl_code,
204
+ strategy_name = name,
205
+ open_p = df["open"].values,
206
+ high = df["high"].values,
207
+ low = df["low"].values,
208
+ close = df["close"].values,
209
+ volume = df["volume"].values,
210
+ timeframe = tf,
211
+ symbol = sym,
212
+ n_windows = cfg.WF_WINDOWS,
213
+ is_ratio = cfg.WF_IS_RATIO,
214
+ min_trades = cfg.MIN_TRADES,
215
+ min_sharpe = cfg.MIN_SHARPE,
216
+ max_combos = cfg.MAX_PARAM_COMBOS,
217
+ initial_equity = cfg.INITIAL_EQUITY,
218
+ commission_pct = cfg.COMMISSION_PCT,
219
+ risk_per_trade = cfg.RISK_PER_TRADE,
220
+ )
221
+ all_results.append(result)
222
+
223
+ # 3. Build + push output files
224
+ if cfg.HF_TOKEN and cfg.HF_DATASET_REPO:
225
+ if not viable_only or result.get("is_viable"):
226
+ hf.push_result(
227
+ name, sym, tf,
228
+ backtest_report_md(result, rec),
229
+ optimal_json(result, rec),
230
+ mt5_set(result, rec),
231
+ julia_config(result),
232
+ )
233
+
234
+ status = "βœ…" if result.get("is_viable") else "❌"
235
+ log.append(
236
+ f" {status} {sym} {tf}: "
237
+ f"Sharpe={result.get('oos_sharpe_mean',0):.2f} "
238
+ f"DD={result.get('oos_max_dd',0):.1f}% "
239
+ f"Score={result.get('robustness',0):.0f}")
240
+ if result.get("is_viable"): viable_count += 1
241
+
242
+ # 4. Push master index
243
+ if all_results and cfg.HF_TOKEN:
244
+ hf.push_index(index_md(all_results), {
245
+ "generated": datetime.now().isoformat(),
246
+ "engine": "Julia 1.10",
247
+ "total_strategies": len(all_results),
248
+ "viable_count": viable_count,
249
+ "strategies": all_results,
250
+ })
251
+
252
+ summary = f"""🏁 Julia Backtest Complete
253
+
254
+ Engine: Julia 1.10 BacktestEngine.jl
255
+ Strategies compiled: {len(strats)}
256
+ Combinations tested: {len(all_results)}
257
+ Viable strategies: {viable_count}
258
+ Pass rate: {viable_count/max(len(all_results),1)*100:.1f}%
259
+
260
+ Results on HuggingFace:
261
+ {cfg.HF_DATASET_REPO}/optimal_sets/BACKTEST_INDEX.md"""
262
+ return summary, "\n".join(log[-60:])
263
+
264
+
265
+ # ═══════════════════════════════════════════════════
266
+ # TAB 4 β€” RESULTS
267
+ # ═══════════════════════════════════════════════════
268
+
269
def load_results():
    """Fetch the backtest index and build the table of viable strategies.

    Returns:
        ``(rows, label)``. ``rows`` lists viable results sorted by
        ``oos_sharpe_mean`` descending as
        ``[strategy, symbol, timeframe, sharpe, max DD, win %, PF, score]``
        (all strings). ``label`` summarises viable/tested counts and the first
        16 characters of the index's ``generated`` stamp. When
        ``hf.fetch_index()`` returns nothing, the result is
        ``([], "No results yet.")``.
    """
    data = hf.fetch_index()
    if not data:
        return [], "No results yet."

    def num(rec, key):
        # A metric stored as None would crash both sorting and formatting.
        return rec.get(key) or 0

    strats = data.get("strategies", [])
    viable = sorted(
        (s for s in strats if s.get("is_viable")),
        key=lambda s: num(s, "oos_sharpe_mean"),
        reverse=True,
    )
    rows = [
        [
            (s.get("strategy") or "")[:45],
            s.get("symbol", ""),
            s.get("timeframe", ""),
            f'{num(s, "oos_sharpe_mean"):.2f}',
            f'{num(s, "oos_max_dd"):.1f}%',
            f'{num(s, "oos_win_rate"):.1f}%',
            f'{num(s, "oos_pf_mean"):.2f}',
            f'{num(s, "robustness"):.0f}',
        ]
        for s in viable
    ]
    count = (f"✅ {len(viable)} viable / {len(strats)} tested | "
             f"Engine: Julia | {(data.get('generated') or '')[:16]}")
    return rows, count
282
+
283
def dl_result_file(name, symbol, tf, ftype):
    """Download one result artefact from HuggingFace into a temp file.

    Args:
        name: Strategy name; slugified to build the remote path.
        symbol: Instrument symbol; upper-cased and stripped.
        tf: Timeframe string used verbatim in the remote path.
        ftype: One of ``"MT5 .set file"``, ``"Optimal JSON"``,
            ``"Julia config"`` or ``"Full report"``.

    Returns:
        Path of the local copy, or ``None`` when *ftype* is unknown or
        ``hf.fetch_file`` returns nothing.
    """
    sl = slugify(name)
    sym = symbol.upper().strip()
    pre = f"{sl}_{sym}_{tf}"
    ext_map = {
        "MT5 .set file": f"optimal_sets/{pre}.set",
        "Optimal JSON": f"optimal_sets/{pre}_optimal.json",
        "Julia config": f"optimal_sets/{pre}_config.jl",
        "Full report": f"backtests/{sl}/{pre}_report.md",
    }
    remote = ext_map.get(ftype, "")
    if not remote:
        return None
    data = hf.fetch_file(remote)
    if not data:
        return None
    # Atomic creation replaces the deprecated, race-prone tempfile.mktemp.
    with tempfile.NamedTemporaryFile(suffix=Path(remote).suffix, delete=False) as fh:
        fh.write(data)
    return fh.name
297
+
298
def dl_all_sets():
    """Zip the MT5 ``.set`` file of every viable strategy in the index.

    Returns:
        Path of the created ``.zip`` file, or ``None`` when
        ``hf.fetch_index()`` returns nothing. Entries whose remote file cannot
        be fetched are left out of the archive.
    """
    data = hf.fetch_index()
    if not data:
        return None
    # Atomic creation replaces the deprecated, race-prone tempfile.mktemp.
    with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as fh:
        with zipfile.ZipFile(fh, "w", zipfile.ZIP_DEFLATED) as zf:
            for s in data.get("strategies", []):
                if not s.get("is_viable"):
                    continue
                # .get keeps one malformed index entry from aborting the batch.
                sl = slugify(s.get("strategy", ""))
                sym = s.get("symbol", "")
                tf = s.get("timeframe", "")
                content = hf.fetch_file(f"optimal_sets/{sl}_{sym}_{tf}.set")
                if content:
                    zf.writestr(f"{sl}_{sym}_{tf}.set", content)
    return fh.name
309
+
310
+
311
+ # ═══════════════════════════════════════════════════
312
+ # TAB 5 β€” SETUP
313
+ # ═══════════════════════════════════════════════════
314
+
315
+ def check_config():
316
+ checks = [
317
+ ("ANTHROPIC_API_KEY", cfg.ANTHROPIC_API_KEY, "Claude API"),
318
+ ("HF_TOKEN", cfg.HF_TOKEN, "HF write access"),
319
+ ("HF_DATASET_REPO", cfg.HF_DATASET_REPO, "Results storage"),
320
+ ("HF_TICK_REPO", cfg.HF_TICK_REPO, "Tick data source"),
321
+ ]
322
+ kb = get_kb()
323
+ symbols = hf.tick_list_symbols() if cfg.HF_TICK_REPO else []
324
+ jl_ok = julia_available()
325
+
326
+ lines = ["## Configuration Status", ""]
327
+ for name, val, desc in checks:
328
+ icon = "βœ…" if val else "❌"
329
+ lines.append(f"{icon} `{name}` β€” {desc}")
330
+
331
+ lines += ["", "## Julia Engine", "",
332
+ f"{'βœ…' if jl_ok else '❌'} Julia runtime: {'available' if jl_ok else 'not available (check build logs)'}",
333
+ "", "## Data Status", "",
334
+ f"- Tick symbols: **{len(symbols)}** β€” {', '.join(symbols[:8])}",
335
+ f"- Strategies in KB: **{len(kb['strategies'])}**",
336
+ f"- Formulas in KB: **{len(kb['formulas'])}**",
337
+ "", "## Backtest Settings", "",
338
+ f"- WF Windows: `{cfg.WF_WINDOWS}` Β· IS Ratio: `{cfg.WF_IS_RATIO}`",
339
+ f"- Min Trades: `{cfg.MIN_TRADES}` Β· Min Sharpe: `{cfg.MIN_SHARPE}`",
340
+ f"- Commission: `{cfg.COMMISSION_PCT*100:.3f}%` Β· Risk/trade: `{cfg.RISK_PER_TRADE*100:.1f}%`",
341
+ f"- Timeframes: `{', '.join(cfg.BACKTEST_TFS)}`"]
342
+ return "\n".join(lines)
343
+
344
+
345
+ # ═══════════════════════════════════════════════════
346
+ # BUILD APP
347
+ # ═══════════════════════════════════════════════════
348
+
349
+ CATS = ["All"] + cfg.CATEGORIES
350
+
351
+ with gr.Blocks(
352
+ title="Quant Knowledge Extractor β€” Julia Engine",
353
+ theme=gr.themes.Base(primary_hue="green", neutral_hue="gray"),
354
+ css=".status-box{font-family:monospace;font-size:.82em}"
355
+ ) as demo:
356
+
357
+ gr.HTML("""
358
+ <div style="text-align:center;padding:1.2em 0 .3em">
359
+ <h1 style="font-size:2em;color:#16a34a;margin:0">πŸ“Š Quant Knowledge Extractor</h1>
360
+ <p style="color:#6b7280;margin:.4em 0 0">
361
+ Julia 1.10 Engine Β· BacktestEngine.jl Β· WalkForward Optimizer Β· MT5 .set Output
362
+ </p>
363
+ </div>""")
364
+
365
+ with gr.Tabs():
366
+
367
+ # Tab 1 β€” Extract
368
+ with gr.Tab("πŸ“€ Upload & Extract"):
369
+ gr.Markdown("### Upload algorithmic trading PDFs β€” OCR applied automatically")
370
+ with gr.Row():
371
+ with gr.Column(scale=2):
372
+ pdf_in = gr.File(label="Drop PDFs here", file_count="multiple", file_types=[".pdf"])
373
+ ext_btn = gr.Button("πŸš€ Extract Knowledge", variant="primary", size="lg")
374
+ with gr.Column(scale=1):
375
+ ext_out = gr.Textbox(label="Result", lines=14, interactive=False, elem_classes=["status-box"])
376
+ ext_log = gr.Textbox(label="Log", lines=8, interactive=False, elem_classes=["status-box"])
377
+ ext_btn.click(fn=run_extraction, inputs=[pdf_in], outputs=[ext_out, ext_log])
378
+
379
+ # Tab 2 β€” Browse
380
+ with gr.Tab("πŸ“š Knowledge Base"):
381
+ with gr.Tabs():
382
+ with gr.Tab("πŸ“ˆ Strategies"):
383
+ with gr.Row():
384
+ sq = gr.Textbox(label="Search", placeholder="RSI, breakout, Kelly…")
385
+ sc = gr.Dropdown(choices=CATS, value="All", label="Category")
386
+ sb = gr.Button("πŸ” Search", variant="primary")
387
+ st = gr.Dataframe(headers=["Name","Category","Description","Sources","Variants"],
388
+ datatype=["str"]*4+["number"], interactive=False)
389
+ sn = gr.Markdown("")
390
+ with gr.Row():
391
+ sni = gr.Textbox(label="Name to download")
392
+ sdb = gr.Button("⬇️ Download MD"); sdf = gr.File(label="")
393
+ szb = gr.Button("πŸ“¦ Category ZIP"); szf = gr.File(label="")
394
+ sb.click(fn=search_strategies, inputs=[sq,sc], outputs=[st,sn])
395
+ sdb.click(fn=dl_strategy, inputs=[sni], outputs=[sdf])
396
+ szb.click(fn=dl_all_strategies_zip, inputs=[sc], outputs=[szf])
397
+ with gr.Tab("βˆ‘ Formulas"):
398
+ with gr.Row():
399
+ fq = gr.Textbox(label="Search", placeholder="Sharpe, Kelly, ATR…")
400
+ fb = gr.Button("πŸ” Search", variant="primary")
401
+ ft = gr.Dataframe(headers=["Name","Category","Purpose","LaTeX","Sources"],
402
+ datatype=["str"]*5, interactive=False)
403
+ fb.click(fn=search_formulas, inputs=[fq], outputs=[ft])
404
+
405
+ # Tab 3 β€” Backtest
406
+ with gr.Tab("πŸ”¬ Julia Backtest"):
407
+ gr.Markdown(
408
+ "### Walk-Forward Backtest β€” Julia Engine\n"
409
+ "Claude generates Julia signal code β†’ Julia compiles + optimizes β†’ "
410
+ "MT5 `.set` files pushed to HuggingFace."
411
+ )
412
+ with gr.Row():
413
+ with gr.Column(scale=2):
414
+ bt_load = gr.Button("πŸ”„ Load Symbols from HF")
415
+ bt_syms = gr.CheckboxGroup(label="Symbols", choices=[], value=[])
416
+ bt_tfs = gr.CheckboxGroup(
417
+ label="Timeframes", value=["1h","4h"],
418
+ choices=["1m","5m","15m","30m","1h","4h","1d"])
419
+ bt_filt = gr.Textbox(label="Strategy filter (optional)")
420
+ bt_max = gr.Slider(0, 500, value=0, step=10, label="Max strategies (0=all)")
421
+ bt_viable= gr.Checkbox(label="Push only VIABLE to HuggingFace", value=True)
422
+ bt_run = gr.Button("πŸš€ Run Julia Backtests", variant="primary", size="lg")
423
+ with gr.Column(scale=1):
424
+ bt_out = gr.Textbox(label="Summary", lines=12, interactive=False, elem_classes=["status-box"])
425
+ bt_log = gr.Textbox(label="Log", lines=12, interactive=False, elem_classes=["status-box"])
426
+ bt_load.click(fn=load_symbols, outputs=[bt_syms])
427
+ bt_run.click(fn=run_backtests,
428
+ inputs=[bt_syms, bt_tfs, bt_filt, bt_max, bt_viable],
429
+ outputs=[bt_out, bt_log])
430
+
431
+ # Tab 4 β€” Results
432
+ with gr.Tab("πŸ† Results"):
433
+ gr.Markdown("### Viable Strategies β€” Download MT5 `.set` & Julia Configs")
434
+ res_ref = gr.Button("πŸ”„ Refresh from HuggingFace", variant="primary")
435
+ res_tbl = gr.Dataframe(
436
+ headers=["Strategy","Symbol","TF","Sharpe","Max DD","Win%","PF","Score"],
437
+ datatype=["str"]*8, interactive=False)
438
+ res_cnt = gr.Markdown("")
439
+ gr.Markdown("#### Download individual file")
440
+ with gr.Row():
441
+ rn = gr.Textbox(label="Strategy name"); rs = gr.Textbox(label="Symbol")
442
+ rt = gr.Textbox(label="Timeframe")
443
+ rf = gr.Dropdown(choices=["MT5 .set file","Optimal JSON",
444
+ "Julia config","Full report"],
445
+ value="MT5 .set file", label="File type")
446
+ rdb = gr.Button("⬇️ Download", variant="primary"); rdf = gr.File(label="")
447
+ gr.Markdown("#### Batch download all viable strategies")
448
+ with gr.Row():
449
+ rsb = gr.Button("🎯 All MT5 .set (ZIP)"); rsf = gr.File(label="")
450
+ res_ref.click(fn=load_results, outputs=[res_tbl, res_cnt])
451
+ rdb.click(fn=dl_result_file, inputs=[rn,rs,rt,rf], outputs=[rdf])
452
+ rsb.click(fn=dl_all_sets, outputs=[rsf])
453
+ demo.load(fn=load_results, outputs=[res_tbl, res_cnt])
454
+
455
+ # Tab 5 β€” Setup
456
+ with gr.Tab("βš™οΈ Setup & Status"):
457
+ gr.Markdown("""### Required Secrets (Space Settings β†’ Variables and Secrets)
458
+
459
+ | Secret | Description |
460
+ |--------|-------------|
461
+ | `ANTHROPIC_API_KEY` | Claude API key |
462
+ | `HF_TOKEN` | HuggingFace write token |
463
+ | `HF_DATASET_REPO` | `your-username/quant-knowledge-base` |
464
+ | `HF_TICK_REPO` | `your-username/tick-data` |
465
+
466
+ ### Tick Data Format
467
+ Upload to your `tick-data` dataset:
468
+ ```
469
+ EURUSD/ticks.parquet (columns: timestamp, bid, ask OR open,high,low,close,volume)
470
+ BTCUSDT/1h.parquet (pre-built OHLCV β€” faster)
471
+ ```
472
+ """)
473
+ cfg_ref = gr.Button("πŸ”„ Check Status")
474
+ cfg_out = gr.Markdown(check_config())
475
+ cfg_ref.click(fn=check_config, outputs=[cfg_out])
476
+
477
+ gr.HTML("""<div style="text-align:center;padding:.8em;color:#9ca3af;font-size:.75em">
478
+ Quant Knowledge Extractor Β· Julia 1.10 Engine Β· HuggingFace Spaces
479
+ </div>""")
480
+
481
+ if __name__ == "__main__":
482
+ demo.launch()
pipeline/__init__.py ADDED
File without changes
pipeline/exporter.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """pipeline/exporter.py — Generate markdown, MT5 .set, Julia config strings."""
2
+ import re, json
3
+ from datetime import datetime
4
+
5
+
6
def slugify(t):
    """Return a lowercase, hyphen-separated slug of at most 50 characters.

    Characters other than word characters, whitespace and hyphens are
    dropped; runs of whitespace, underscores and hyphens collapse to a single
    hyphen. ``None`` (or any falsy value) yields an empty slug instead of
    raising.
    """
    t = str(t or "").lower().strip()
    t = re.sub(r"[^\w\s-]", "", t)
    t = re.sub(r"[\s_-]+", "-", t)
    return t[:50]
11
+
12
+
13
def strategy_md(rec):
    """Render a strategy record as a Markdown document.

    Args:
        rec: Strategy mapping. Reads ``name``, ``category``, ``sources``,
            ``canonical_id``, ``description``, ``entry_rules``,
            ``exit_rules``, ``filters``, ``mathematical_basis`` and
            ``layers`` (each layer may carry ``source_file`` and
            ``data.description``); every key is optional.

    Returns:
        The document as a single newline-joined string.
    """
    name = rec.get("name", "Unknown")
    sources = rec.get("sources", [])
    layers = rec.get("layers", [])
    lines = [
        f"# {name}", "",
        # Two trailing spaces are a Markdown hard line break; with only one,
        # the quoted metadata lines run together into a single paragraph.
        f"> **Category:** {rec.get('category', '')}  ",
        f"> **Sources:** {', '.join(sources)[:80]}  ",
        f"> **ID:** `{rec.get('canonical_id', '')}`",
        "", "---", "", "## Description", "",
        rec.get("description") or "",  # None would break the final join
        "",
    ]
    sections = (
        ("## Entry Rules", "entry_rules"),
        ("## Exit Rules", "exit_rules"),
        ("## Filters", "filters"),
    )
    for label, key in sections:
        items = rec.get(key, [])
        if items:
            lines += [label, ""]
            lines.extend(f"{i}. {rule}" for i, rule in enumerate(items, 1))
            lines.append("")
    if rec.get("mathematical_basis"):
        lines += ["## Mathematical Basis", "", rec["mathematical_basis"], ""]
    if layers:
        lines += [f"## Variants ({len(layers)} sources)", ""]
        for i, layer in enumerate(layers, 1):
            data = layer.get("data", {})
            lines.append(f"### Variant {i} — {layer.get('source_file', '')}")
            if data.get("description"):
                lines.append(data["description"])
            lines.append("")
    lines += ["---", "", "*Generated by Quant Knowledge Extractor — Julia Engine*"]
    return "\n".join(lines)
38
+
39
+
40
def formula_md(rec):
    """Render a formula record as a Markdown document.

    Args:
        rec: Formula mapping. Reads ``name``, ``category``, ``sources``,
            ``latex``, ``plain_text``, ``purpose`` and ``variables``
            (symbol -> description); every key is optional and empty
            sections are omitted.

    Returns:
        The document as a single newline-joined string.
    """
    lines = [
        f"# {rec.get('name', 'Unknown')}", "",
        # Two trailing spaces are a Markdown hard line break; with only one,
        # the category and sources lines merge into a single paragraph.
        f"> **Category:** {rec.get('category', '')}  ",
        f"> **Sources:** {', '.join(rec.get('sources', []))[:80]}",
        "", "---", "",
    ]
    if rec.get("latex"):
        lines += ["## Formula (LaTeX)", "", "$$", rec["latex"], "$$", ""]
    if rec.get("plain_text"):
        lines += ["## Plain Text", "", "```", rec["plain_text"], "```", ""]
    if rec.get("purpose"):
        lines += ["## Purpose", "", rec["purpose"], ""]
    if rec.get("variables"):
        lines += ["## Variables", "", "| Symbol | Description |", "|--------|-------------|"]
        lines.extend(f"| `{sym}` | {desc} |" for sym, desc in rec["variables"].items())
        lines.append("")
    lines += ["---", "", "*Generated by Quant Knowledge Extractor — Julia Engine*"]
    return "\n".join(lines)
57
+
58
+
59
def backtest_report_md(result: dict, record: dict) -> str:
    """Render a Markdown backtest report for one strategy/symbol/timeframe run.

    Args:
        result: Backtest result mapping. Reads ``strategy``, ``symbol``,
            ``timeframe``, ``is_viable``, ``robustness``, ``reasons``,
            ``optimal_params``, ``oos_sharpe_mean``, ``oos_sharpe_std``,
            ``oos_win_rate``, ``oos_max_dd``, ``oos_pf_mean``,
            ``oos_trades`` and ``wf_efficiency``; every key is optional.
        record: Strategy record; only its ``description`` is used.

    Returns:
        The report as a single newline-joined string.
    """
    # Local import: the module itself only imports ``datetime``.
    from datetime import timezone

    name = result.get("strategy", "Unknown")
    sym = result.get("symbol", "")
    tf = result.get("timeframe", "")
    status = "✅ VIABLE" if result.get("is_viable", False) else "❌ NOT VIABLE"
    # The stamp is labelled UTC, so it must be taken in UTC rather than local time.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    lines = [
        f"# {name}", f"## Backtest Report — {sym} {tf}", "",
        # Two trailing spaces are a Markdown hard line break inside the quote.
        f"> **Status:** {status}  ",
        f"> **Robustness:** {result.get('robustness', 0):.0f}/100  ",
        "> **Engine:** Julia 1.10  ",
        f"> **Generated:** {now}", "", "---", "", "## Viability", "",
    ]
    lines.extend(f"- {reason}" for reason in result.get("reasons", []))
    lines.append("")
    if result.get("optimal_params"):
        lines += ["## Optimal Parameters (Julia Walk-Forward)", "",
                  "| Parameter | Value |", "|-----------|-------|"]
        for key, value in result["optimal_params"].items():
            try:
                shown = f"{value:.4g}"
            except (TypeError, ValueError):
                # Non-numeric parameter (string, None): show it verbatim.
                shown = str(value)
            lines.append(f"| `{key}` | `{shown}` |")
        lines.append("")
    lines += [
        "## OOS Performance (Walk-Forward Aggregate)", "",
        "| Metric | Value |", "|--------|-------|",
        f"| Sharpe (mean±std) | `{result.get('oos_sharpe_mean', 0):.3f} ± {result.get('oos_sharpe_std', 0):.3f}` |",
        f"| Win Rate | `{result.get('oos_win_rate', 0):.1f}%` |",
        f"| Max Drawdown (mean) | `{result.get('oos_max_dd', 0):.2f}%` |",
        f"| Profit Factor | `{result.get('oos_pf_mean', 0):.3f}` |",
        f"| Total OOS Trades | `{result.get('oos_trades', 0)}` |",
        f"| WF Efficiency Ratio | `{result.get('wf_efficiency', 0):.3f}` |",
        f"| Robustness Score | `{result.get('robustness', 0):.0f}/100` |",
    ]
    if record.get("description"):
        lines += ["", "## Strategy Description", "", record["description"]]
    lines += ["", "---", "", "*Backtested using Julia 1.10 — BacktestEngine.jl + Optimizer.jl*"]
    return "\n".join(lines)
96
+
97
+
98
def optimal_json(result: dict, record: dict) -> dict:
    """Build the JSON-ready summary of one backtest result.

    Args:
        result: Backtest result mapping (``strategy``, ``symbol``,
            ``timeframe``, ``is_viable``, ``robustness``, ``optimal_params``,
            ``reasons`` and the ``oos_*`` / ``wf_efficiency`` metrics).
        record: Strategy record; accepted for signature parity with the other
            exporters and not read here.

    Returns:
        A dict with ``metadata``, ``optimal_parameters``, ``performance`` and
        ``viability`` sections. Metrics that are missing or ``None`` are
        reported as 0.
    """
    def metric(key, ndigits):
        # round(None, n) raises TypeError; treat an explicit None like a missing key.
        value = result.get(key)
        return round(0 if value is None else value, ndigits)

    return {
        "metadata": {
            "strategy": result.get("strategy"),
            "symbol": result.get("symbol"),
            "timeframe": result.get("timeframe"),
            "generated": datetime.now().isoformat(),
            "engine": "Julia 1.10",
            "is_viable": result.get("is_viable"),
            "robustness": result.get("robustness"),
        },
        "optimal_parameters": result.get("optimal_params", {}),
        "performance": {
            "oos_sharpe_mean": metric("oos_sharpe_mean", 4),
            "oos_sharpe_std": metric("oos_sharpe_std", 4),
            "oos_win_rate": metric("oos_win_rate", 2),
            "oos_max_dd_pct": metric("oos_max_dd", 2),
            "oos_pf_mean": metric("oos_pf_mean", 4),
            "oos_total_trades": result.get("oos_trades", 0),
            "wf_efficiency": metric("wf_efficiency", 4),
        },
        "viability": {
            "is_viable": result.get("is_viable"),
            "reasons": result.get("reasons", []),
        },
    }
119
+
120
+
121
def mt5_set(result: dict, record: dict) -> str:
    """Render an MT5 Strategy Tester ``.set`` file for a backtest result.

    Each entry of ``result["optimal_params"]`` becomes one input line. Keys
    are converted to CamelCase (split on ``-`` and ``_``). Numeric values are
    written as ``value||start||step||stop||1`` with
    ``step = max(1, 10% of |value|)``, ``start = max(0, value - 3*step)`` and
    ``stop = value + 3*step``; values that cannot be converted to ``float``
    are written verbatim as ``Key=value``.

    Args:
        result: Backtest result mapping (``strategy``, ``symbol``,
            ``timeframe``, ``optimal_params``, ``oos_sharpe_mean``,
            ``robustness``, ``is_viable``).
        record: Strategy record; accepted for signature parity with the other
            exporters and not read here.

    Returns:
        The file content as a single newline-joined string.
    """
    name = result.get("strategy", "Unknown")
    sym = result.get("symbol", "")
    tf = result.get("timeframe", "")
    params = result.get("optimal_params", {})
    lines = [
        "; MT5 Strategy Tester Set File — Generated by Julia Engine",
        f"; Strategy: {name}",
        f"; Symbol: {sym} Timeframe: {tf}",
        f"; OOS Sharpe: {result.get('oos_sharpe_mean', 0):.3f} Score: {result.get('robustness', 0):.0f}/100",
        f"; Viable: {result.get('is_viable', False)}",
        ";",
        "; 1. Open MT5 → View → Strategy Tester",
        "; 2. Select Expert Advisor",
        "; 3. Inputs → Open → select this file",
        "",
    ]
    for key, value in params.items():
        mt5_key = "".join(word.capitalize() for word in key.replace("-", "_").split("_"))
        try:
            fv = float(value)
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures fall back to the raw value; a bare
            # ``except`` would also hide unrelated errors and KeyboardInterrupt.
            lines.append(f"{mt5_key}={value}")
            continue
        step = max(1.0, abs(fv) * 0.1)
        lines.append(
            f"{mt5_key}={fv:.4g}||{max(0, fv - step * 3):.4g}||{step:.4g}||{fv + step * 3:.4g}||1"
        )
    lines += ["", "RiskPercent=1.0||0.5||0.5||3.0||1", "LotSize=0.1||0.01||0.01||1.0||1"]
    return "\n".join(lines)
141
+
142
+
143
def julia_config(result: dict) -> str:
    """Render a Julia source snippet holding the optimal parameters.

    The snippet defines ``optimal_params()`` returning ``<Struct>Params(...)``
    and a ``BACKTEST_METADATA`` named tuple. ``<Struct>`` is built from the
    first four whitespace-separated words of the strategy name, each
    capitalised, with non-word characters removed.

    Args:
        result: Backtest result mapping (``strategy``, ``symbol``,
            ``timeframe``, ``optimal_params``, ``is_viable``,
            ``oos_sharpe_mean``, ``oos_max_dd``, ``oos_win_rate``,
            ``wf_efficiency``, ``robustness``); every key is optional.

    Returns:
        The Julia source text.
    """
    def jl_string(value) -> str:
        # Backslash first, then the quote and ``$`` (Julia string interpolation).
        text = str(value)
        for ch in ("\\", '"', "$"):
            text = text.replace(ch, "\\" + ch)
        return f'"{text}"'

    def jl_literal(value) -> str:
        # Numbers keep the original %.6g rendering; anything float() rejects
        # becomes a quoted Julia string instead of raising.
        try:
            return f"{float(value):.6g}"
        except (TypeError, ValueError, OverflowError):
            return jl_string(value)

    name = result.get("strategy", "Unknown")
    sym = result.get("symbol", "")
    tf = result.get("timeframe", "")
    # Punctuation in the name (quotes, parentheses, hyphens) is not legal in a
    # Julia identifier, so it is stripped from the struct name.
    struct = re.sub(r"\W", "", "".join(w.capitalize() for w in name.split()[:4]))
    params = result.get("optimal_params", {})
    # Call arguments must be comma-separated in Julia; a bare newline between
    # ``a = 1`` and ``b = 2`` is a parse error.
    plines = ",\n        ".join(
        f'{k.lower().replace("-", "_")} = {jl_literal(v)}'
        for k, v in params.items()
    ) or "# no parameters"
    # bool() keeps a missing/None flag from rendering as the non-Julia ``none``.
    viable = str(bool(result.get("is_viable", False))).lower()
    sharpe = result.get("oos_sharpe_mean", 0)
    return f'''# Optimal Parameters — {name}
# Engine: Julia 1.10 BacktestEngine.jl + Optimizer.jl
# Symbol: {sym} Timeframe: {tf}
# OOS Sharpe: {sharpe:.3f} Robustness: {result.get("robustness", 0):.0f}/100
# Viable: {result.get("is_viable", False)}
# Generated: {datetime.now().strftime("%Y-%m-%d")}

function optimal_params()::{struct}Params
    return {struct}Params(
        {plines}
    )
end

const BACKTEST_METADATA = (
    strategy = {jl_string(name)},
    symbol = {jl_string(sym)},
    timeframe = {jl_string(tf)},
    engine = "Julia 1.10",
    oos_sharpe = {sharpe:.4f},
    max_dd_pct = {result.get("oos_max_dd", 0):.2f},
    win_rate = {result.get("oos_win_rate", 0):.1f},
    wf_eff = {result.get("wf_efficiency", 0):.4f},
    viable = {viable},
)
'''
177
+
178
+
179
def index_md(all_results: list) -> str:
    """Render the Markdown index of all backtest results.

    Viable results are listed first, sorted by ``oos_sharpe_mean``
    descending; up to 30 non-viable results follow, each with the first 60
    characters of its first reason.

    Args:
        all_results: Backtest result mappings (``strategy``, ``symbol``,
            ``timeframe``, ``is_viable``, ``reasons`` and the ``oos_*`` /
            ``robustness`` metrics).

    Returns:
        The index as a single newline-joined string.
    """
    # Local import: the module itself only imports ``datetime``.
    from datetime import timezone

    viable = sorted(
        (r for r in all_results if r.get("is_viable")),
        key=lambda r: r.get("oos_sharpe_mean", 0),
        reverse=True,
    )
    not_viable = [r for r in all_results if not r.get("is_viable")]
    # The stamp is labelled UTC, so it must be taken in UTC rather than local time.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    lines = [
        "# Backtest Results Index — Julia Engine", "",
        # Two trailing spaces are a Markdown hard line break inside the quote.
        f"> Generated: {now} Engine: Julia 1.10  ",
        f"> Total: {len(all_results)} Viable: {len(viable)}", "", "---", "",
        "## ✅ Viable Strategies (by OOS Sharpe)", "",
        "| # | Strategy | Symbol | TF | Sharpe | DD% | Win% | PF | Score |",
        "|---|----------|--------|----|--------|-----|------|----|-------|",
    ]
    for i, r in enumerate(viable, 1):
        lines.append(
            f"| {i} | {r.get('strategy', '')[:35]} | {r.get('symbol', '')} | {r.get('timeframe', '')} | "
            f"{r.get('oos_sharpe_mean', 0):.2f} | {r.get('oos_max_dd', 0):.1f} | "
            f"{r.get('oos_win_rate', 0):.1f} | {r.get('oos_pf_mean', 0):.2f} | {r.get('robustness', 0):.0f} |")
    lines += ["", "## ❌ Not Viable", "",
              "| Strategy | Symbol | TF | Reason |", "|----------|--------|----|--------|"]
    for r in not_viable[:30]:
        reasons = r.get("reasons", [])
        # str() keeps a non-string reason from breaking the slice.
        reason = str(reasons[0])[:60] if reasons else ""
        lines.append(
            f"| {r.get('strategy', '')[:35]} | {r.get('symbol', '')} | {r.get('timeframe', '')} | {reason} |")
    return "\n".join(lines)
pipeline/extractor.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """pipeline/extractor.py — Claude API extraction + 3-layer deduplication."""
2
+ import json, time, hashlib
3
+ from typing import Optional
4
+ import numpy as np
5
+ from sklearn.feature_extraction.text import TfidfVectorizer
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
+ import anthropic
8
+ from loguru import logger
9
+ import utils.config as cfg
10
+
11
+
12
class AIExtractor:
    """Extract strategies, formulas and systems from text chunks via the Claude API.

    Attributes:
        client: ``anthropic.Anthropic`` client built from ``cfg.ANTHROPIC_API_KEY``.
        tokens_used: Running total of input + output tokens reported by the API.
    """

    MODEL = "claude-sonnet-4-20250514"
    # Top-level keys expected in the model's JSON answer.
    _KINDS = ("strategies", "formulas", "systems")
    # Fields concatenated, in this order, to build an item's content hash.
    _HASH_FIELDS = ("description", "plain_text", "entry_system", "name")

    def __init__(self):
        self.client = anthropic.Anthropic(api_key=cfg.ANTHROPIC_API_KEY)
        self.tokens_used = 0

    @classmethod
    def _empty(cls) -> dict:
        """Return a fresh result dict with an empty list per kind."""
        return {kind: [] for kind in cls._KINDS}

    def extract(self, chunk) -> dict:
        """Run extraction on one chunk.

        Chunks with fewer than 20 words, and chunks for which the API call
        yields no text, produce an empty result.

        Returns:
            Dict with ``strategies``, ``formulas`` and ``systems`` lists.
        """
        if chunk.word_count < 20:
            return self._empty()
        prompt = cfg.EXTRACTION_PROMPT.format(
            source_file=chunk.source_file, page_start=chunk.page_start,
            page_end=chunk.page_end, text=chunk.text)
        raw = self._call(prompt)
        if not raw:
            return self._empty()
        return self._parse(raw, chunk)

    def _call(self, prompt, retries=3):
        """Send *prompt* to the model and return the first content block's text.

        Failed attempts are retried with exponential back-off starting at two
        seconds. Returns ``""`` once all attempts have failed.
        """
        delay = 2.0
        for attempt in range(retries):
            last_attempt = attempt == retries - 1
            try:
                resp = self.client.messages.create(
                    model=self.MODEL, max_tokens=4096,
                    messages=[{"role": "user", "content": prompt}])
                self.tokens_used += resp.usage.input_tokens + resp.usage.output_tokens
                return resp.content[0].text if resp.content else ""
            except anthropic.RateLimitError:
                if last_attempt:
                    logger.warning("Rate limit — retries exhausted")
                    break
                logger.warning(f"Rate limit — {delay}s")
            except Exception as e:
                logger.error(f"API: {e}")
                if last_attempt:
                    break
            # Only sleep when another attempt will follow.
            time.sleep(delay)
            delay *= 2
        return ""

    @staticmethod
    def _load_json(raw):
        """Parse *raw* as JSON, falling back to its outermost ``{...}`` span.

        Returns the parsed value, ``{}`` when no brace span exists, or ``None``
        when the span is not valid JSON either.
        """
        try:
            return json.loads(raw)
        except ValueError:
            pass
        start, end = raw.find("{"), raw.rfind("}")
        if start == -1 or end < start:
            return {}
        try:
            return json.loads(raw[start:end + 1])
        except ValueError:
            return None

    def _parse(self, raw, chunk):
        """Turn the model's raw answer into a validated result dict.

        A leading Markdown code fence is stripped first. Only dict items with
        a truthy ``name`` are kept; each is tagged with the chunk's source
        file and page range and given a ``content_hash``. Anything that is not
        the expected shape (non-object JSON, ``null`` lists, non-dict items)
        is ignored instead of raising.
        """
        raw = raw.strip()
        if raw.startswith("```"):
            raw = "\n".join(raw.split("\n")[1:]).rstrip("`").strip()
        data = self._load_json(raw)
        result = self._empty()
        if not isinstance(data, dict):
            return result
        for kind in self._KINDS:
            items = data.get(kind)
            if not isinstance(items, list):
                continue
            for item in items:
                if not (isinstance(item, dict) and item.get("name")):
                    continue
                item.update({"source_file": chunk.source_file,
                             "source_pages": f"{chunk.page_start}-{chunk.page_end}"})
                # The model may emit null or non-string fields; coerce them so
                # hashing cannot raise.
                parts = []
                for field in self._HASH_FIELDS:
                    value = item.get(field, "")
                    parts.append("" if value is None else str(value))
                item["content_hash"] = _hash("".join(parts))
                result[kind].append(item)
        return result

    def compile_strategy_code(self, record: dict) -> str:
        """Ask Claude to generate Julia signal code for this strategy.

        Only a fixed subset of *record*'s keys is sent. If the answer contains
        Markdown fences, only the lines inside fenced regions are returned.
        Returns ``""`` when the API call yields nothing.
        """
        compact = {k: record.get(k) for k in
                   ("name", "category", "description", "entry_rules", "exit_rules",
                    "filters", "parameters", "mathematical_basis")}
        prompt = cfg.COMPILER_PROMPT.format(
            strategy_json=json.dumps(compact, indent=2))
        code = self._call(prompt)
        if not code:
            return ""
        if "```" in code:
            in_block = False
            out = []
            for line in code.split("\n"):
                if line.strip().startswith("```"):
                    in_block = not in_block
                    continue
                if in_block:
                    out.append(line)
            code = "\n".join(out)
        return code.strip()
85
+
86
+
87
class Deduplicator:
    """Merge freshly extracted items into a knowledge base without duplicates.

    Each item is first matched by exact ``content_hash``; failing that, by
    TF-IDF cosine similarity against the existing entries of the same kind.
    """

    _KINDS = ("strategies", "formulas", "systems")

    def __init__(self, threshold=None):
        # `is None` instead of `or`, so an explicit threshold of 0.0 is honoured.
        self.threshold = cfg.SIMILARITY_THRESHOLD if threshold is None else threshold
        self._vec = TfidfVectorizer(ngram_range=(1, 2), max_features=5000, stop_words="english")

    def process(self, extracted, kb):
        """Fold *extracted* into *kb* in place.

        Args:
            extracted: Dict with optional ``strategies`` / ``formulas`` /
                ``systems`` lists of item dicts.
            kb: Dict holding one ``{canonical_id: item}`` store per kind.

        Returns:
            Per-kind counters: ``{"added": n, "merged": n, "skipped": n}``.
        """
        stats = {k: {"added": 0, "merged": 0, "skipped": 0} for k in self._KINDS}
        for kind in self._KINDS:
            for item in extracted.get(kind, []):
                stats[kind][self._process_one(item, kb[kind], kind)] += 1
        return stats

    def _process_one(self, item, store, kind):
        """Classify one item as ``"skipped"``, ``"merged"`` or ``"added"``."""
        h = item.get("content_hash", "")
        # Exact duplicate: only record the additional source.
        for existing in store.values():
            if existing.get("content_hash") == h:
                self._add_src(item, existing)
                return "skipped"
        # Near duplicate: keep the item as an extra layer on the match.
        sid = self._similar(item, store, kind)
        if sid:
            self._merge(item, store[sid])
            return "merged"
        cid = _cid(item["name"], h, kind)
        item["canonical_id"] = cid
        item["sources"] = [item.get("source_file", "")]
        item["layers"] = []
        store[cid] = item
        return "added"

    def _similar(self, item, store, kind):
        """Return the id of the most similar stored entry, or ``None``.

        ``None`` is returned when the store is empty, when the best cosine
        similarity is below ``self.threshold``, or when vectorisation fails
        with ``ValueError``.
        """
        if not store:
            return None
        texts = [_text(v, kind) for v in store.values()] + [_text(item, kind)]
        try:
            mat = self._vec.fit_transform(texts)
        except ValueError as e:
            logger.debug(f"Similarity check skipped: {e}")
            return None
        sims = cosine_similarity(mat[-1], mat[:-1])[0]
        idx = int(np.argmax(sims))
        if sims[idx] >= self.threshold:
            return list(store)[idx]
        return None

    @staticmethod
    def _add_src(item, existing):
        """Append *item*'s source file to ``existing["sources"]`` if it is new."""
        s = item.get("source_file", "")
        if s and s not in existing.get("sources", []):
            existing.setdefault("sources", []).append(s)

    @staticmethod
    def _merge(item, existing):
        """Attach *item* to *existing* as a layer unless its hash is already there."""
        Deduplicator._add_src(item, existing)
        layers = existing.setdefault("layers", [])
        known = {layer.get("content_hash") for layer in layers}
        if item.get("content_hash") not in known:
            layers.append({"source_file": item.get("source_file"),
                           "content_hash": item.get("content_hash"),
                           "data": {k: v for k, v in item.items()
                                    if k not in ("sources", "layers", "canonical_id")}})
140
+
141
+
142
+ def _hash(text):
143
+ return hashlib.sha256(" ".join(text.lower().split()).encode()).hexdigest()[:16]
144
+
145
+ def _cid(name, h, kind):
146
+ return hashlib.md5(f"{kind}_{name}_{h}".encode()).hexdigest()[:12]
147
+
148
+ def _text(item, kind):
149
+ if kind=="strategies":
150
+ return f"{item.get('name','')} {item.get('description','')} {' '.join(item.get('entry_rules',[]))}"
151
+ if kind=="formulas":
152
+ return f"{item.get('name','')} {item.get('plain_text','')} {item.get('purpose','')}"
153
+ return f"{item.get('name','')} {item.get('entry_system','')} {item.get('exit_system','')}"
pipeline/julia_bridge.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ pipeline/julia_bridge.py
3
+ ─────────────────────────────────────────────────────
4
+ Python bridge to Julia via juliacall.
5
+
6
+ Julia is loaded ONCE on first use (lazy init) to avoid
7
+ slowing down Space startup. Subsequent calls are instant.
8
+
9
+ juliacall converts:
10
+ Python list/np.ndarray → Julia Vector{Float64}
11
+ Python dict → Julia Dict
12
+ Julia Dict{String,Any} → Python dict
13
+ Julia Vector → Python list
14
+
15
+ All heavy computation — indicators, backtest, optimizer —
16
+ runs in Julia. Python only calls this bridge.
17
+ """
18
+
19
+ from __future__ import annotations
20
+ import os
21
+ from pathlib import Path
22
+ from typing import Optional
23
+ import numpy as np
24
+ from loguru import logger
25
+
26
+ # ── Julia runtime (loaded once) ───────────────────────
27
+ _jl = None
28
+ _QE = None # QuantEngine module
29
+
30
+ JULIA_SRC = Path(__file__).parent.parent / "src"
31
+
32
+
33
def _init_julia():
    """Lazy-load Julia + QuantEngine on first call.

    The module globals ``_jl`` and ``_QE`` are assigned only after every
    loading step has succeeded. A failed attempt therefore leaves both as
    ``None`` and the next call retries, rather than returning early with a
    half-initialised runtime.

    Raises:
        RuntimeError: If juliacall cannot be imported or QuantEngine fails
            to load. The original exception is chained as the cause.
    """
    global _jl, _QE
    if _QE is not None:
        return

    logger.info("Initializing Julia runtime…")
    try:
        from juliacall import Main as jl

        # Forward-slash paths inside Julia raw strings: neither backslashes
        # nor `$` in the install path can be misread as escapes/interpolation.
        src_dir = JULIA_SRC.as_posix()
        entry = (JULIA_SRC / "QuantEngine.jl").as_posix()

        # Load the QuantEngine module
        jl.seval(f'push!(LOAD_PATH, raw"{src_dir}")')
        jl.seval(f'include(raw"{entry}")')
        jl.seval("using .QuantEngine")
        engine = jl.QuantEngine
    except Exception as e:
        logger.error(f"Julia init failed: {e}")
        raise RuntimeError(f"Julia init failed: {e}") from e

    _jl, _QE = jl, engine
    logger.info("Julia QuantEngine loaded ✓")
53
+
54
+
55
def _arr(x) -> "jl.Vector":
    """Convert a Python list or ndarray to a Julia ``Vector{Float64}``.

    The input is coerced to float64 with NumPy and handed to Julia as a
    plain Python list.
    """
    _init_julia()
    values = np.asarray(x, dtype=np.float64).tolist()
    vector_type = _jl.Vector[_jl.Float64]
    return _jl.convert(vector_type, values)
61
+
62
+
63
def _signals(x) -> "jl.Vector":
    """Convert a signal sequence to a Julia ``Vector{Int64}``.

    Every element is passed through ``int()`` first.
    """
    _init_julia()
    as_ints = list(map(int, x))
    vector_type = _jl.Vector[_jl.Int64]
    return _jl.convert(vector_type, as_ints)
68
+
69
+
70
+ def _pydict(jl_dict) -> dict:
71
+ """Recursively convert Julia Dict to Python dict."""
72
+ out = {}
73
+ for k in jl_dict.keys():
74
+ v = jl_dict[k]
75
+ k_py = str(k)
76
+ if hasattr(v, "keys"):
77
+ out[k_py] = _pydict(v)
78
+ elif hasattr(v, "__iter__") and not isinstance(v, str):
79
+ out[k_py] = list(v)
80
+ elif isinstance(v, bool):
81
+ out[k_py] = bool(v)
82
+ elif hasattr(v, "__float__"):
83
+ out[k_py] = float(v)
84
+ elif hasattr(v, "__int__"):
85
+ out[k_py] = int(v)
86
+ else:
87
+ out[k_py] = v
88
+ return out
89
+
90
+
91
+ # ── Public API ────────────────────────────────────────
92
+
93
def full_backtest_pipeline(
    strategy_code: str,
    strategy_name: str,
    open_p: list | np.ndarray,
    high: list | np.ndarray,
    low: list | np.ndarray,
    close: list | np.ndarray,
    volume: list | np.ndarray,
    timeframe: str,
    symbol: str,
    n_windows: int = 5,
    is_ratio: float = 0.70,
    min_trades: int = 30,
    min_sharpe: float = 0.5,
    max_combos: int = 300,
    initial_equity: float = 10_000.0,
    commission_pct: float = 0.0002,
    risk_per_trade: float = 0.01,
) -> dict:
    """Run the full Julia backtest pipeline for one strategy.

    The five price/volume series are converted to Julia vectors and passed,
    together with the keyword settings, to ``QuantEngine.full_backtest_pipeline``.

    Returns:
        The Julia result converted to a Python dict. If the Julia call or the
        conversion raises, the error is logged and a dict with
        ``is_valid=False``, ``error``, ``strategy``, ``symbol`` and
        ``timeframe`` is returned instead.

    Raises:
        RuntimeError: If the Julia runtime itself cannot be initialised.
    """
    _init_julia()
    try:
        series = [_arr(s) for s in (open_p, high, low, close, volume)]
        settings = {
            "n_windows": n_windows,
            "is_ratio": is_ratio,
            "min_trades": min_trades,
            "min_sharpe": min_sharpe,
            "max_combos": max_combos,
            "initial_equity": initial_equity,
            "commission_pct": commission_pct,
            "risk_per_trade": risk_per_trade,
        }
        outcome = _QE.full_backtest_pipeline(
            strategy_code, strategy_name, *series, timeframe, symbol, **settings
        )
        return _pydict(outcome)
    except Exception as e:
        logger.error(f"Julia pipeline error [{strategy_name}]: {e}")
        failure = {"is_valid": False, "error": str(e)}
        failure["strategy"] = strategy_name
        failure["symbol"] = symbol
        failure["timeframe"] = timeframe
        return failure
142
+
143
+
144
def run_backtest_only(
    open_p: np.ndarray, high: np.ndarray, low: np.ndarray,
    close: np.ndarray, volume: np.ndarray,
    signals: list | np.ndarray,
    timeframe: str = "1h",
    initial_equity: float = 10_000.0,
    commission_pct: float = 0.0002,
    risk_per_trade: float = 0.01,
) -> dict:
    """Run a single backtest with pre-computed signals.

    Builds a ``QuantEngine.BacktestConfig`` from the three money-management
    settings and calls ``QuantEngine.run_backtest``.

    Returns:
        Dict of headline metrics, the equity curve, ``is_valid`` and ``error``.
        If anything raises, the error is logged (as ``full_backtest_pipeline``
        does) and ``{"is_valid": False, "error": ...}`` is returned.

    Raises:
        RuntimeError: If the Julia runtime itself cannot be initialised.
    """
    _init_julia()
    try:
        cfg = _QE.BacktestConfig(
            initial_equity=initial_equity,
            commission_pct=commission_pct,
            risk_per_trade=risk_per_trade,
        )
        r = _QE.run_backtest(
            _arr(open_p), _arr(high), _arr(low),
            _arr(close), _arr(volume),
            _signals(signals), timeframe, cfg,
        )
        return {
            "total_return": float(r.total_return),
            "cagr": float(r.cagr),
            "sharpe": float(r.sharpe),
            "sortino": float(r.sortino),
            "calmar": float(r.calmar),
            "max_dd": float(r.max_dd),
            "n_trades": int(r.n_trades),
            "win_rate": float(r.win_rate),
            "profit_factor": float(r.profit_factor),
            "final_equity": float(r.final_equity),
            "equity_curve": list(r.equity_curve),
            "is_valid": bool(r.is_valid),
            "error": str(r.error_msg),
        }
    except Exception as e:
        # Previously swallowed silently, which hid the cause of failed runs.
        logger.error(f"Julia backtest error: {e}")
        return {"is_valid": False, "error": str(e)}
183
+
184
+
185
def indicator(name: str, *args, **kwargs):
    """Call a QuantEngine function by name and return NumPy data.

    Positional arguments that are lists or ndarrays are converted to Julia
    float vectors; everything else, including keyword arguments, is passed
    through untouched.

    Returns:
        One ``np.ndarray``, or a tuple of arrays when the Julia function
        returns a tuple.

    Raises:
        ValueError: If QuantEngine has no attribute called *name*.

    Example:
        rsi_values = indicator("rsi", close_array, 14)
        upper, mid, lower = indicator("bbands", close_array, 20, 2.0)
    """
    _init_julia()
    fn = getattr(_QE, name, None)
    if fn is None:
        raise ValueError(f"Unknown indicator: {name}")

    converted = []
    for arg in args:
        needs_vector = isinstance(arg, (list, np.ndarray))
        converted.append(_arr(arg) if needs_vector else arg)

    result = fn(*converted, **kwargs)
    if not isinstance(result, tuple):
        return np.array(list(result))
    return tuple(np.array(list(part)) for part in result)
204
+
205
+
206
def julia_available() -> bool:
    """Report whether the Julia runtime and QuantEngine can be loaded.

    Triggers the lazy initialisation; any exception it raises is turned
    into ``False``.
    """
    try:
        _init_julia()
    except Exception:
        return False
    return _QE is not None
pipeline/pdf_processor.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """pipeline/pdf_processor.py — PDF text extraction + OCR fallback."""
2
+ import re
3
+ from pathlib import Path
4
+ from typing import Generator
5
+ from dataclasses import dataclass
6
+ import fitz, pdfplumber, pytesseract
7
+ from pdf2image import convert_from_path
8
+ from loguru import logger
9
+ import utils.config as cfg
10
+
11
+
12
@dataclass
class Chunk:
    """A span of text taken from consecutive pages of one source file."""

    source_file: str       # name of the file the text came from
    page_start: int        # first page number covered by the chunk
    page_end: int          # last page number covered by the chunk
    text: str              # the chunk's text
    was_ocr: bool = False  # whether any of the text was produced by OCR

    @property
    def word_count(self) -> int:
        """Number of whitespace-separated tokens in ``text``."""
        tokens = self.text.split()
        return len(tokens)
22
+
23
+
24
class PDFProcessor:
    """Extract text from a PDF, fall back to OCR for thin pages, and chunk it."""

    # Pages whose best native extraction is shorter than this are sent to OCR.
    MIN_CHARS = 80
    # Word budget per chunk, derived from the configured token budget.
    CHUNK_WORDS = cfg.MAX_TOKENS_PER_CHUNK // 2
    # Upper bound on pages rasterised per pdf2image call, to cap peak memory.
    OCR_BATCH = 8

    def process(self, pdf_path: Path) -> Generator[Chunk, None, None]:
        """Yield the chunks of *pdf_path* in page order."""
        pages = self._extract_pages(pdf_path)
        yield from self._chunk(pages, pdf_path.name)

    def _extract_pages(self, path):
        """Return ``[(page_number, text, was_ocr), ...]`` for every page.

        Each page is extracted with both PyMuPDF and pdfplumber and the longer
        text wins (pdfplumber on a tie). Pages shorter than ``MIN_CHARS`` are
        OCR'd; the OCR text is used only when it is longer than the native
        text. A failing extractor is logged and treated as having produced
        nothing.
        """
        mu = {}
        try:
            doc = fitz.open(str(path))
            try:
                for i, pg in enumerate(doc):
                    mu[i + 1] = self._clean(pg.get_text("text"))
            finally:
                doc.close()  # release the handle even if a page fails
        except Exception as e:
            logger.warning(f"PyMuPDF: {e}")

        pl = {}
        try:
            with pdfplumber.open(str(path)) as pdf:
                for i, pg in enumerate(pdf.pages):
                    try:
                        pl[i + 1] = self._clean(pg.extract_text() or "")
                    except Exception:
                        pl[i + 1] = ""
        except Exception as e:
            logger.warning(f"pdfplumber: {e}")

        total = max(len(mu), len(pl), 1)
        results = []
        ocr_needed = []
        for pnum in range(1, total + 1):
            mu_text, pl_text = mu.get(pnum, ""), pl.get(pnum, "")
            best = mu_text if len(mu_text) > len(pl_text) else pl_text
            results.append((pnum, best, False))
            if len(best) < self.MIN_CHARS:
                ocr_needed.append(pnum)

        if ocr_needed:
            ocr = self._ocr(path, ocr_needed)
            for i, (pnum, best, _) in enumerate(results):
                # Never let an empty or shorter OCR result erase real text.
                if pnum in ocr and len(ocr[pnum]) > len(best):
                    results[i] = (pnum, ocr[pnum], True)
        return results

    @classmethod
    def _page_runs(cls, pages):
        """Group page numbers into consecutive runs of at most ``OCR_BATCH``."""
        runs = []
        for p in sorted(set(pages)):
            if runs and p == runs[-1][1] + 1 and p - runs[-1][0] < cls.OCR_BATCH:
                runs[-1][1] = p
            else:
                runs.append([p, p])
        return [(first, last) for first, last in runs]

    def _ocr(self, path, pages):
        """OCR the given page numbers and return ``{page_number: text}``.

        Only the requested pages are rasterised, in small consecutive batches.
        A failing batch is logged and skipped without discarding the others.
        """
        out = {}
        for first, last in self._page_runs(pages):
            try:
                imgs = convert_from_path(str(path), dpi=cfg.OCR_DPI,
                                         first_page=first, last_page=last)
                for pnum, img in zip(range(first, last + 1), imgs):
                    out[pnum] = self._clean(
                        pytesseract.image_to_string(img, lang="eng", config="--psm 6"))
            except Exception as e:
                logger.warning(f"OCR: {e}")
        return out

    def _chunk(self, pages, source):
        """Yield ``Chunk`` objects of roughly ``CHUNK_WORDS`` words.

        Empty pages are skipped. When a chunk is emitted, its last page is
        carried over as the first page of the next chunk, so neighbouring
        chunks overlap by one page. A final chunk is emitted only if it holds
        at least one page that has not been emitted yet; otherwise it would
        just repeat the overlap page.
        """
        buf, words, any_ocr = [], 0, False
        p_start = p_last = None
        fresh = False  # True while buf holds text that no chunk has carried yet
        for pnum, text, ocr in pages:
            if not text:
                continue
            if p_start is None:
                p_start = pnum  # first page that actually has text
            buf.append(text)
            words += len(text.split())
            any_ocr = any_ocr or ocr
            p_last = pnum
            fresh = True
            if words >= self.CHUNK_WORDS:
                yield Chunk(source, p_start, pnum, "\n\n".join(buf), any_ocr)
                buf, words, p_start, any_ocr = [text], len(text.split()), pnum, ocr
                fresh = False
        if buf and fresh:
            yield Chunk(source, p_start, p_last, "\n\n".join(buf), any_ocr)

    @staticmethod
    def _clean(text):
        """Normalise extracted text.

        Re-joins words hyphenated across a line break, collapses three or more
        newlines to two, collapses runs of spaces/tabs to one space, and
        strips the ends. Falsy input yields ``""``.
        """
        if not text:
            return ""
        text = re.sub(r"(\w)-\n(\w)", r"\1\2", text)
        text = re.sub(r"\n{3,}", "\n\n", text)
        text = re.sub(r"[ \t]+", " ", text)
        return text.strip()
requirements.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Julia↔Python bridge
2
+ juliacall>=0.9.23
3
+
4
+ # Gradio UI
5
+ gradio>=5.25.0
6
+
7
+ # Claude API
8
+ anthropic>=0.30.0
9
+
10
+ # HuggingFace — no version pin, Spaces base image controls this
11
+ huggingface_hub
12
+
13
+ # PDF processing
14
+ pypdf>=4.2.0
15
+ pdfplumber>=0.11.0
16
+ pymupdf>=1.24.0
17
+ pdf2image>=1.17.0
18
+ pytesseract>=0.3.10
19
+ Pillow>=10.3.0
20
+
21
+ # Data
22
+ numpy>=1.26.0
23
+ pandas>=2.2.0
24
+ pyarrow>=16.0.0
25
+
26
+ # Deduplication (used in extractor.py)
27
+ scikit-learn>=1.5.0
28
+
29
+ # Utilities
30
+ loguru>=0.7.2
31
+ pydantic>=2.7.0
32
+ tqdm>=4.66.0
src/BacktestEngine.jl ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
BacktestEngine.jl — Vectorized backtest engine.
No includes. The ATR function is passed in by the caller (see `run_backtest`);
a built-in fallback is used when none is supplied.
"""
module BacktestEngine

using Statistics

export run_backtest, BacktestResult, BacktestConfig

# Bars per year used to annualise returns; unknown timeframes fall back to 252.
const BARS_PER_YEAR = Dict(
    "1m"=>525_600,"3m"=>175_200,"5m"=>105_120,"15m"=>35_040,"30m"=>17_520,
    "1h"=>8_760,"2h"=>4_380,"4h"=>2_190,"6h"=>1_460,"12h"=>730,
    "1d"=>252,"1w"=>52,
)

"""
Backtest settings. All fields have keyword defaults.
"""
Base.@kwdef struct BacktestConfig
    initial_equity :: Float64 = 10_000.0   # starting account equity
    commission_pct :: Float64 = 0.0002     # commission rate applied to entry + exit notional
    slippage_pct   :: Float64 = 0.0001     # adverse price adjustment on entries and exits
    risk_per_trade :: Float64 = 0.01       # fraction of equity risked up to the stop
    atr_mult       :: Float64 = 2.0        # stop distance in ATR multiples
    max_pos_pct    :: Float64 = 0.20       # cap on position notional as a fraction of equity
    atr_period     :: Int     = 14         # look-back handed to the ATR function
end

"""
Metrics produced by `run_backtest`. Percent-style fields (`total_return`,
`cagr`, `max_dd`, `win_rate`, `avg_win_pct`, `avg_loss_pct`) are scaled by 100.
"""
mutable struct BacktestResult
    total_return :: Float64; cagr :: Float64
    sharpe :: Float64; sortino :: Float64; calmar :: Float64
    max_dd :: Float64; max_dd_bars :: Int
    n_trades :: Int; n_wins :: Int; win_rate :: Float64
    profit_factor :: Float64; avg_win_pct :: Float64; avg_loss_pct :: Float64
    expectancy :: Float64; avg_bars_held :: Float64
    max_consec_wins:: Int; max_consec_loss:: Int
    final_equity :: Float64; total_comm :: Float64
    equity_curve :: Vector{Float64}
    n_bars :: Int; is_valid :: Bool; error_msg :: String
end

# Zeroed result; used for early error returns and as the base `_metrics` fills in.
BacktestResult(; n_bars=0, is_valid=false, error_msg="") = BacktestResult(
    0.0,0.0,0.0,0.0,0.0,0.0,0, 0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,
    10_000.0,0.0, Float64[], n_bars,is_valid,error_msg)

# Fallback ATR: SMA-seeded EMA (k = 2/(period+1)) of the true range.
# Entries before `period` are NaN. NaNs in the inputs are not skipped.
function _default_atr(high::Vector{Float64}, low::Vector{Float64},
                      close::Vector{Float64}, period::Int)::Vector{Float64}
    n = length(close); out = fill(NaN, n)
    (period < 1 || n < period) && return out
    tr = Vector{Float64}(undef, n)
    tr[1] = high[1] - low[1]
    for i in 2:n
        tr[i] = max(high[i]-low[i], abs(high[i]-close[i-1]), abs(low[i]-close[i-1]))
    end
    k = 2.0 / (period + 1.0)
    val = sum(@view tr[1:period]) / period
    out[period] = val
    for i in (period+1):n
        val = tr[i]*k + val*(1.0-k)
        out[i] = val
    end
    return out
end

"""
    run_backtest(open_p, high, low, close, volume, signals,
                 timeframe="1h", cfg=BacktestConfig(), atr_fn=identity)

Simulate a single-position strategy bar by bar.

`signals[i]` is the desired position on bar `i` (1 long, -1 short, 0 flat).
Entries and signal exits fill at the bar's close, stops fill at the stop price,
and both are adjusted adversely by `cfg.slippage_pct`. A position still open on
the last bar is closed at the final close without slippage. Re-entry is not
allowed on the bar a position was closed.

`atr_fn(high, low, close, period)` must return one value per bar. Leaving it at
its default `identity` selects the built-in fallback ATR, because `identity`
itself cannot take four arguments.

Returns an invalid `BacktestResult` (with `error_msg` set) for fewer than 50
bars or mismatched input lengths.
"""
function run_backtest(
    open_p::Vector{Float64}, high::Vector{Float64}, low::Vector{Float64},
    close::Vector{Float64}, volume::Vector{Float64}, signals::Vector{Int},
    timeframe::String="1h", cfg::BacktestConfig=BacktestConfig(),
    atr_fn::Function=identity, # passed from QuantEngine to avoid circular dep
)::BacktestResult
    n = length(close)
    n < 50 && return BacktestResult(; n_bars=n, error_msg="Need ≥50 bars, got $n")
    # The loop below runs under @inbounds, so every indexed array must have n elements.
    if length(high) != n || length(low) != n || length(signals) != n
        return BacktestResult(; n_bars=n,
            error_msg="high/low/signals must have the same length as close ($n)")
    end

    atr_v = atr_fn === identity ? _default_atr(high, low, close, cfg.atr_period) :
                                  atr_fn(high, low, close, cfg.atr_period)
    length(atr_v) == n || return BacktestResult(; n_bars=n,
        error_msg="ATR function returned $(length(atr_v)) values for $n bars")

    equity = cfg.initial_equity
    eq = fill(cfg.initial_equity, n)

    # A trade spans at least two bars (no same-bar re-entry), so n÷2+1 slots suffice.
    cap = div(n, 2) + 1
    tpnls = Vector{Float64}(undef, cap)
    twins = Vector{Bool}(undef, cap)
    tbars = Vector{Int}(undef, cap)
    tents = Vector{Float64}(undef, cap)
    tszs  = Vector{Float64}(undef, cap)
    nt = 0; tcomm = 0.0

    pos=0; epx=0.0; psz=0.0; spx=0.0; ebar=1; ltrade=0

    @inbounds for i in 2:n
        px=close[i]; sig=signals[i]
        # 1) Stop-loss check against this bar's range.
        if pos != 0
            hit = (pos==1 && low[i]<=spx) || (pos==-1 && high[i]>=spx)
            if hit
                # Exit slippage works against the trader: a long sells lower,
                # a short buys back higher.
                ep = spx*(1.0-cfg.slippage_pct*pos)
                pnl = pos*(ep-epx)*psz; comm=(epx+ep)*psz*cfg.commission_pct
                nt+=1; tpnls[nt]=pnl-comm; twins[nt]=pnl>comm
                tbars[nt]=i-ebar; tents[nt]=epx; tszs[nt]=psz
                tcomm+=comm; equity+=pnl-comm; pos=0; ltrade=i
            end
        end
        # 2) Signal exit: flat or opposite signal closes at the bar's close.
        if pos!=0 && (sig==0 || sig==-pos)
            ep=px*(1.0-cfg.slippage_pct*pos)
            pnl=pos*(ep-epx)*psz; comm=(epx+ep)*psz*cfg.commission_pct
            nt+=1; tpnls[nt]=pnl-comm; twins[nt]=pnl>comm
            tbars[nt]=i-ebar; tents[nt]=epx; tszs[nt]=psz
            tcomm+=comm; equity+=pnl-comm; pos=0; ltrade=i
        end
        # 3) Entry: sized by risk to the ATR stop, capped by max_pos_pct.
        if pos==0 && sig!=0 && (i-ltrade)>=1
            ep=px*(1.0+cfg.slippage_pct*sig)
            av = isnan(atr_v[i]) ? px*0.01 : atr_v[i]   # 1% of price while ATR warms up
            dist=cfg.atr_mult*av
            sz=min(equity*cfg.risk_per_trade/max(dist,1e-8), equity*cfg.max_pos_pct/ep)
            sz=max(sz,1e-8)
            pos=sig; epx=ep; psz=sz; spx=ep-sig*dist; ebar=i
        end
        # Mark-to-market equity for this bar.
        eq[i] = equity + (pos!=0 ? pos*(close[i]-epx)*psz : 0.0)
    end
    # Force-close whatever is still open on the last bar.
    if pos!=0
        ep=close[n]; pnl=pos*(ep-epx)*psz; comm=(epx+ep)*psz*cfg.commission_pct
        nt+=1; tpnls[nt]=pnl-comm; twins[nt]=pnl>comm
        tbars[nt]=n-ebar; tents[nt]=epx; tszs[nt]=psz
        tcomm+=comm; equity+=pnl-comm; eq[n]=equity
    end

    return _metrics(eq, tpnls[1:nt], twins[1:nt], tbars[1:nt],
                    tents[1:nt], tszs[1:nt], tcomm, n, timeframe, cfg)
end

# Build the BacktestResult from the equity curve and the per-trade arrays.
function _metrics(eq,pnls,wins,bars,ents,szs,tcomm,n_bars,tf,cfg)
    init=cfg.initial_equity; final=eq[end]; bpy=get(BARS_PER_YEAR,tf,252)
    r=BacktestResult(;n_bars,is_valid=true)
    r.equity_curve=eq; r.final_equity=final; r.total_comm=tcomm
    r.total_return=(final-init)/init*100.0
    yrs=n_bars/bpy
    r.cagr = yrs>0&&final>0 ? ((final/init)^(1.0/yrs)-1.0)*100.0 : 0.0
    # Maximum drawdown (fraction of peak) and the longest run of bars below a peak.
    peak=eq[1]; mxdd=0.0; ddr=0; mxddb=0
    for v in eq
        peak=max(peak,v)
        dd = peak>0 ? (peak-v)/peak : 0.0
        mxdd=max(mxdd,dd)
        v<peak ? (ddr+=1; mxddb=max(mxddb,ddr)) : (ddr=0)
    end
    r.max_dd=mxdd*100.0; r.max_dd_bars=mxddb
    # Per-bar returns; NaN and ±Inf (from a zero equity value) are dropped.
    rets=diff(eq)./eq[1:end-1]; filter!(isfinite,rets)
    if length(rets)>1
        mu=mean(rets); sg=std(rets)
        ds_v=filter(x->x<0,rets); ds=length(ds_v)>1 ? std(ds_v) : sg
        af=sqrt(Float64(bpy))
        r.sharpe=sg>0 ? mu/sg*af : 0.0; r.sortino=ds>0 ? mu/ds*af : 0.0
        r.calmar=r.max_dd>0 ? r.cagr/r.max_dd : 0.0
    end
    r.n_trades=length(pnls)
    r.n_trades==0 && return r
    nw=count(wins); r.n_wins=nw; r.win_rate=nw/r.n_trades*100.0
    gw=sum(pnls[wins]); gl=abs(sum(pnls[.!wins]))
    r.profit_factor=gl>0 ? gw/gl : (gw>0 ? Inf : 0.0)
    pct=pnls./(ents.*szs.+1e-10).*100.0
    r.avg_win_pct  = nw>0 ? mean(pct[wins]) : 0.0
    r.avg_loss_pct = (r.n_trades-nw)>0 ? mean(pct[.!wins]) : 0.0
    r.expectancy=r.win_rate/100.0*r.avg_win_pct+(1-r.win_rate/100.0)*r.avg_loss_pct
    r.avg_bars_held=mean(Float64.(bars))
    r.max_consec_wins=_maxrun(wins); r.max_consec_loss=_maxrun(.!wins)
    return r
end

# Longest run of consecutive `true` values.
# Accepts any Bool vector: `.!wins` is a BitVector, not a Vector{Bool}.
function _maxrun(b::AbstractVector{Bool})::Int
    mx = 0; run = 0
    for v in b
        if v
            run += 1
            mx = max(mx, run)
        else
            run = 0
        end
    end
    return mx
end

end # module BacktestEngine
src/Indicators.jl ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Indicators.jl — Vectorized technical indicator library.
Standalone module. No includes. No external deps beyond Statistics.

Unless stated otherwise, every function returns a vector as long as its input,
with `NaN` in the warm-up positions where the window is not yet full.
"""
module Indicators

using Statistics

export sma, ema, wma, tema, dema,
       rsi, macd, stoch, cci, williams_r,
       atr, bbands, keltner, donchian, adx,
       vwap, obv, cmf, zscore, std_dev,
       momentum, roc, highest, lowest,
       crossover, crossunder

# ── Trend ─────────────────────────────────────────────

"""
    sma(x, n)

Simple moving average over `n` values. A window that contains a `NaN` yields
`NaN`, and the average recovers as soon as the `NaN` has left the window, so
`sma` can be applied to indicator output that starts with `NaN`s.
"""
function sma(x::Vector{Float64}, n::Int)::Vector{Float64}
    len = length(x); out = fill(NaN, len)
    s = 0.0      # running sum of the non-NaN values in the window
    nnan = 0     # number of NaNs currently inside the window
    for i in 1:len
        xi = x[i]
        isnan(xi) ? (nnan += 1) : (s += xi)
        if i > n
            xo = x[i-n]
            isnan(xo) ? (nnan -= 1) : (s -= xo)
        end
        if i >= n && nnan == 0
            out[i] = s / n
        end
    end
    return out
end

"""
    ema(x, n)

Exponential moving average with `k = 2/(n+1)`, seeded with the mean of the
first `n` non-NaN values. `NaN` inputs are skipped (their output stays `NaN`).
"""
function ema(x::Vector{Float64}, n::Int)::Vector{Float64}
    len = length(x); out = fill(NaN, len)
    k = 2.0 / (n + 1.0)
    s = 0.0; cnt = 0
    for i in 1:len
        isnan(x[i]) && continue
        s += x[i]; cnt += 1
        if cnt == n
            # Seed found: continue with the recursive update from here on.
            val = s / n
            out[i] = val
            for j in (i+1):len
                isnan(x[j]) && continue
                val = x[j] * k + val * (1.0 - k)
                out[j] = val
            end
            break
        end
    end
    return out
end

"""
    wma(x, n)

Linearly weighted moving average; the newest value has weight `n`.
"""
function wma(x::Vector{Float64}, n::Int)::Vector{Float64}
    len = length(x); out = fill(NaN, len)
    ws = n * (n+1) / 2.0
    for i in n:len
        s = 0.0
        for j in 1:n; s += x[i-n+j] * j; end
        out[i] = s / ws
    end
    return out
end

"Triple EMA: `3*e1 - 3*e2 + e3`."
function tema(x::Vector{Float64}, n::Int)
    e1 = ema(x, n); e2 = ema(e1, n); e3 = ema(e2, n)
    return 3.0 .* e1 .- 3.0 .* e2 .+ e3
end

"Double EMA: `2*e1 - e2`."
function dema(x::Vector{Float64}, n::Int)
    e1 = ema(x, n); e2 = ema(e1, n)
    return 2.0 .* e1 .- e2
end

# ── Oscillators ───────────────────────────────────────

# RSI value from the average gain/loss; a zero average loss uses a ratio of 1e10.
_rsi_value(ag::Float64, al::Float64) = 100.0 - 100.0/(1.0 + (al==0 ? 1e10 : ag/al))

"""
    rsi(close, n=14)

Relative Strength Index with Wilder-style smoothing. The first value appears at
index `n+1`.
"""
function rsi(close::Vector{Float64}, n::Int=14)::Vector{Float64}
    len = length(close); out = fill(NaN, len)
    ag = 0.0; al = 0.0
    for i in 2:(n+1)
        i > len && break
        d = close[i] - close[i-1]
        d > 0 ? (ag += d) : (al += abs(d))
    end
    ag /= n; al /= n
    n+1 <= len && (out[n+1] = _rsi_value(ag, al))
    for i in (n+2):len
        d = close[i] - close[i-1]
        g = d > 0 ? d : 0.0; l = d < 0 ? abs(d) : 0.0
        ag = (ag*(n-1)+g)/n; al = (al*(n-1)+l)/n
        out[i] = _rsi_value(ag, al)
    end
    return out
end

"""
    macd(close; fast=12, slow=26, sig=9)

Returns `(macd_line, signal_line, histogram)`.
"""
function macd(close::Vector{Float64}; fast::Int=12, slow::Int=26, sig::Int=9)
    ml = ema(close,fast) .- ema(close,slow)
    sl = ema(ml, sig)
    return ml, sl, ml .- sl
end

"""
    stoch(high, low, close; k=14, d=3)

Stochastic oscillator. Returns `(%K, %D)`, where `%D` is the `d`-period SMA of
`%K`. A flat window (highest high == lowest low) gives `%K = 50`.
"""
function stoch(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64};
               k::Int=14, d::Int=3)
    len = length(close); K = fill(NaN, len)
    for i in k:len
        hh = maximum(high[i-k+1:i]); ll = minimum(low[i-k+1:i])
        K[i] = hh==ll ? 50.0 : 100.0*(close[i]-ll)/(hh-ll)
    end
    # Relies on sma tolerating the leading NaNs of K.
    return K, sma(K, d)
end

"Commodity Channel Index over `n` bars; 0 when the mean deviation is 0."
function cci(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64}, n::Int=20)::Vector{Float64}
    len = length(close); tp = (high.+low.+close)./3.0; out = fill(NaN, len)
    for i in n:len
        w = tp[i-n+1:i]; m = mean(w); md = mean(abs.(w.-m))
        out[i] = md==0 ? 0.0 : (tp[i]-m)/(0.015*md)
    end
    return out
end

"Williams %R over `n` bars; -50 when the window is flat."
function williams_r(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64}, n::Int=14)::Vector{Float64}
    len = length(close); out = fill(NaN, len)
    for i in n:len
        hh = maximum(high[i-n+1:i]); ll = minimum(low[i-n+1:i])
        out[i] = hh==ll ? -50.0 : -100.0*(hh-close[i])/(hh-ll)
    end
    return out
end

"Difference between `x[i]` and `x[i-n]`."
function momentum(x::Vector{Float64}, n::Int=10)
    len = length(x); out = fill(NaN, len)
    for i in (n+1):len
        out[i] = x[i] - x[i-n]
    end
    return out
end

"Percentage rate of change over `n` bars; 0 when the reference value is 0."
function roc(x::Vector{Float64}, n::Int=10)
    len = length(x); out = fill(NaN, len)
    for i in (n+1):len
        out[i] = x[i-n]==0 ? 0.0 : 100.0*(x[i]-x[i-n])/x[i-n]
    end
    return out
end

# ── Volatility ────────────────────────────────────────

# True range; the first bar has no previous close and uses high - low.
function _tr(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64})::Vector{Float64}
    len = length(close); tr = fill(NaN, len)
    tr[1] = high[1]-low[1]
    for i in 2:len
        tr[i] = max(high[i]-low[i], abs(high[i]-close[i-1]), abs(low[i]-close[i-1]))
    end
    return tr
end

"Average true range: `ema` of the true range over `n` bars."
atr(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64}, n::Int=14) = ema(_tr(high,low,close), n)

"""
    bbands(close, n=20, k=2.0)

Bollinger Bands. Returns `(upper, middle, lower)` using the population standard
deviation. `k` may be any real number.
"""
function bbands(close::Vector{Float64}, n::Int=20, k::Real=2.0)
    len = length(close); mid = sma(close,n); sd = fill(NaN, len)
    for i in n:len; sd[i] = std(close[i-n+1:i]; corrected=false); end
    return mid.+k.*sd, mid, mid.-k.*sd
end

"Keltner Channels: `ema(close, n) ± k * atr(n)`. Returns `(upper, middle, lower)`."
function keltner(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64}, n::Int=20, k::Real=2.0)
    mid = ema(close,n); a = atr(high,low,close,n)
    return mid.+k.*a, mid, mid.-k.*a
end

"Donchian Channels over `n` bars. Returns `(upper, middle, lower)`."
function donchian(high::Vector{Float64}, low::Vector{Float64}, n::Int=20)
    len = length(high); u = fill(NaN,len); l = fill(NaN,len)
    for i in n:len
        u[i] = maximum(high[i-n+1:i]); l[i] = minimum(low[i-n+1:i])
    end
    return u, (u.+l)./2.0, l
end

"Rolling population standard deviation over `n` values."
function std_dev(x::Vector{Float64}, n::Int=20)::Vector{Float64}
    len = length(x); out = fill(NaN, len)
    for i in n:len; out[i] = std(x[i-n+1:i]; corrected=false); end
    return out
end

"Rolling z-score; `NaN` where the rolling standard deviation is 0 or undefined."
function zscore(x::Vector{Float64}, n::Int=20)::Vector{Float64}
    mu = sma(x,n); sd = std_dev(x,n); out = fill(NaN, length(x))
    for i in eachindex(x)
        !isnan(mu[i]) && !isnan(sd[i]) && sd[i]>0 && (out[i]=(x[i]-mu[i])/sd[i])
    end
    return out
end

# ── Trend strength ────────────────────────────────────

"""
    adx(high, low, close, n=14)

Returns `(adx, +DI, -DI)`, all smoothed with `ema`.
"""
function adx(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64}, n::Int=14)
    tr = _tr(high,low,close)
    up = diff(vcat(high[1],high))      # high[i] - high[i-1]
    # The down-move is positive when the low FALLS: low[i-1] - low[i].
    dn = -diff(vcat(low[1],low))
    pdm = map((u,d)->u>d&&u>0 ? u : 0.0, up, dn)
    ndm = map((u,d)->d>u&&d>0 ? d : 0.0, up, dn)
    sm = ema(tr,n)
    pdi = 100.0.*ema(pdm,n)./(sm.+1e-10)
    ndi = 100.0.*ema(ndm,n)./(sm.+1e-10)
    dx = 100.0.*abs.(pdi.-ndi)./(pdi.+ndi.+1e-10)
    return ema(dx,n), pdi, ndi
end

# ── Volume ────────────────────────────────────────────

"Cumulative volume-weighted average of the typical price (never reset)."
function vwap(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64}, volume::Vector{Float64})::Vector{Float64}
    tp = (high.+low.+close)./3.0
    return cumsum(tp.*volume)./(cumsum(volume).+1e-10)
end

"On-balance volume, starting at `volume[1]`."
function obv(close::Vector{Float64}, volume::Vector{Float64})::Vector{Float64}
    len = length(close); out = zeros(Float64, len); out[1] = volume[1]
    for i in 2:len
        d = close[i]-close[i-1]
        out[i] = out[i-1] + (d>0 ? volume[i] : d<0 ? -volume[i] : 0.0)
    end
    return out
end

"Chaikin Money Flow over `n` bars; 0 when the window's volume sums to 0."
function cmf(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64},
             volume::Vector{Float64}, n::Int=20)::Vector{Float64}
    len = length(close); out = fill(NaN, len)
    hl = high.-low
    mfv = map((c,l,h,rng)->rng==0 ? 0.0 : (2c-l-h)/rng, close,low,high,hl).*volume
    for i in n:len
        sv = sum(volume[i-n+1:i])
        out[i] = sv==0 ? 0.0 : sum(mfv[i-n+1:i])/sv
    end
    return out
end

# ── Utilities ─────────────────────────────────────────

"Rolling maximum over `n` values."
function highest(x::Vector{Float64}, n::Int)
    len = length(x); out = fill(NaN, len)
    for i in n:len; out[i] = maximum(x[i-n+1:i]); end
    return out
end

"Rolling minimum over `n` values."
function lowest(x::Vector{Float64}, n::Int)
    len = length(x); out = fill(NaN, len)
    for i in n:len; out[i] = minimum(x[i-n+1:i]); end
    return out
end

"`true` at `i` when `a` moves from at-or-below `b` to above `b`."
function crossover(a::Vector{Float64}, b::Vector{Float64})::Vector{Bool}
    len=length(a); out=fill(false,len)
    for i in 2:len; out[i] = a[i]>b[i] && a[i-1]<=b[i-1]; end
    return out
end

"`true` at `i` when `a` moves from at-or-above `b` to below `b`."
function crossunder(a::Vector{Float64}, b::Vector{Float64})::Vector{Bool}
    len=length(a); out=fill(false,len)
    for i in 2:len; out[i] = a[i]<b[i] && a[i-1]>=b[i-1]; end
    return out
end

end # module Indicators
src/Manifest.toml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is machine-generated - editing it directly is not recommended
2
+ julia_version = "1.10.7"
3
+ manifest_format = "2.0"
4
+ project_hash = "abc123"
5
+
6
+ [[deps.JSON3]]
7
+ deps = ["Dates", "Mmap", "Parsers", "PrecompileTools", "StructTypes", "UUIDs"]
8
+ git-tree-sha1 = "eb3edce0ed4fa32f75a0a11217433c31d56bd48b"
9
+ uuid = "0f8b85d8-7e73-4b43-9b43-f8e4f07d6bcd"
10
+ version = "1.14.0"
11
+
12
+ [[deps.Random]]
13
+ deps = ["SHA"]
14
+ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
15
+
16
+ [[deps.Statistics]]
17
+ deps = ["LinearAlgebra", "SparseArrays"]
18
+ uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
19
+ version = "1.10.0"
src/Optimizer.jl ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Optimizer.jl — Walk-forward optimization engine.
3
+ No includes. BacktestConfig/run_backtest/BacktestResult received via QuantEngine.
4
+ """
5
+ module Optimizer
6
+
7
+ using Statistics, Random
8
+
9
+ export walk_forward_optimize, OptimResult
10
+
11
"""
    OptimResult

Mutable record filled in by `walk_forward_optimize`.
"""
mutable struct OptimResult
    # Identification of the run
    strategy_name::String
    symbol::String
    timeframe::String
    # Parameter set chosen across windows
    optimal_params::Dict{String,Float64}
    # Out-of-sample aggregates
    oos_sharpe_mean::Float64
    oos_sharpe_std::Float64
    oos_win_rate::Float64
    oos_max_dd::Float64
    oos_pf_mean::Float64
    oos_trades::Int
    # Derived scores
    wf_efficiency::Float64
    robustness::Float64
    # Verdict and the messages explaining it
    is_viable::Bool
    reasons::Vector{String}
    # Per-window out-of-sample Sharpe values
    oos_sharpes::Vector{Float64}
end

"""
    OptimResult(n, s, t)

Blank result for strategy `n`, symbol `s`, timeframe `t`: numeric fields are
zero, collections are empty, and `is_viable` is `false`.
"""
function OptimResult(n, s, t)
    return OptimResult(
        n, s, t,
        Dict{String,Float64}(),
        0.0, 0.0, 0.0, 0.0, 0.0,
        0,
        0.0, 0.0,
        false,
        String[],
        Float64[],
    )
end
22
+
23
"""
    walk_forward_optimize(signal_fn, param_grid, open_p, high, low, close, volume,
                          timeframe, strategy_name, symbol; run_bt_fn, bt_cfg_fn, ...)

Walk-forward optimisation over the windows produced by `_windows`.

For every window, each parameter combination from `_build_combos` is
backtested on the in-sample slice; the combination with the highest Sharpe
among valid runs with at least `min_trades` trades is then backtested on the
out-of-sample slice. Windows with no qualifying combination are skipped.

Keyword arguments
- `run_bt_fn(o, h, l, c, v, signals, timeframe, cfg)`: backtest runner.
- `bt_cfg_fn()`: builds the backtest configuration (called once).
- `n_windows`, `max_combos`: forwarded to `_windows` / `_build_combos`.
- `min_trades`, `min_sharpe`: thresholds used for selection and viability.
- `is_ratio`: accepted for interface compatibility; not used by this function.

Returns an `OptimResult`. When fewer than 200 bars are supplied, or no window
yields a result, the returned record carries only an explanatory reason.
"""
function walk_forward_optimize(
    signal_fn::Function,
    param_grid::Dict{String,Vector{Float64}},
    open_p::Vector{Float64}, high::Vector{Float64},
    low::Vector{Float64}, close::Vector{Float64},
    volume::Vector{Float64}, timeframe::String,
    strategy_name::String, symbol::String;
    run_bt_fn::Function,   # run_backtest injected from QuantEngine
    bt_cfg_fn::Function,   # BacktestConfig() constructor injected
    n_windows::Int=5, is_ratio::Float64=0.70,
    min_trades::Int=30, min_sharpe::Float64=0.5,
    max_combos::Int=300,
)::OptimResult
    result = OptimResult(strategy_name, symbol, timeframe)
    n = length(close)
    if n < 200
        push!(result.reasons, "Need β‰₯200 bars, got $n")
        return result
    end

    cfg     = bt_cfg_fn()
    combos  = _build_combos(param_grid, max_combos)
    windows = _windows(n, n_windows)
    if isempty(windows)
        push!(result.reasons, "No WF windows")
        return result
    end

    win_params  = Vector{Dict{String,Float64}}()
    is_sharpes  = Float64[]
    oos_sharpes = Float64[]
    oos_results = []

    for (is_s, is_e, oos_s, oos_e) in windows
        # In-sample search: keep the best-Sharpe combination that qualifies.
        best_p = nothing
        best_sh = -Inf
        for p in combos
            r = _run(signal_fn, run_bt_fn, cfg,
                     open_p[is_s:is_e], high[is_s:is_e],
                     low[is_s:is_e], close[is_s:is_e],
                     volume[is_s:is_e], p, timeframe)
            if r.is_valid && r.n_trades >= min_trades && r.sharpe > best_sh
                best_sh = r.sharpe
                best_p = p
            end
        end
        best_p === nothing && continue

        push!(win_params, best_p)
        push!(is_sharpes, best_sh)

        # Out-of-sample check with the parameters chosen above.
        oos_r = _run(signal_fn, run_bt_fn, cfg,
                     open_p[oos_s:oos_e], high[oos_s:oos_e],
                     low[oos_s:oos_e], close[oos_s:oos_e],
                     volume[oos_s:oos_e], best_p, timeframe)
        push!(oos_results, oos_r)
        push!(oos_sharpes, oos_r.sharpe)
    end

    if isempty(oos_results)
        push!(result.reasons, "No valid WF windows")
        return result
    end
    result.oos_sharpes = oos_sharpes

    valid = filter(r -> r.is_valid && r.n_trades >= min_trades, oos_results)
    if !isempty(valid)
        sh = [r.sharpe for r in valid]
        result.oos_sharpe_mean = mean(sh)
        # std of a single observation is NaN, which would propagate into the
        # robustness score; a lone window has no measurable dispersion.
        result.oos_sharpe_std = length(sh) > 1 ? std(sh) : 0.0
        result.oos_win_rate = mean([r.win_rate for r in valid])
        result.oos_max_dd = mean([r.max_dd for r in valid])
        # Profit factors of 100 or more are excluded from the average.
        pfs = filter(x -> x < 100, [r.profit_factor for r in valid])
        result.oos_pf_mean = isempty(pfs) ? 0.0 : mean(pfs)
        result.oos_trades = sum(r.n_trades for r in valid)
    end
    if !isempty(is_sharpes) && !isempty(oos_sharpes)
        mis = mean(is_sharpes)
        mos = mean(oos_sharpes)
        result.wf_efficiency = mis > 0 ? mos / mis : 0.0
    end
    result.optimal_params = _vote(win_params, oos_sharpes)
    result.robustness = _robustness(result, min_trades)
    result.is_viable, result.reasons = _viability(result, min_trades, min_sharpe)
    return result
end
90
+
91
"""
    _run(sig_fn, run_bt, cfg, o, h, l, c, v, params, tf)

Generate signals with `sig_fn` and backtest them with `run_bt`. If either step
throws, a backtest of all-zero signals is run instead and returned with
`is_valid = false` and `error_msg` set to the stringified exception.
"""
function _run(sig_fn, run_bt, cfg, o, h, l, c, v, params, tf)
    try
        return run_bt(o, h, l, c, v, sig_fn(o, h, l, c, v, params), tf, cfg)
    catch err
        # Produce a result object of the right type, then mark it invalid.
        flat = zeros(Int, length(c))
        fallback = run_bt(o, h, l, c, v, flat, tf, cfg)
        fallback.is_valid = false
        fallback.error_msg = string(err)
        return fallback
    end
end
102
+
103
"""
    _build_combos(grid, max_c) -> Vector{Dict{String,Float64}}

Expand `grid` into every combination of one value per key. An empty grid
yields a single empty parameter set. When more than `max_c` combinations
exist, a random subset of `max_c` of them is returned.
"""
function _build_combos(grid::Dict{String,Vector{Float64}}, max_c::Int)::Vector{Dict{String,Float64}}
    if isempty(grid)
        return [Dict{String,Float64}()]
    end
    param_names = collect(keys(grid))
    value_axes = [grid[name] for name in param_names]
    combos = Dict{String,Float64}[]
    # `product` varies its first argument fastest; feeding the axes reversed
    # makes the last key vary fastest, i.e. nested loops in key order.
    for reversed_pick in Iterators.product(reverse(value_axes)...)
        pick = reverse(reversed_pick)
        push!(combos, Dict{String,Float64}(name => val for (name, val) in zip(param_names, pick)))
    end
    if length(combos) > max_c
        keep = randperm(length(combos))[1:max_c]
        combos = combos[keep]
    end
    return combos
end
115
+
116
"""
    _windows(n, nw) -> Vector{Tuple{Int,Int,Int,Int}}

Build up to `nw` walk-forward windows over `n` bars, returned oldest first as
`(is_start, is_end, oos_start, oos_end)`.

Out-of-sample blocks are consecutive, non-overlapping slices of
`max(50, n ÷ (2nw))` bars counted back from the end of the data. The in-sample
range always starts at bar 1 and ends on the bar before its out-of-sample block.

A window is dropped when it has fewer than 100 in-sample bars or fewer than 50
out-of-sample bars. The original guard was written `a || b && continue`, which
Julia parses as `a || (b && continue)`; too-short in-sample windows were
therefore never skipped, and its `oe - os < 50` test rejected every
exactly-50-bar out-of-sample block. `nw < 1` returns no windows.
"""
function _windows(n::Int, nw::Int)::Vector{Tuple{Int,Int,Int,Int}}
    wins = Tuple{Int,Int,Int,Int}[]
    nw < 1 && return wins                 # also avoids division by zero below
    oos_size = max(50, div(n, nw * 2))
    for i in 0:(nw-1)
        oos_end = n - i * oos_size
        oos_start = oos_end - oos_size + 1
        is_end = oos_start - 1
        is_len = is_end                   # in-sample range is 1:is_end
        oos_len = oos_end - oos_start + 1
        (is_len < 100 || oos_len < 50) && continue
        push!(wins, (1, is_end, oos_start, oos_end))
    end
    return reverse(wins)
end
125
+
126
"""
    _vote(pl, oos) -> Dict{String,Float64}

Combine per-window parameter sets `pl` into one set using a weighted median
per key. Weights are the matching entries of `oos` floored at zero and
normalised to sum to one; equal weights are used when that sum is not
positive. An empty `pl` gives an empty dict and a single entry is returned
as-is.
"""
function _vote(pl::Vector{Dict{String,Float64}}, oos::Vector{Float64})::Dict{String,Float64}
    if isempty(pl)
        return Dict{String,Float64}()
    elseif length(pl) == 1
        return pl[1]
    end

    count = length(pl)
    weights = max.(0.0, oos[1:count])
    total = sum(weights)
    weights = total > 0 ? weights ./ total : fill(1.0 / count, count)

    chosen = Dict{String,Float64}()
    for key in collect(keys(pl[1]))
        vals = [p[key] for p in pl if haskey(p, key)]
        key_weights = weights[1:length(vals)]
        order = sortperm(vals)
        running = cumsum(key_weights[order])
        # First sorted value whose cumulative weight reaches one half.
        hit = findfirst(x -> x >= 0.5, running)
        pick = hit === nothing ? lastindex(order) : hit
        chosen[key] = vals[order[pick]]
    end
    return chosen
end
141
+
142
"""
    _robustness(r, mt) -> Float64

Score `r` on a 0-100 scale, rounded to one decimal:
- up to 40 points from walk-forward efficiency clamped to [0, 1];
- up to 30 points from `1 - std/mean` of the OOS Sharpe (only when the mean is positive);
- up to 20 points from OOS trade count relative to `10 * mt`;
- up to 10 points from the mean profit factor above 1 (full marks at 3).
"""
function _robustness(r::OptimResult, mt::Int)::Float64
    score = clamp(r.wf_efficiency, 0.0, 1.0) * 40.0
    if r.oos_sharpe_mean > 0
        stability = 1.0 - r.oos_sharpe_std / (r.oos_sharpe_mean + 1e-9)
        score += clamp(stability, 0.0, 1.0) * 30.0
    end
    trade_cover = r.oos_trades / max(1, mt * 10)
    score += clamp(trade_cover, 0.0, 1.0) * 20.0
    if r.oos_pf_mean > 1
        score += clamp((r.oos_pf_mean - 1) / 2, 0.0, 1.0) * 10.0
    end
    return round(score; digits=1)
end
149
+
150
"""
    _viability(r, mt, ms) -> (viable, reasons)

Check `r` against the acceptance thresholds: mean OOS Sharpe at least `ms`,
at least `mt` OOS trades, average drawdown at most 30, walk-forward efficiency
at least 0.3 and mean profit factor at least 1.1. Each failed check adds a
message. When nothing fails the result is viable and `reasons` holds a single
summary line.
"""
function _viability(r::OptimResult, mt::Int, ms::Float64)::Tuple{Bool,Vector{String}}
    sharpe2 = round(r.oos_sharpe_mean; digits=2)
    dd1 = round(r.oos_max_dd; digits=1)
    wfe2 = round(r.wf_efficiency; digits=2)

    reasons = String[]
    if r.oos_sharpe_mean < ms
        push!(reasons, "OOS Sharpe $(sharpe2) < $ms")
    end
    if r.oos_trades < mt
        push!(reasons, "Too few OOS trades: $(r.oos_trades) < $mt")
    end
    if r.oos_max_dd > 30.0
        push!(reasons, "High avg DD: $(dd1)%")
    end
    if r.wf_efficiency < 0.3
        push!(reasons, "Low WFE: $(wfe2)")
    end
    if r.oos_pf_mean < 1.1
        push!(reasons, "PF $(round(r.oos_pf_mean;digits=2)) < 1.1")
    end

    viable = isempty(reasons)
    if viable
        push!(reasons, "βœ… Sharpe=$(sharpe2) DD=$(dd1)% WFE=$(wfe2) Score=$(r.robustness)/100")
    end
    return viable, reasons
end
161
+
162
+ end # module Optimizer
src/Project.toml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ [deps]
2
+ JSON3 = "0f8b85d8-7e73-4b43-9b43-f8e4f07d6bcd"
3
+ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
4
+ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
5
+
6
+ [compat]
7
+ julia = "1.10"
src/QuantEngine.jl ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ QuantEngine.jl — Top-level module. Only file that uses include().
3
+ Wires all submodules together by injecting dependencies explicitly.
4
+ Python imports this via juliacall.
5
+ """
6
+ module QuantEngine
7
+
8
+ using Statistics, Random
9
+
10
+ # ── Include all submodules (ONLY here) ───────────────
11
+ include("Indicators.jl")
12
+ include("BacktestEngine.jl")
13
+ include("Optimizer.jl")
14
+ include("SignalCompiler.jl")
15
+
16
+ using .Indicators
17
+ using .BacktestEngine
18
+ using .Optimizer
19
+ using .SignalCompiler
20
+
21
+ export
22
+ # Indicators
23
+ sma, ema, wma, tema, dema,
24
+ rsi, macd, stoch, cci, williams_r,
25
+ atr, bbands, keltner, donchian, adx,
26
+ vwap, obv, cmf, zscore, std_dev,
27
+ momentum, roc, highest, lowest,
28
+ crossover, crossunder,
29
+ # Engine
30
+ BacktestConfig,
31
+ # High-level
32
+ full_backtest_pipeline
33
+
34
+ """
35
+ full_backtest_pipeline(...) -> Dict{String,Any}
36
+
37
+ End-to-end: compile Julia strategy code β†’ walk-forward optimize
38
+ β†’ return plain Dict that crosses to Python cleanly.
39
+ """
40
function full_backtest_pipeline(
    strategy_code::String, strategy_name::String,
    open_p::Vector{Float64}, high::Vector{Float64},
    low::Vector{Float64}, close::Vector{Float64},
    volume::Vector{Float64}, timeframe::String, symbol::String;
    n_windows::Int=5, is_ratio::Float64=0.70,
    min_trades::Int=30, min_sharpe::Float64=0.5,
    max_combos::Int=300,
    initial_equity::Float64=10_000.0,
    commission_pct::Float64=0.0002,
    risk_per_trade::Float64=0.01,
)::Dict{String,Any}
    # Every failure path returns the same shape, including "is_viable" and
    # "reasons", so callers can read those keys without checking "is_valid".
    failure(msg) = Dict{String,Any}(
        "is_valid" => false, "error" => msg,
        "strategy" => strategy_name, "symbol" => symbol, "timeframe" => timeframe,
        "is_viable" => false, "reasons" => String[msg],
    )

    # 1. Compile strategy, passing the Indicators module explicitly.
    compiled = SignalCompiler.compile_strategy(strategy_name, strategy_code, Indicators)
    compiled.is_valid || return failure(compiled.error)

    # The strategy functions were defined by eval during this call, so they are
    # newer than the running world age; invokelatest makes them callable here.
    signal_fn = (args...) -> Base.invokelatest(compiled.generate_fn, args...)

    # Normalise the grid to the exact type the optimizer's signature requires.
    # A grid written with integer literals would otherwise raise a MethodError.
    param_grid = try
        raw_grid = Base.invokelatest(compiled.param_grid_fn)
        Dict{String,Vector{Float64}}(
            string(k) => Float64[x for x in v] for (k, v) in raw_grid
        )
    catch e
        return failure("Param grid: $(sprint(showerror, e))")
    end

    # 2. Walk-forward optimize, injecting BacktestEngine functions to avoid
    #    circular dependencies between the submodules.
    cfg_fn = () -> BacktestConfig(
        initial_equity=initial_equity,
        commission_pct=commission_pct,
        risk_per_trade=risk_per_trade,
    )

    # Wrap run_backtest so the ATR function from Indicators is supplied.
    run_bt_fn = (o, h, l, c, v, sigs, tf, cfg) ->
        BacktestEngine.run_backtest(o, h, l, c, v, sigs, tf, cfg, Indicators.atr)

    opt = Optimizer.walk_forward_optimize(
        signal_fn,
        param_grid,
        open_p, high, low, close, volume,
        timeframe, strategy_name, symbol;
        run_bt_fn=run_bt_fn,
        bt_cfg_fn=cfg_fn,
        n_windows=n_windows, is_ratio=is_ratio,
        min_trades=min_trades, min_sharpe=min_sharpe,
        max_combos=max_combos,
    )

    # 3. Flatten to a plain Dict of basic values for the Python side.
    return Dict{String,Any}(
        "is_valid"        => true,
        "strategy"        => opt.strategy_name,
        "symbol"          => opt.symbol,
        "timeframe"       => opt.timeframe,
        "optimal_params"  => opt.optimal_params,
        "oos_sharpe_mean" => opt.oos_sharpe_mean,
        "oos_sharpe_std"  => opt.oos_sharpe_std,
        "oos_win_rate"    => opt.oos_win_rate,
        "oos_max_dd"      => opt.oos_max_dd,
        "oos_pf_mean"     => opt.oos_pf_mean,
        "oos_trades"      => opt.oos_trades,
        "wf_efficiency"   => opt.wf_efficiency,
        "robustness"      => opt.robustness,
        "is_viable"       => opt.is_viable,
        "reasons"         => opt.reasons,
        "oos_sharpes"     => opt.oos_sharpes,
    )
end
102
+
103
+ end # module QuantEngine
src/SignalCompiler.jl ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SignalCompiler.jl — Compile AI-generated Julia strategy code.
3
+ No includes. Indicators functions injected explicitly into sandbox.
4
+ """
5
+ module SignalCompiler
6
+
7
+ using Statistics, Random
8
+
9
+ export compile_strategy, CompiledStrategy
10
+
11
"""
    CompiledStrategy

Outcome of `compile_strategy`: the strategy's two entry points plus a validity
flag and an error message (empty when valid).
"""
struct CompiledStrategy
    name::String
    generate_fn::Function      # (o, h, l, c, v, params) -> signals
    param_grid_fn::Function    # () -> parameter grid
    is_valid::Bool
    error::String
end

"""
    CompiledStrategy(name; error="")

Placeholder for a strategy that failed to compile: `is_valid` is `false`, the
signal function returns all zeros and the parameter grid is empty.
"""
function CompiledStrategy(name::String; error::String="")
    flat_signals = (o, h, l, c, v, p) -> zeros(Int, length(c))
    empty_grid = () -> Dict{String,Vector{Float64}}()
    return CompiledStrategy(name, flat_signals, empty_grid, false, error)
end
24
+
25
+ """
26
+ compile_strategy(name, code, indicator_module) -> CompiledStrategy
27
+
28
+ indicator_module is the Indicators module, passed from QuantEngine.
29
+ """
30
function compile_strategy(name::String, code::String, ind_mod::Module)::CompiledStrategy
    # Module name: strategy name reduced to word characters plus a random
    # suffix so repeated compilations do not share a name.
    safe    = replace(replace(name, " " => "_"), r"[^\w]" => "x")
    sandbox = Module(Symbol("S_" * safe * "_" * string(rand(UInt16), base=16)))

    # SECURITY NOTE(review): `Module(name)` is created with the default standard
    # imports, so evaluated code can still reach everything in Base (file,
    # process and network access included). The injections below provide
    # convenience bindings only; they are not a security boundary. Strategy
    # code must be treated as untrusted and isolated at the process level.

    # Inject every exported name of the indicator module (except the module's
    # own name). Failures are ignored on purpose: injection is best effort.
    own_name = nameof(ind_mod)
    for fn_name in names(ind_mod; all=false)
        fn_name === own_name && continue
        try
            Core.eval(sandbox,
                Expr(:const, Expr(:(=), fn_name, getfield(ind_mod, fn_name))))
        catch
        end
    end

    # Inject Statistics helpers.
    for sym in (:mean, :std, :var, :median, :cor, :cov)
        try
            Core.eval(sandbox, Expr(:const, Expr(:(=), sym, getfield(Statistics, sym))))
        catch
        end
    end

    # Inject commonly used Base names. `true`/`false` are literals rather than
    # bindings, so they are not listed (the original entries always failed).
    for sym in (:length, :size, :zeros, :ones, :fill, :similar,
                :sum, :prod, :diff, :cumsum, :cumprod,
                :max, :min, :abs, :sqrt, :log, :exp, :floor, :ceil, :round, :clamp,
                :isnan, :isinf, :isfinite, :sign,
                :sort, :sortperm, :reverse, :unique, :findall, :findfirst,
                :push!, :append!, :pop!, :first, :last, :eachindex,
                :map, :filter, :any, :all, :count,
                :Int, :Int64, :Float64, :Bool,
                :Dict, :Vector, :Tuple, :Set,
                :NaN, :Inf, :pi,
                :println, :string, :get)
        try
            Core.eval(sandbox, Expr(:const, Expr(:(=), sym, getfield(Base, sym))))
        catch
            try
                Core.eval(sandbox, Expr(:const, Expr(:(=), sym, eval(sym))))
            catch
            end
        end
    end

    parsed = try
        Meta.parseall(code)
    catch e
        return CompiledStrategy(name; error="Parse: $(sprint(showerror,e))")
    end

    try
        Core.eval(sandbox, parsed)
    catch e
        return CompiledStrategy(name; error="Eval: $(sprint(showerror,e))")
    end

    isdefined(sandbox, :get_param_grid) ||
        return CompiledStrategy(name; error="Missing: get_param_grid()")
    isdefined(sandbox, :generate_signals) ||
        return CompiledStrategy(name; error="Missing: generate_signals(o,h,l,c,v,params)")

    raw_gen  = getfield(sandbox, :generate_signals)
    raw_grid = getfield(sandbox, :get_param_grid)

    # The methods above were defined by eval during this call, so they are
    # newer than the world age of the running call chain. Calling them directly
    # raises "method too new to be called from this world context"; routing
    # every call through invokelatest makes them usable immediately.
    gen_fn  = (o, h, l, c, v, p) -> Base.invokelatest(raw_gen, o, h, l, c, v, p)
    grid_fn = () -> Base.invokelatest(raw_grid)

    err = _smoke(gen_fn, grid_fn)
    err != "" && return CompiledStrategy(name; error=err)

    return CompiledStrategy(name, gen_fn, grid_fn, true, "")
end
84
+
85
"""
    _smoke(gen_fn, grid_fn) -> String

Quick sanity run of a compiled strategy on 200 bars of random data. Returns
`""` on success, otherwise a description of the first problem found: the grid
is not a `Dict`, the signals are not a `Vector` of length 200, a signal lies
outside {-1, 0, 1}, or either function threw.

Both functions are called through `Base.invokelatest` because they may have
been defined by `eval` after the caller started running; a direct call would
then fail with a world-age `MethodError`.
"""
function _smoke(gen_fn, grid_fn)::String
    try
        grid = Base.invokelatest(grid_fn)
        grid isa Dict || return "get_param_grid() must return Dict"
        # Use the first value of each parameter (0 when the entry is unusable).
        params = Dict{String,Float64}(
            k => Float64(v isa Vector && !isempty(v) ? v[1] : 0) for (k, v) in grid)
        n = 200
        c = 100.0 .* exp.(cumsum(randn(n) .* 0.005))
        h = c .* (1.0 .+ abs.(randn(n)) .* 0.003)
        l = c .* (1.0 .- abs.(randn(n)) .* 0.003)
        o = c .* (1.0 .+ randn(n) .* 0.001)
        v = abs.(randn(n)) .* 1000.0 .+ 500.0
        sigs = Base.invokelatest(gen_fn, o, h, l, c, v, params)
        sigs isa Vector || return "generate_signals must return Vector, got $(typeof(sigs))"
        length(sigs) != n && return "Signal length $(length(sigs)) β‰  $n"
        any(s -> !(s in (-1, 0, 1)), sigs) && return "Values must be in {-1,0,1}"
    catch e
        return "Smoke: $(sprint(showerror,e))"
    end
    return ""
end
100
+
101
+ end # module SignalCompiler
src/strategy_template.jl ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ═══════════════════════════════════════════════════════════════════
2
+ # JULIA STRATEGY TEMPLATE
3
+ # This is the exact format Claude generates for each strategy.
4
+ # Two functions required. No module/using declarations needed —
5
+ # all Indicators functions are pre-injected by SignalCompiler.jl.
6
+ # ═══════════════════════════════════════════════════════════════════
7
+
8
+ # ── Example: EMA Crossover Strategy ─────────────────────────────────
9
+
10
+ """
11
+ Return parameter ranges for walk-forward grid search.
12
+ Keys must be valid Julia identifiers. Values are Float64 ranges.
13
+ """
14
function get_param_grid() :: Dict{String, Vector{Float64}}
    grid = Dict{String, Vector{Float64}}()
    grid["fast_period"] = [10.0, 15.0, 20.0, 25.0]
    grid["slow_period"] = [40.0, 50.0, 60.0, 80.0]
    # A single candidate means the parameter is fixed rather than optimised.
    grid["atr_filter"] = [14.0]
    return grid
end
21
+
22
+ """
23
+ Generate trading signals from OHLCV arrays.
24
+
25
+ Arguments (all same length n):
26
+ open_p, high, low, close, volume :: Vector{Float64}
27
+ params :: Dict{String,Float64} β€” one value per key from get_param_grid()
28
+
29
+ Returns Vector{Int} of length n:
30
+ 1 = enter/hold long
31
+ -1 = enter/hold short
32
+ 0 = flat / no position
33
+
34
+ Rules:
35
+ - Return 0 for the first ~slow_period bars (warmup / NaN period)
36
+ - Always use isnan() checks before comparisons
37
+ - Signals are position signals, not entry triggers
38
+ (engine manages entries/exits from signal transitions)
39
+ """
40
function generate_signals(
    open_p :: Vector{Float64},
    high   :: Vector{Float64},
    low    :: Vector{Float64},
    close  :: Vector{Float64},
    volume :: Vector{Float64},
    params :: Dict{String, Float64},
) :: Vector{Int}
    # Parameters arrive as Float64; periods are rounded to the nearest integer.
    as_period(key, fallback) = Int(round(get(params, key, fallback)))
    fast_p = as_period("fast_period", 20.0)
    slow_p = as_period("slow_period", 50.0)
    atr_p  = as_period("atr_filter", 14.0)

    fast_ema = ema(close, fast_p)
    slow_ema = ema(close, slow_p)
    atr_vals = atr(high, low, close, atr_p)

    n = length(close)
    signals = zeros(Int, n)

    # Bars up to slow_p stay flat (warmup).
    for i in (slow_p + 1):n
        fast_now = fast_ema[i]
        slow_now = slow_ema[i]
        atr_now  = atr_vals[i]

        # Any NaN indicator means warmup is not over for this bar.
        (isnan(fast_now) || isnan(slow_now) || isnan(atr_now)) && continue

        # Volatility filter: stay flat while ATR is below 0.1% of price.
        atr_now < close[i] * 0.001 && continue

        # Long when the fast EMA is above the slow EMA, short when below.
        signals[i] = fast_now > slow_now ? 1 : (fast_now < slow_now ? -1 : 0)
    end

    return signals
end
81
+
82
+ # ═══════════════════════════════════════════════════════════════════
83
+ # Example 2: RSI Mean Reversion
84
+ # ═══════════════════════════════════════════════════════════════════
85
+
86
+ # function get_param_grid()
87
+ # return Dict(
88
+ # "rsi_period" => [7.0, 10.0, 14.0, 21.0],
89
+ # "oversold" => [25.0, 30.0, 35.0],
90
+ # "overbought" => [65.0, 70.0, 75.0],
91
+ # "ma_period" => [20.0, 50.0],
92
+ # )
93
+ # end
94
+ #
95
+ # function generate_signals(open_p, high, low, close, volume, params)
96
+ # n = length(close)
97
+ # rsi_p = Int(round(get(params, "rsi_period", 14.0)))
98
+ # oversold = get(params, "oversold", 30.0)
99
+ # overbought = get(params, "overbought", 70.0)
100
+ # ma_p = Int(round(get(params, "ma_period", 50.0)))
101
+ #
102
+ # rsi_vals = rsi(close, rsi_p)
103
+ # trend_ma = sma(close, ma_p)
104
+ # signals = zeros(Int, n)
105
+ #
106
+ # for i in (ma_p + rsi_p + 1):n
107
+ # isnan(rsi_vals[i]) && continue
108
+ # isnan(trend_ma[i]) && continue
109
+ #
110
+ # # Mean reversion: buy oversold in uptrend, sell overbought in downtrend
111
+ # if rsi_vals[i] < oversold && close[i] > trend_ma[i]
112
+ # signals[i] = 1
113
+ # elseif rsi_vals[i] > overbought && close[i] < trend_ma[i]
114
+ # signals[i] = -1
115
+ # end
116
+ # end
117
+ # return signals
118
+ # end
src/warmup.jl ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Load the engine from this directory and bring its exports into scope.
push!(LOAD_PATH, @__DIR__)
include(joinpath(@__DIR__, "QuantEngine.jl"))
using .QuantEngine
using Statistics, Random

println("Warming up all Julia hot paths...")

# Synthetic OHLCV series: a random-walk close with high/low/open derived from it.
n = 500
c = 100.0 .* exp.(cumsum(randn(n) .* 0.005))
h = c .* (1.0 .+ abs.(randn(n)) .* 0.005)
l = c .* (1.0 .- abs.(randn(n)) .* 0.005)
o = c .* (1.0 .+ randn(n) .* 0.002)
v = abs.(randn(n)) .* 2000.0 .+ 1000.0

# Call each indicator once so its method gets compiled.
for (label, warm) in (
        ("sma",      () -> sma(c, 20)),
        ("ema",      () -> ema(c, 20)),
        ("rsi",      () -> rsi(c, 14)),
        ("macd",     () -> macd(c)),
        ("atr",      () -> atr(h, l, c, 14)),
        ("bbands",   () -> bbands(c, 20, 2.0)),
        ("donchian", () -> donchian(h, l, 20)),
        ("adx",      () -> adx(h, l, c, 14)),
        ("stoch",    () -> stoch(h, l, c)),
        ("zscore",   () -> zscore(c, 20)),
    )
    warm()
    println(" ", label, " βœ“")
end
println("All indicators warmed βœ“")
23
+
24
# Minimal SMA strategy used only to exercise the full compile/optimise path.
code = """
function get_param_grid() :: Dict{String, Vector{Float64}}
    return Dict("period" => [10.0, 20.0, 30.0])
end
function generate_signals(open_p, high, low, close, volume, params)
    n = length(close)
    p = Int(round(get(params, "period", 20.0)))
    ma = sma(close, p)
    signals = zeros(Int, n)
    for i in (p+1):n
        isnan(ma[i]) && continue
        signals[i] = close[i] > ma[i] ? 1 : -1
    end
    return signals
end
"""

result = full_backtest_pipeline(
    code, "WarmupTest",
    o, h, l, c, v, "1h", "TEST";
    n_windows=2, max_combos=3, min_trades=1,
)

# Read the fields before printing: escaped quotes (\") are not valid inside a
# $(...) interpolation, and "is_viable" may be absent when compilation failed.
warm_valid  = get(result, "is_valid", false)
warm_viable = get(result, "is_viable", false)
println("full_backtest_pipeline: is_valid=$(warm_valid) viable=$(warm_viable)")
warm_valid || println("  pipeline error: ", get(result, "error", "unknown"))
println("\nβœ… Julia warmup complete β€” all hot paths compiled.")
src/warmup_bridge.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ warmup_bridge.py
3
+ Pre-warms the juliacall Python↔Julia bridge at build time.
4
+ Called from Dockerfile Step 3 — runs once, caches the Julia
5
+ session location so runtime startup is instant.
6
+ """
7
import os
import sys


def main() -> int:
    """Load QuantEngine through juliacall once and call one indicator.

    Failures are reported but never fatal: the function always returns 0 so
    the Docker build continues.
    """
    os.environ["JULIA_PROJECT"] = "/app/src"
    os.environ["JULIA_DEPOT_PATH"] = "/app/.julia"

    print("Pre-warming juliacall bridge...")
    try:
        from juliacall import Main as jl

        jl.seval('push!(LOAD_PATH, "/app/src")')
        jl.seval('include("/app/src/QuantEngine.jl")')
        jl.seval("using .QuantEngine")

        # Quick sanity check: call one indicator through the bridge.
        import numpy as np

        c = (100.0 * np.exp(np.cumsum(np.random.randn(100) * 0.005))).tolist()
        result = jl.QuantEngine.sma(jl.convert(jl.Vector[jl.Float64], c), 20)
        # Explicit check rather than `assert`, which is stripped under -O.
        if len(result) != 100:
            raise RuntimeError(f"sma returned {len(result)} values, expected 100")
        print("juliacall bridge warmed up βœ“")
    except Exception as e:
        print(f"WARNING: juliacall warmup failed: {e}")
        print("App will still work β€” Julia initialises on first request instead.")
    return 0  # Non-fatal: don't break the build


if __name__ == "__main__":
    sys.exit(main())
utils/__init__.py ADDED
File without changes
utils/config.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """utils/config.py — reads from HF Spaces Secrets (env vars)."""
2
+ import os
3
+ from pathlib import Path
4
+
5
# Scratch area for downloaded and generated files; sub-directories are created
# at import time so later code can write into them directly.
TMP = Path("/tmp/quant")
for subdir in ("pdfs", "tick_cache", "compiled", "exports"):
    TMP.joinpath(subdir).mkdir(parents=True, exist_ok=True)
8
+
9
def get(k, default=""):
    """Return environment variable *k*, or *default* when it is not set."""
    value = os.environ.get(k, default)
    return value
10
+
11
# Credentials and repository names (empty string when the variable is unset).
ANTHROPIC_API_KEY = get("ANTHROPIC_API_KEY")
HF_TOKEN = get("HF_TOKEN")
HF_DATASET_REPO = get("HF_DATASET_REPO")
HF_TICK_REPO = get("HF_TICK_REPO")

# Extraction settings.
SIMILARITY_THRESHOLD = float(get("SIMILARITY_THRESHOLD", "0.85"))
MAX_TOKENS_PER_CHUNK = int(get("MAX_TOKENS_PER_CHUNK", "3000"))
OCR_DPI = int(get("OCR_DPI", "300"))

# Backtest and walk-forward settings.
INITIAL_EQUITY = float(get("INITIAL_EQUITY", "10000"))
COMMISSION_PCT = float(get("COMMISSION_PCT", "0.0002"))
RISK_PER_TRADE = float(get("RISK_PER_TRADE", "0.01"))
WF_WINDOWS = int(get("WF_WINDOWS", "5"))
WF_IS_RATIO = float(get("WF_IS_RATIO", "0.70"))
MAX_PARAM_COMBOS = int(get("MAX_PARAM_COMBOS", "300"))
MIN_TRADES = int(get("MIN_TRADES", "30"))
MIN_SHARPE = float(get("MIN_SHARPE", "0.5"))
# Comma-separated list; surrounding whitespace and empty entries are dropped
# so a value such as "1h, 4h," yields ["1h", "4h"].
BACKTEST_TFS = [
    tf.strip()
    for tf in get("BACKTEST_TIMEFRAMES", "1h,4h,1d").split(",")
    if tf.strip()
]
29
+
30
+ CATEGORIES = [
31
+ "Trend Following","Mean Reversion","Statistical Arbitrage",
32
+ "Momentum","Breakout","Volatility Trading","Market Making",
33
+ "Pattern Recognition","Machine Learning","Options Strategy",
34
+ "High Frequency","Pairs Trading","Carry Trade",
35
+ "Seasonal / Calendar","Risk Management","Position Sizing",
36
+ "Portfolio Construction","Market Microstructure","Other",
37
+ ]
38
+
39
+ EXTRACTION_PROMPT = """
40
+ You are a quantitative finance knowledge extraction engine.
41
+
42
+ Extract ALL trading strategies, mathematical formulas, and complete trading systems
43
+ from the text below (taken from an algorithmic trading book).
44
+
45
+ Output ONLY valid JSON β€” no markdown fences, no preamble:
46
+ {
47
+ "strategies": [{
48
+ "name": "string", "category": "string", "description": "string",
49
+ "entry_rules": ["string"], "exit_rules": ["string"],
50
+ "filters": ["string"], "timeframes": ["string"], "instruments": ["string"],
51
+ "parameters": {"name": "description with typical value"},
52
+ "mathematical_basis": "string", "source_context": "string"
53
+ }],
54
+ "formulas": [{
55
+ "name": "string", "category": "string",
56
+ "latex": "LaTeX string", "plain_text": "string",
57
+ "variables": {"symbol": "description"},
58
+ "purpose": "string", "usage_context": "string", "source_context": "string"
59
+ }],
60
+ "systems": [{
61
+ "name": "string", "components": ["string"],
62
+ "entry_system": "string", "exit_system": "string",
63
+ "risk_management": "string", "position_sizing": "string",
64
+ "backtesting_notes": "string", "source_context": "string"
65
+ }]
66
+ }
67
+
68
+ Rules: empty arrays [] if nothing found. Preserve exact math. Include LaTeX.
69
+ Source: {source_file} | Pages: {page_start}–{page_end}
70
+ --- TEXT ---
71
+ {text}
72
+ --- END ---
73
+ """.strip()
74
+
75
+ COMPILER_PROMPT = """
76
+ You are a Julia algorithmic trading code generator.
77
+
78
+ Convert the strategy JSON below into executable Julia code.
79
+ Output ONLY the Julia code β€” no markdown fences, no explanation, no module/using declarations.
80
+
81
+ EXACT REQUIRED FORMAT (two functions, nothing else):
82
+
83
+ function get_param_grid() :: Dict{{String, Vector{{Float64}}}}
84
+ return Dict(
85
+ "param_name" => [val1, val2, val3],
86
+ )
87
+ end
88
+
89
+ function generate_signals(
90
+ open_p :: Vector{{Float64}},
91
+ high :: Vector{{Float64}},
92
+ low :: Vector{{Float64}},
93
+ close :: Vector{{Float64}},
94
+ volume :: Vector{{Float64}},
95
+ params :: Dict{{String, Float64}},
96
+ ) :: Vector{{Int}}
97
+ n = length(close)
98
+ signals = zeros(Int, n)
99
+ # ... your logic here ...
100
+ return signals
101
+ end
102
+
103
+ RULES (CRITICAL β€” violations cause compile failure):
104
+ 1. NO module, NO using, NO include statements
105
+ 2. ALWAYS check isnan() before using indicator values
106
+ 3. Return signals[i] = 0 during indicator warmup period
107
+ 4. Values: 1=long, -1=short, 0=flat only
108
+ 5. Get int params: Int(round(get(params, "key", default)))
109
+ 6. Get float params: get(params, "key", default)
110
+
111
+ AVAILABLE FUNCTIONS (pre-injected, call directly without prefix):
112
+ Trend: sma(s,n) ema(s,n) wma(s,n) tema(s,n) dema(s,n)
113
+ Momentum: rsi(c,n) macd(c;fast,slow,sig)->(ml,sl,hist) momentum(s,n) roc(s,n)
114
+ Bands: bbands(c,n,k)->(up,mid,lo) keltner(h,l,c,n,k)->(up,mid,lo)
115
+ Channel: donchian(h,l,n)->(up,mid,lo) highest(s,n) lowest(s,n)
116
+ Volatility: atr(h,l,c,n) std_dev(s,n) zscore(s,n)
117
+ Oscillators: stoch(h,l,c;k,d)->(K,D) cci(h,l,c,n) williams_r(h,l,c,n)
118
+ Volume: vwap(h,l,c,v) obv(c,v) cmf(h,l,c,v,n)
119
+ Trend strength: adx(h,l,c,n)->(adx,pdi,ndi)
120
+ Crosses: crossover(a,b)->Bool[] crossunder(a,b)->Bool[]
121
+ Math: mean(v) std(v) diff(v) cumsum(v) abs(x) sqrt(x)
122
+
123
+ Strategy:
124
+ {strategy_json}
125
+ """.strip()
utils/hf_io.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """utils/hf_io.py — All HuggingFace Hub read/write."""
2
+ import io, json
3
+ from pathlib import Path
4
+ from typing import Optional
5
+ import pandas as pd
6
+ from huggingface_hub import HfApi, hf_hub_download, list_repo_files, CommitOperationAdd
7
+ from loguru import logger
8
+ import utils.config as cfg
9
+
10
def _api():
    """Build a Hub API client authenticated with the configured token."""
    client = HfApi(token=cfg.HF_TOKEN)
    return client
11
+
12
+ # ── Knowledge base ─────────────────────────────────────
13
+
14
def kb_load() -> dict:
    """Download the knowledge base and index its records by kind and id.

    Reads ``knowledge_base.jsonl`` from the configured dataset repo, parsing
    one JSON record per non-blank line. Each record is stored under
    ``kb[record["_type"]][record["canonical_id"]]``; records whose ``_type``
    is not one of the three known kinds, or whose ``canonical_id`` is empty,
    are dropped.

    Returns:
        A dict with the keys ``strategies``, ``formulas`` and ``systems``.
        All three are empty when no dataset repo is configured, or when the
        download or any line fails to parse (logged as a warning).
    """
    kinds = ("strategies", "formulas", "systems")
    if not cfg.HF_DATASET_REPO:
        return {k: {} for k in kinds}
    try:
        local_path = hf_hub_download(
            repo_id=cfg.HF_DATASET_REPO,
            filename="knowledge_base.jsonl",
            repo_type="dataset",
            token=cfg.HF_TOKEN,
            local_dir=str(cfg.TMP),
            force_download=True,
        )
        kb = {k: {} for k in kinds}
        with open(local_path, encoding="utf-8") as fh:
            for raw in fh:
                text = raw.strip()
                if text:
                    record = json.loads(text)
                    kind = record.get("_type", "")
                    cid = record.get("canonical_id", "")
                    if kind in kb and cid:
                        kb[kind][cid] = record
        logger.info(f"KB: {len(kb['strategies'])} strats, {len(kb['formulas'])} formulas")
        return kb
    except Exception as e:
        logger.warning(f"KB load (may not exist yet): {e}")
        return {k: {} for k in kinds}
36
+
37
def kb_save(kb: dict) -> bool:
    """Serialise the knowledge base to JSONL and upload it to the dataset repo.

    Every record under ``strategies``, ``formulas`` and ``systems`` is written
    as one JSON line, tagged with a ``_type`` field naming its kind.

    Args:
        kb: Mapping with the three kind keys, each a dict of records.

    Returns:
        True when the upload call completed; False when no dataset repo is
        configured or anything raised (logged as an error).
    """
    if not cfg.HF_DATASET_REPO:
        return False
    try:
        payload = "\n".join(
            json.dumps({**record, "_type": kind})
            for kind in ("strategies", "formulas", "systems")
            for record in kb[kind].values()
        )
        _api().upload_file(
            path_or_fileobj=io.BytesIO(payload.encode()),
            path_in_repo="knowledge_base.jsonl",
            repo_id=cfg.HF_DATASET_REPO,
            repo_type="dataset",
            commit_message="Update knowledge base",
        )
    except Exception as e:
        logger.error(f"KB save: {e}")
        return False
    return True
53
+
54
+ # ── Tick data ──────────────────────────────────────────
55
+
56
def tick_list_symbols() -> list[str]:
    """List the top-level folder names in the tick-data repo, sorted.

    A repo path contributes its first component only when it contains at
    least one ``/``, so files sitting at the repo root are ignored.

    Returns:
        Sorted unique folder names; an empty list when no tick repo is
        configured or the listing raises (logged as a warning).
    """
    if not cfg.HF_TICK_REPO:
        return []
    try:
        repo_files = list(
            list_repo_files(
                repo_id=cfg.HF_TICK_REPO,
                repo_type="dataset",
                token=cfg.HF_TOKEN,
            )
        )
        return sorted({name.split("/")[0] for name in repo_files if "/" in name})
    except Exception as e:
        logger.warning(f"Tick symbols: {e}")
        return []
69
+
70
def tick_load(symbol: str, timeframe: str = "1h") -> Optional[pd.DataFrame]:
    """Load bars for ``symbol``/``timeframe``, using a local parquet cache.

    On a cache miss, candidate files are tried in order:
    ``<timeframe>.parquet``, ``<timeframe>.csv``, ``ticks.parquet`` and
    ``data.parquet``. Files named ``tick*`` or ``data.parquet`` are resampled
    to ``timeframe``; the others are only normalised. The first non-empty
    result is written to the cache and returned.

    Returns:
        The normalised frame, or None when no tick repo is configured or no
        candidate yields usable data.
    """
    cache_file = cfg.TMP / "tick_cache" / f"{symbol}_{timeframe}.parquet"
    if cache_file.exists():
        return pd.read_parquet(cache_file)
    if not cfg.HF_TICK_REPO:
        return None

    candidates = (
        f"{timeframe}.parquet",
        f"{timeframe}.csv",
        "ticks.parquet",
        "data.parquet",
    )
    for fname in candidates:
        raw = _try_dl(symbol, fname)
        if raw is None:
            continue
        needs_resample = fname.startswith("tick") or fname == "data.parquet"
        bars = _norm_ohlcv(raw, timeframe if needs_resample else None)
        if bars is not None and not bars.empty:
            bars.to_parquet(cache_file)
            return bars
    return None
82
+
83
def _try_dl(sym, fname):
    """Download ``<sym>/<fname>`` from the tick repo and read it into a frame.

    The file lands under ``cfg.TMP/tick_cache/<sym>`` (created if needed).
    Names ending in ``.parquet`` are read as parquet, anything else as CSV.

    Returns:
        The loaded DataFrame, or None if any step raises; the error is
        deliberately not reported so callers can probe several file names.
    """
    try:
        target_dir = cfg.TMP / "tick_cache" / sym
        target_dir.mkdir(parents=True, exist_ok=True)
        downloaded = hf_hub_download(
            repo_id=cfg.HF_TICK_REPO,
            filename=f"{sym}/{fname}",
            repo_type="dataset",
            token=cfg.HF_TOKEN,
            local_dir=str(target_dir),
            force_download=False,
        )
        if fname.endswith(".parquet"):
            return pd.read_parquet(downloaded)
        return pd.read_csv(downloaded)
    except Exception:
        return None
93
+
94
# Timeframe labels accepted by this module -> pandas resample rule strings.
_TF_MAP = {
    "1m": "1min", "5m": "5min", "15m": "15min", "30m": "30min",
    "1h": "1h", "4h": "4h", "1d": "1D", "1w": "1W",
}


def _norm_ohlcv(df: pd.DataFrame, resample_to=None) -> Optional[pd.DataFrame]:
    """Normalise a raw price frame to UTC-indexed open/high/low/close/volume.

    The first column whose name contains ``time`` or ``date`` (any case)
    becomes the index; without one, the existing index is parsed as
    datetimes. The index is made UTC-aware and sorted.

    Args:
        df: Raw frame; it is copied, never modified in place.
        resample_to: When truthy, a key of ``_TF_MAP`` (unknown keys fall
            back to ``"1h"``). The frame is then treated as tick data and
            aggregated into bars. When falsy, the frame is treated as bars
            already and only its columns are normalised.

    Returns:
        A frame with columns ``open, high, low, close, volume``, or None when
        the index cannot be parsed or the required price columns are missing.
    """
    df = df.copy()
    ts = next((c for c in df.columns if "time" in c.lower() or "date" in c.lower()), None)
    if ts:
        df.index = pd.to_datetime(df[ts], utc=True)
        df = df.drop(columns=[ts])
    else:
        try:
            df.index = pd.to_datetime(df.index, utc=True)
        except Exception:
            # Was a bare ``except:``, which also trapped KeyboardInterrupt
            # and SystemExit; an unparseable index still yields None.
            return None
    df.index = df.index.tz_convert("UTC") if df.index.tz else df.index.tz_localize("UTC")
    df = df.sort_index()

    if resample_to:
        price_col = next(
            (c for c in df.columns if c.lower() in ("bid", "mid", "price", "close")),
            None,
        )
        if price_col is None:
            return None
        # Match bid/ask case-insensitively, like the price-column lookup
        # above, so "Bid"/"Ask" feeds get a mid price instead of bid only.
        bid_col = next((c for c in df.columns if c.lower() == "bid"), None)
        ask_col = next((c for c in df.columns if c.lower() == "ask"), None)
        if bid_col is not None and ask_col is not None:
            df["_price"] = (df[bid_col] + df[ask_col]) / 2
        else:
            df["_price"] = df[price_col]
        rule = _TF_MAP.get(resample_to, "1h")
        ohlcv = df["_price"].resample(rule).ohlc()
        ohlcv.columns = ["open", "high", "low", "close"]
        vcol = next((c for c in df.columns if "vol" in c.lower()), None)
        if vcol:
            ohlcv["volume"] = df[vcol].resample(rule).sum()
        else:
            # No volume column: use the number of ticks per bar instead.
            ohlcv["volume"] = df["_price"].resample(rule).count()
        return ohlcv.dropna()

    # Bar data: map common short/long column spellings to canonical names.
    renames = {}
    for c in df.columns:
        lc = c.lower()
        if lc in ("o", "open"):
            renames[c] = "open"
        elif lc in ("h", "high"):
            renames[c] = "high"
        elif lc in ("l", "low"):
            renames[c] = "low"
        elif lc in ("c", "close"):
            renames[c] = "close"
        elif lc in ("v", "vol", "volume", "tick_volume"):
            renames[c] = "volume"
    df = df.rename(columns=renames)
    for col in ["open", "high", "low", "close"]:
        if col not in df.columns:
            return None
    if "volume" not in df.columns:
        df["volume"] = 0.0
    df = (
        df[["open", "high", "low", "close", "volume"]]
        .astype(float)
        .dropna(subset=["open", "high", "low", "close"])
    )
    # Repair rows where high and low arrived swapped.
    bad = df["high"] < df["low"]
    if bad.any():
        df.loc[bad, ["high", "low"]] = df.loc[bad, ["low", "high"]].values
    return df
135
+
136
+ # ── Batch push ─────────────────────────────────────────
137
+
138
def push_batch(files: list[tuple[str, bytes]], msg="Update") -> int:
    """Commit ``files`` to the dataset repo in chunks of up to 100 operations.

    Each chunk is one commit whose message is ``msg`` followed by the
    1-based range of files it contains. A failing chunk is logged and
    skipped; later chunks are still attempted.

    Args:
        files: ``(path_in_repo, content_bytes)`` pairs.
        msg: Commit-message prefix.

    Returns:
        The number of files in chunks that committed without raising; 0 when
        no dataset repo is configured or ``files`` is empty.
    """
    if not cfg.HF_DATASET_REPO or not files:
        return 0
    ops = [
        CommitOperationAdd(path_in_repo=p, path_or_fileobj=io.BytesIO(c))
        for p, c in files
    ]
    pushed = 0
    for start in range(0, len(ops), 100):
        chunk = ops[start:start + 100]
        try:
            _api().create_commit(
                repo_id=cfg.HF_DATASET_REPO,
                repo_type="dataset",
                operations=chunk,
                # En dash between the range bounds (was mis-encoded text).
                commit_message=f"{msg} [{start + 1}–{start + len(chunk)}]",
            )
            pushed += len(chunk)
        except Exception as e:
            logger.error(f"Batch push: {e}")
    return pushed
149
+
150
def push_result(name, symbol, tf, report, opt_json, mt5_set, julia_cfg) -> bool:
    """Upload the four artefacts of one backtest run in a single batch.

    The report goes under ``backtests/<slug>/``; the optimal-parameter JSON,
    the ``.set`` file and the Julia config go under ``optimal_sets/``. All
    file names share the stem ``<slug>_<symbol>_<tf>``.

    Returns:
        True only when ``push_batch`` reports all four files pushed.
    """
    from pipeline.exporter import slugify

    slug = slugify(name)
    stem = f"{slug}_{symbol}_{tf}"
    report_file = (f"backtests/{slug}/{stem}_report.md", report.encode())
    optimal_file = (
        f"optimal_sets/{stem}_optimal.json",
        json.dumps(opt_json, indent=2).encode(),
    )
    set_file = (f"optimal_sets/{stem}.set", mt5_set.encode())
    config_file = (f"optimal_sets/{stem}_config.jl", julia_cfg.encode())
    uploaded = push_batch(
        [report_file, optimal_file, set_file, config_file],
        f"Backtest: {name} {symbol} {tf}",
    )
    return uploaded == 4
160
+
161
def push_index(md: str, data: dict) -> bool:
    """Upload the backtest index as Markdown and JSON under ``optimal_sets/``.

    Returns:
        True only when ``push_batch`` reports both files pushed.
    """
    markdown_file = ("optimal_sets/BACKTEST_INDEX.md", md.encode())
    json_file = (
        "optimal_sets/backtest_index.json",
        json.dumps(data, indent=2).encode(),
    )
    uploaded = push_batch([markdown_file, json_file], "Update index")
    return uploaded == 2
166
+
167
def fetch_index() -> dict:
    """Download and parse ``optimal_sets/backtest_index.json``.

    Returns:
        The parsed JSON content, or an empty dict when no dataset repo is
        configured or the download/parse fails (logged as a warning).
    """
    if not cfg.HF_DATASET_REPO:
        return {}
    try:
        path = hf_hub_download(
            repo_id=cfg.HF_DATASET_REPO,
            filename="optimal_sets/backtest_index.json",
            repo_type="dataset",
            token=cfg.HF_TOKEN,
            local_dir=str(cfg.TMP),
            force_download=True,
        )
        return json.loads(Path(path).read_text(encoding="utf-8"))
    except Exception as e:
        # Was a bare ``except:`` that also trapped KeyboardInterrupt and
        # SystemExit and left no trace of the failure.
        logger.warning(f"Index fetch (may not exist yet): {e}")
        return {}
175
+
176
def fetch_file(remote: str) -> Optional[bytes]:
    """Download ``remote`` from the dataset repo and return its raw bytes.

    The file is saved under ``cfg.TMP/downloads`` before being read.

    Args:
        remote: Path of the file inside the dataset repo.

    Returns:
        The file content, or None when no dataset repo is configured or the
        download/read fails (logged as a warning).
    """
    if not cfg.HF_DATASET_REPO:
        return None
    try:
        path = hf_hub_download(
            repo_id=cfg.HF_DATASET_REPO,
            filename=remote,
            repo_type="dataset",
            token=cfg.HF_TOKEN,
            local_dir=str(cfg.TMP / "downloads"),
            force_download=True,
        )
        return Path(path).read_bytes()
    except Exception as e:
        # Was a bare ``except:`` that also trapped KeyboardInterrupt and
        # SystemExit and left no trace of the failure.
        logger.warning(f"Fetch file {remote}: {e}")
        return None
183
+
184
def pdf_upload(pdf_path: Path) -> str:
    """Upload a local PDF to ``pdfs/<file name>`` in the dataset repo.

    Args:
        pdf_path: Location of the PDF on disk.

    Returns:
        ``str()`` of whatever ``upload_file`` returns, or an empty string
        when no dataset repo is configured or the upload raises (logged as a
        warning).
    """
    if not cfg.HF_DATASET_REPO:
        return ""
    try:
        outcome = _api().upload_file(
            path_or_fileobj=str(pdf_path),
            path_in_repo=f"pdfs/{pdf_path.name}",
            repo_id=cfg.HF_DATASET_REPO,
            repo_type="dataset",
            commit_message=f"Add PDF: {pdf_path.name}",
        )
        return str(outcome)
    except Exception as e:
        logger.warning(f"PDF upload: {e}")
        return ""