Spaces:

cyberkyne
/

quant-knowledge-extractor

Sleeping

App Files Files Community

cyberkyne committed on Mar 19

Commit

094a5f6

verified ·

1 Parent(s): bbe52bd

Upload 22 files

Browse files

Files changed (22) hide show

Dockerfile +54 -0
README.md +25 -0
app.py +482 -0
pipeline/__init__.py +0 -0
pipeline/exporter.py +202 -0
pipeline/extractor.py +153 -0
pipeline/julia_bridge.py +212 -0
pipeline/pdf_processor.py +88 -0
requirements.txt +32 -0
src/BacktestEngine.jl +148 -0
src/Indicators.jl +223 -0
src/Manifest.toml +19 -0
src/Optimizer.jl +162 -0
src/Project.toml +7 -0
src/QuantEngine.jl +103 -0
src/SignalCompiler.jl +101 -0
src/strategy_template.jl +118 -0
src/warmup.jl +47 -0
src/warmup_bridge.py +30 -0
utils/__init__.py +0 -0
utils/config.py +125 -0
utils/hf_io.py +192 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,54 @@

+FROM python:3.11-slim
+# ── System dependencies ───────────────────────────────
+RUN apt-get update && apt-get install -y \
+    curl wget git ca-certificates \
+    tesseract-ocr tesseract-ocr-eng \
+    poppler-utils \
+    libgl1 libglib2.0-0 libsm6 libxext6 \
+    build-essential gfortran \
+    && rm -rf /var/lib/apt/lists/*
+# ── Install Julia 1.10 LTS ────────────────────────────
+# Download the release tarball, unpack it to /usr/local/julia, symlink the
+# binary onto PATH, delete the tarball, then run `julia --version` as a check.
+ENV JULIA_VERSION=1.10.7
+RUN wget -q https://julialang-s3.julialang.org/bin/linux/x64/1.10/julia-${JULIA_VERSION}-linux-x86_64.tar.gz \
+    && tar -xzf julia-${JULIA_VERSION}-linux-x86_64.tar.gz \
+    && mv julia-${JULIA_VERSION} /usr/local/julia \
+    && ln -s /usr/local/julia/bin/julia /usr/local/bin/julia \
+    && rm julia-${JULIA_VERSION}-linux-x86_64.tar.gz \
+    && julia --version
+# ── Julia / app environment ───────────────────────────
+ENV JULIA_DEPOT_PATH=/app/.julia
+ENV JULIA_NUM_THREADS=4
+ENV JULIA_PROJECT=/app/src
+WORKDIR /app
+# ── Python dependencies ───────────────────────────────
+# requirements.txt is copied on its own, before the rest of the project.
+COPY requirements.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir -r /tmp/requirements.txt
+# ── Copy project files ────────────────────────────────
+COPY . .
+# ── Step 1: Resolve + precompile Julia packages ───────
+# The Julia code is piped to julia's stdin through printf (not placed on its
+# own Dockerfile lines) to avoid Docker misreading Julia keywords
+# (using/import) as Dockerfile instructions.
+RUN printf 'import Pkg\nPkg.instantiate()\nPkg.precompile()\nprintln("Julia packages resolved")\n' \
+    | julia --project=/app/src
+# ── Step 2: Warmup — JIT-compile all hot paths ────────
+RUN julia --project=/app/src /app/src/warmup.jl
+# ── Step 3: Pre-warm juliacall Python↔Julia bridge ────
+RUN python3 /app/src/warmup_bridge.py
+# ── Runtime ───────────────────────────────────────────
+ENV GRADIO_SERVER_NAME=0.0.0.0
+ENV GRADIO_SERVER_PORT=7860
+# JULIA_PROJECT / JULIA_DEPOT_PATH below repeat the values already set above.
+ENV JULIA_PROJECT=/app/src
+ENV JULIA_DEPOT_PATH=/app/.julia
+EXPOSE 7860
+CMD ["python", "app.py"]

README.md ADDED Viewed

	@@ -0,0 +1,25 @@

+---
+title: Quant Knowledge Extractor
+emoji: 📊
+colorFrom: green
+colorTo: gray
+sdk: docker
+pinned: true
+license: mit
+---
+# 📊 Quant Knowledge Extractor — Julia Engine
+Upload PDFs → Extract strategies → Backtest with Julia → Download MT5 `.set` files.
+**Julia handles all computation** (indicators, backtest engine, walk-forward optimizer).
+Python handles only UI (Gradio) and API calls (Claude, HuggingFace).
+## Setup — Add these Secrets in Space Settings → Variables and Secrets
+| Secret | Description |
+|--------|-------------|
+| `ANTHROPIC_API_KEY` | Claude API key |
+| `HF_TOKEN` | HuggingFace write token |
+| `HF_DATASET_REPO` | `your-username/quant-knowledge-base` |
+| `HF_TICK_REPO` | `your-username/tick-data` |

app.py ADDED Viewed

	@@ -0,0 +1,482 @@

+"""
+app.py — HuggingFace Spaces entry point.
+Architecture:
+  Python  : Gradio UI, Claude API calls, HF I/O, PDF processing
+  Julia   : Indicators, BacktestEngine, WalkForwardOptimizer, SignalCompiler
+Python NEVER does numerical computation. It only:
+  1. Calls Claude API (extraction + strategy code generation)
+  2. Calls Julia via juliacall for all math
+  3. Reads/writes HuggingFace datasets
+  4. Renders Gradio UI
+"""
+import io, json, zipfile, tempfile
+from pathlib import Path
+from datetime import datetime
+import gradio as gr
+from loguru import logger
+import utils.config as cfg
+import utils.hf_io as hf
+from pipeline.pdf_processor import PDFProcessor
+from pipeline.extractor import AIExtractor, Deduplicator
+from pipeline.julia_bridge import full_backtest_pipeline, julia_available
+from pipeline.exporter import (
+    slugify, strategy_md, formula_md,
+    backtest_report_md, optimal_json, mt5_set,
+    julia_config, index_md,
+)
+# ── Lazy KB ───────────────────────────────────────────
+_kb = None
+def get_kb():
+    global _kb
+    if _kb is None: _kb = hf.kb_load()
+    return _kb
+def reset_kb():
+    global _kb; _kb = hf.kb_load()
+# ═══════════════════════════════════════════════════
+#  TAB 1 — UPLOAD & EXTRACT
+# ═══════════════════════════════════════════════════
+def run_extraction(pdf_files, progress=gr.Progress()):
+    if not pdf_files: return "⚠️ No PDFs uploaded.", ""
+    if not cfg.ANTHROPIC_API_KEY: return "❌ ANTHROPIC_API_KEY secret not set.", ""
+    if not cfg.HF_DATASET_REPO:   return "❌ HF_DATASET_REPO secret not set.", ""
+    proc  = PDFProcessor()
+    ai    = AIExtractor()
+    dedup = Deduplicator()
+    kb    = get_kb()
+    log   = []
+    totals = {k:{"added":0,"merged":0,"skipped":0} for k in ("strategies","formulas","systems")}
+    hf_files = []
+    for i, pdf_file in enumerate(pdf_files):
+        path = Path(pdf_file.name)
+        progress(i/len(pdf_files), desc=f"{path.name}")
+        log.append(f"\n📖 [{i+1}/{len(pdf_files)}] {path.name}")
+        try:
+            chunks = list(proc.process(path))
+            log.append(f"  → {len(chunks)} chunks")
+        except Exception as e:
+            log.append(f"  ❌ {e}"); continue
+        for chunk in chunks:
+            extracted = ai.extract(chunk)
+            stats     = dedup.process(extracted, kb)
+            for kind in ("strategies","formulas","systems"):
+                for act in ("added","merged","skipped"):
+                    totals[kind][act] += stats[kind][act]
+        log.append(f"  → New: {totals['strategies']['added']} strats, {totals['formulas']['added']} formulas")
+        if cfg.HF_TOKEN: hf.pdf_upload(path)
+    for cid, rec in kb["strategies"].items():
+        hf_files.append((f"extracted/strategies/{slugify(rec.get('name',''))}.md",
+                         strategy_md(rec).encode()))
+    for cid, rec in kb["formulas"].items():
+        hf_files.append((f"extracted/formulas/{slugify(rec.get('name',''))}.md",
+                         formula_md(rec).encode()))
+    progress(0.9, desc="Saving to HuggingFace…")
+    hf.kb_save(kb)
+    if hf_files and cfg.HF_TOKEN:
+        pushed = hf.push_batch(hf_files, "Update extracted knowledge")
+        log.append(f"\n☁️ Pushed {pushed} files to HuggingFace")
+    reset_kb()
+    counts = {k: len(kb[k]) for k in kb}
+    summary = f"""✅ Extraction Complete
+PDFs processed: {len(pdf_files)}
+Strategies  — added: {totals['strategies']['added']}  merged: {totals['strategies']['merged']}  skipped: {totals['strategies']['skipped']}
+Formulas    — added: {totals['formulas']['added']}  merged: {totals['formulas']['merged']}  skipped: {totals['formulas']['skipped']}
+Systems     — added: {totals['systems']['added']}  merged: {totals['systems']['merged']}  skipped: {totals['systems']['skipped']}
+KB totals: {counts['strategies']} strategies · {counts['formulas']} formulas · {counts['systems']} systems
+Tokens used: {ai.tokens_used:,}"""
+    return summary, "\n".join(log[-40:])
+# ═══════════════════════════════════════════════════
+#  TAB 2 — BROWSE KB
+# ═══════════════════════════════════════════════════
+def search_strategies(query, category):
+    kb = get_kb(); items = list(kb["strategies"].values())
+    if category and category != "All":
+        items = [x for x in items if x.get("category") == category]
+    if query:
+        q = query.lower()
+        items = [x for x in items if q in x.get("name","").lower() or q in x.get("description","").lower()]
+    rows = [[x.get("name","")[:50], x.get("category",""),
+             x.get("description","")[:100],
+             ", ".join(x.get("sources",[]))[:40], len(x.get("layers",[]))]
+            for x in items[:100]]
+    return rows, f"{len(items)} strategies"
+def search_formulas(query):
+    kb = get_kb(); items = list(kb["formulas"].values())
+    if query:
+        q = query.lower()
+        items = [x for x in items if q in x.get("name","").lower() or q in x.get("purpose","").lower()]
+    return [[x.get("name","")[:50], x.get("category",""),
+             x.get("purpose","")[:80],
+             "✅" if x.get("latex") else "—",
+             ", ".join(x.get("sources",[]))[:40]] for x in items[:100]]
+def dl_strategy(name):
+    kb = get_kb()
+    for rec in kb["strategies"].values():
+        if rec.get("name","").lower() == name.strip().lower():
+            tmp = tempfile.mktemp(suffix=".md")
+            Path(tmp).write_text(strategy_md(rec), encoding="utf-8")
+            return tmp
+    return None
+def dl_all_strategies_zip(category):
+    kb = get_kb(); items = list(kb["strategies"].values())
+    if category and category != "All":
+        items = [x for x in items if x.get("category") == category]
+    tmp = tempfile.mktemp(suffix=".zip")
+    with zipfile.ZipFile(tmp, "w", zipfile.ZIP_DEFLATED) as zf:
+        for rec in items:
+            zf.writestr(f"{slugify(rec.get('name','unknown'))}.md", strategy_md(rec))
+    return tmp
+# ═══════════════════════════════════════════════════
+#  TAB 3 — BACKTEST (Julia Engine)
+# ═══════════════════════════════════════════════════
+def load_symbols():
+    syms = hf.tick_list_symbols()
+    return gr.update(choices=syms, value=syms[:2] if len(syms)>=2 else syms)
+def run_backtests(selected_symbols, selected_timeframes,
+                  strategy_filter, max_strategies, viable_only,
+                  progress=gr.Progress()):
+    """Generate Julia code for KB strategies and backtest each on every symbol/timeframe.
+
+    Args:
+        selected_symbols: Symbols to test; when empty, the first two entries of
+            ``hf.tick_list_symbols()`` are used.
+        selected_timeframes: Timeframes to test; defaults to ``["1h"]`` when empty.
+        strategy_filter: Optional case-insensitive substring of the strategy name.
+        max_strategies: Upper bound on strategies to run; values ``<= 0`` mean no limit.
+        viable_only: When true, per-result files are pushed only for viable results.
+        progress: Gradio progress tracker.
+
+    Returns:
+        ``(summary, log)``: the summary text and the last 60 log lines. When a
+        precondition fails, an error message and an empty log.
+
+    Note:
+        "Strategies compiled" in the summary is ``len(strats)``, so it also
+        counts strategies whose code generation failed and were skipped.
+    """
+    if not cfg.HF_TICK_REPO:       return "❌ HF_TICK_REPO not set.", ""
+    if not cfg.ANTHROPIC_API_KEY:  return "❌ ANTHROPIC_API_KEY not set.", ""
+    if not julia_available():      return "❌ Julia runtime not available. Check build logs.", ""
+    ai   = AIExtractor()
+    kb   = get_kb()
+    strats = list(kb["strategies"].values())
+    if strategy_filter:
+        strats = [s for s in strats if strategy_filter.lower() in s.get("name","").lower()]
+    if max_strategies > 0:
+        strats = strats[:int(max_strategies)]
+    if not strats: return "⚠️ No strategies. Run extraction first.", ""
+    symbols    = selected_symbols or hf.tick_list_symbols()[:2]
+    timeframes = selected_timeframes or ["1h"]
+    log, all_results, viable_count = [], [], 0
+    for si, rec in enumerate(strats):
+        name = rec.get("name","?")
+        progress(si/len(strats), desc=f"[{si+1}/{len(strats)}] {name[:35]}")
+        # 1. Generate Julia signal code via Claude
+        jl_code = ai.compile_strategy_code(rec)
+        if not jl_code:
+            log.append(f"❌ Code gen failed: {name[:40]}"); continue
+        log.append(f"✅ Julia code generated: {name[:40]}")
+        for sym in symbols:
+            for tf in timeframes:
+                df = hf.tick_load(sym, tf)
+                # Combinations with missing data or fewer than 200 rows are skipped.
+                if df is None or len(df) < 200:
+                    log.append(f"  ⚠️ {sym} {tf}: no data"); continue
+                # 2. Full Julia pipeline (compile → optimize → backtest)
+                result = full_backtest_pipeline(
+                    strategy_code  = jl_code,
+                    strategy_name  = name,
+                    open_p         = df["open"].values,
+                    high           = df["high"].values,
+                    low            = df["low"].values,
+                    close          = df["close"].values,
+                    volume         = df["volume"].values,
+                    timeframe      = tf,
+                    symbol         = sym,
+                    n_windows      = cfg.WF_WINDOWS,
+                    is_ratio       = cfg.WF_IS_RATIO,
+                    min_trades     = cfg.MIN_TRADES,
+                    min_sharpe     = cfg.MIN_SHARPE,
+                    max_combos     = cfg.MAX_PARAM_COMBOS,
+                    initial_equity = cfg.INITIAL_EQUITY,
+                    commission_pct = cfg.COMMISSION_PCT,
+                    risk_per_trade = cfg.RISK_PER_TRADE,
+                )
+                # Every result is recorded for the index, viable or not.
+                all_results.append(result)
+                # 3. Build + push output files
+                if cfg.HF_TOKEN and cfg.HF_DATASET_REPO:
+                    if not viable_only or result.get("is_viable"):
+                        hf.push_result(
+                            name, sym, tf,
+                            backtest_report_md(result, rec),
+                            optimal_json(result, rec),
+                            mt5_set(result, rec),
+                            julia_config(result),
+                        )
+                status = "✅" if result.get("is_viable") else "❌"
+                log.append(
+                    f"  {status} {sym} {tf}: "
+                    f"Sharpe={result.get('oos_sharpe_mean',0):.2f} "
+                    f"DD={result.get('oos_max_dd',0):.1f}% "
+                    f"Score={result.get('robustness',0):.0f}")
+                if result.get("is_viable"): viable_count += 1
+    # 4. Push master index
+    if all_results and cfg.HF_TOKEN:
+        hf.push_index(index_md(all_results), {
+            "generated": datetime.now().isoformat(),
+            "engine": "Julia 1.10",
+            "total_strategies": len(all_results),
+            "viable_count": viable_count,
+            "strategies": all_results,
+        })
+    # max(..., 1) in the pass-rate expression avoids dividing by zero when nothing was tested.
+    summary = f"""🏁 Julia Backtest Complete
+Engine:               Julia 1.10 BacktestEngine.jl
+Strategies compiled:  {len(strats)}
+Combinations tested:  {len(all_results)}
+Viable strategies:    {viable_count}
+Pass rate:            {viable_count/max(len(all_results),1)*100:.1f}%
+Results on HuggingFace:
+  {cfg.HF_DATASET_REPO}/optimal_sets/BACKTEST_INDEX.md"""
+    return summary, "\n".join(log[-60:])
+# ═══════════════════════════════════════════════════
+#  TAB 4 — RESULTS
+# ═══════════════════════════════════════════════════
+def load_results():
+    data = hf.fetch_index()
+    if not data: return [], "No results yet."
+    strats  = data.get("strategies",[])
+    viable  = sorted([s for s in strats if s.get("is_viable")],
+                     key=lambda x: x.get("oos_sharpe_mean",0), reverse=True)
+    rows    = [[s.get("strategy","")[:45], s.get("symbol",""), s.get("timeframe",""),
+                f'{s.get("oos_sharpe_mean",0):.2f}', f'{s.get("oos_max_dd",0):.1f}%',
+                f'{s.get("oos_win_rate",0):.1f}%', f'{s.get("oos_pf_mean",0):.2f}',
+                f'{s.get("robustness",0):.0f}'] for s in viable]
+    count   = (f"✅ {len(viable)} viable / {len(strats)} tested | "
+               f"Engine: Julia | {data.get('generated','')[:16]}")
+    return rows, count
+def dl_result_file(name, symbol, tf, ftype):
+    sl  = slugify(name); sym = symbol.upper().strip()
+    pre = f"{sl}_{sym}_{tf}"
+    ext_map = {"MT5 .set file": f"optimal_sets/{pre}.set",
+               "Optimal JSON":  f"optimal_sets/{pre}_optimal.json",
+               "Julia config":  f"optimal_sets/{pre}_config.jl",
+               "Full report":   f"backtests/{sl}/{pre}_report.md"}
+    remote = ext_map.get(ftype,"")
+    if not remote: return None
+    data = hf.fetch_file(remote)
+    if not data: return None
+    tmp = tempfile.mktemp(suffix=Path(remote).suffix)
+    Path(tmp).write_bytes(data)
+    return tmp
+def dl_all_sets():
+    data = hf.fetch_index()
+    if not data: return None
+    tmp = tempfile.mktemp(suffix=".zip")
+    with zipfile.ZipFile(tmp,"w",zipfile.ZIP_DEFLATED) as zf:
+        for s in data.get("strategies",[]):
+            if not s.get("is_viable"): continue
+            sl = slugify(s["strategy"]); sym = s["symbol"]; tf = s["timeframe"]
+            content = hf.fetch_file(f"optimal_sets/{sl}_{sym}_{tf}.set")
+            if content: zf.writestr(f"{sl}_{sym}_{tf}.set", content)
+    return tmp
+# ═══════════════════════════════════════════════════
+#  TAB 5 — SETUP
+# ═══════════════════════════════════════════════════
+def check_config():
+    checks = [
+        ("ANTHROPIC_API_KEY", cfg.ANTHROPIC_API_KEY, "Claude API"),
+        ("HF_TOKEN",          cfg.HF_TOKEN,           "HF write access"),
+        ("HF_DATASET_REPO",   cfg.HF_DATASET_REPO,    "Results storage"),
+        ("HF_TICK_REPO",      cfg.HF_TICK_REPO,        "Tick data source"),
+    ]
+    kb      = get_kb()
+    symbols = hf.tick_list_symbols() if cfg.HF_TICK_REPO else []
+    jl_ok   = julia_available()
+    lines = ["## Configuration Status", ""]
+    for name, val, desc in checks:
+        icon = "✅" if val else "❌"
+        lines.append(f"{icon} `{name}` — {desc}")
+    lines += ["", "## Julia Engine", "",
+              f"{'✅' if jl_ok else '❌'} Julia runtime: {'available' if jl_ok else 'not available (check build logs)'}",
+              "", "## Data Status", "",
+              f"- Tick symbols: **{len(symbols)}** — {', '.join(symbols[:8])}",
+              f"- Strategies in KB: **{len(kb['strategies'])}**",
+              f"- Formulas in KB: **{len(kb['formulas'])}**",
+              "", "## Backtest Settings", "",
+              f"- WF Windows: `{cfg.WF_WINDOWS}` · IS Ratio: `{cfg.WF_IS_RATIO}`",
+              f"- Min Trades: `{cfg.MIN_TRADES}` · Min Sharpe: `{cfg.MIN_SHARPE}`",
+              f"- Commission: `{cfg.COMMISSION_PCT*100:.3f}%` · Risk/trade: `{cfg.RISK_PER_TRADE*100:.1f}%`",
+              f"- Timeframes: `{', '.join(cfg.BACKTEST_TFS)}`"]
+    return "\n".join(lines)
+# ═══════════════════════════════════════════════════
+#  BUILD APP
+# ═══════════════════════════════════════════════════
+# Category choices for the strategy dropdown: "All" plus the configured categories.
+CATS = ["All"] + cfg.CATEGORIES
+# The whole UI is declared inside one Blocks context; each tab wires its buttons
+# to the handler functions defined above.
+with gr.Blocks(
+    title="Quant Knowledge Extractor — Julia Engine",
+    theme=gr.themes.Base(primary_hue="green", neutral_hue="gray"),
+    css=".status-box{font-family:monospace;font-size:.82em}"
+) as demo:
+    gr.HTML("""
+    <div style="text-align:center;padding:1.2em 0 .3em">
+      <h1 style="font-size:2em;color:#16a34a;margin:0">📊 Quant Knowledge Extractor</h1>
+      <p style="color:#6b7280;margin:.4em 0 0">
+        Julia 1.10 Engine · BacktestEngine.jl · WalkForward Optimizer · MT5 .set Output
+      </p>
+    </div>""")
+    with gr.Tabs():
+        # Tab 1 — Extract
+        with gr.Tab("📤 Upload & Extract"):
+            gr.Markdown("### Upload algorithmic trading PDFs — OCR applied automatically")
+            with gr.Row():
+                with gr.Column(scale=2):
+                    pdf_in  = gr.File(label="Drop PDFs here", file_count="multiple", file_types=[".pdf"])
+                    ext_btn = gr.Button("🚀 Extract Knowledge", variant="primary", size="lg")
+                with gr.Column(scale=1):
+                    ext_out = gr.Textbox(label="Result", lines=14, interactive=False, elem_classes=["status-box"])
+            ext_log = gr.Textbox(label="Log", lines=8, interactive=False, elem_classes=["status-box"])
+            ext_btn.click(fn=run_extraction, inputs=[pdf_in], outputs=[ext_out, ext_log])
+        # Tab 2 — Browse
+        with gr.Tab("📚 Knowledge Base"):
+            with gr.Tabs():
+                with gr.Tab("📈 Strategies"):
+                    with gr.Row():
+                        sq = gr.Textbox(label="Search", placeholder="RSI, breakout, Kelly…")
+                        sc = gr.Dropdown(choices=CATS, value="All", label="Category")
+                        sb = gr.Button("🔍 Search", variant="primary")
+                    st = gr.Dataframe(headers=["Name","Category","Description","Sources","Variants"],
+                                     datatype=["str"]*4+["number"], interactive=False)
+                    sn = gr.Markdown("")
+                    with gr.Row():
+                        sni = gr.Textbox(label="Name to download")
+                        sdb = gr.Button("⬇️ Download MD"); sdf = gr.File(label="")
+                    szb = gr.Button("📦 Category ZIP"); szf = gr.File(label="")
+                    sb.click(fn=search_strategies, inputs=[sq,sc], outputs=[st,sn])
+                    sdb.click(fn=dl_strategy, inputs=[sni], outputs=[sdf])
+                    # The ZIP download reuses the category dropdown as its filter.
+                    szb.click(fn=dl_all_strategies_zip, inputs=[sc], outputs=[szf])
+                with gr.Tab("∑ Formulas"):
+                    with gr.Row():
+                        fq = gr.Textbox(label="Search", placeholder="Sharpe, Kelly, ATR…")
+                        fb = gr.Button("🔍 Search", variant="primary")
+                    ft = gr.Dataframe(headers=["Name","Category","Purpose","LaTeX","Sources"],
+                                     datatype=["str"]*5, interactive=False)
+                    fb.click(fn=search_formulas, inputs=[fq], outputs=[ft])
+        # Tab 3 — Backtest
+        with gr.Tab("🔬 Julia Backtest"):
+            gr.Markdown(
+                "### Walk-Forward Backtest — Julia Engine\n"
+                "Claude generates Julia signal code → Julia compiles + optimizes → "
+                "MT5 `.set` files pushed to HuggingFace."
+            )
+            with gr.Row():
+                with gr.Column(scale=2):
+                    bt_load  = gr.Button("🔄 Load Symbols from HF")
+                    # Starts empty; filled by load_symbols when the button above is clicked.
+                    bt_syms  = gr.CheckboxGroup(label="Symbols", choices=[], value=[])
+                    bt_tfs   = gr.CheckboxGroup(
+                        label="Timeframes", value=["1h","4h"],
+                        choices=["1m","5m","15m","30m","1h","4h","1d"])
+                    bt_filt  = gr.Textbox(label="Strategy filter (optional)")
+                    bt_max   = gr.Slider(0, 500, value=0, step=10, label="Max strategies (0=all)")
+                    bt_viable= gr.Checkbox(label="Push only VIABLE to HuggingFace", value=True)
+                    bt_run   = gr.Button("🚀 Run Julia Backtests", variant="primary", size="lg")
+                with gr.Column(scale=1):
+                    bt_out = gr.Textbox(label="Summary", lines=12, interactive=False, elem_classes=["status-box"])
+            bt_log = gr.Textbox(label="Log", lines=12, interactive=False, elem_classes=["status-box"])
+            bt_load.click(fn=load_symbols, outputs=[bt_syms])
+            bt_run.click(fn=run_backtests,
+                         inputs=[bt_syms, bt_tfs, bt_filt, bt_max, bt_viable],
+                         outputs=[bt_out, bt_log])
+        # Tab 4 — Results
+        with gr.Tab("🏆 Results"):
+            gr.Markdown("### Viable Strategies — Download MT5 `.set` & Julia Configs")
+            res_ref = gr.Button("🔄 Refresh from HuggingFace", variant="primary")
+            res_tbl = gr.Dataframe(
+                headers=["Strategy","Symbol","TF","Sharpe","Max DD","Win%","PF","Score"],
+                datatype=["str"]*8, interactive=False)
+            res_cnt = gr.Markdown("")
+            gr.Markdown("#### Download individual file")
+            with gr.Row():
+                rn = gr.Textbox(label="Strategy name"); rs = gr.Textbox(label="Symbol")
+                rt = gr.Textbox(label="Timeframe")
+                # These choices must match the keys of ext_map in dl_result_file.
+                rf = gr.Dropdown(choices=["MT5 .set file","Optimal JSON",
+                                           "Julia config","Full report"],
+                                 value="MT5 .set file", label="File type")
+            rdb = gr.Button("⬇️ Download", variant="primary"); rdf = gr.File(label="")
+            gr.Markdown("#### Batch download all viable strategies")
+            with gr.Row():
+                rsb = gr.Button("🎯 All MT5 .set (ZIP)"); rsf = gr.File(label="")
+            res_ref.click(fn=load_results, outputs=[res_tbl, res_cnt])
+            rdb.click(fn=dl_result_file, inputs=[rn,rs,rt,rf], outputs=[rdf])
+            rsb.click(fn=dl_all_sets, outputs=[rsf])
+            # Also populate the results table via the app's load event.
+            demo.load(fn=load_results, outputs=[res_tbl, res_cnt])
+        # Tab 5 — Setup
+        with gr.Tab("⚙️ Setup & Status"):
+            gr.Markdown("""### Required Secrets (Space Settings → Variables and Secrets)
+| Secret | Description |
+|--------|-------------|
+| `ANTHROPIC_API_KEY` | Claude API key |
+| `HF_TOKEN` | HuggingFace write token |
+| `HF_DATASET_REPO` | `your-username/quant-knowledge-base` |
+| `HF_TICK_REPO` | `your-username/tick-data` |
+### Tick Data Format
+Upload to your `tick-data` dataset:
+```
+EURUSD/ticks.parquet   (columns: timestamp, bid, ask OR open,high,low,close,volume)
+BTCUSDT/1h.parquet     (pre-built OHLCV — faster)
+```
+""")
+            cfg_ref = gr.Button("🔄 Check Status")
+            # check_config() is called here, while the UI is being built, to supply the initial text.
+            cfg_out = gr.Markdown(check_config())
+            cfg_ref.click(fn=check_config, outputs=[cfg_out])
+    gr.HTML("""<div style="text-align:center;padding:.8em;color:#9ca3af;font-size:.75em">
+      Quant Knowledge Extractor · Julia 1.10 Engine · HuggingFace Spaces
+    </div>""")
+# Launch only when run as a script.
+if __name__ == "__main__":
+    demo.launch()

pipeline/__init__.py ADDED Viewed

File without changes

pipeline/exporter.py ADDED Viewed

	@@ -0,0 +1,202 @@

+"""pipeline/exporter.py — Generate markdown, MT5 .set, Julia config strings."""
+import re, json
+from datetime import datetime
+def slugify(t):
+    t = t.lower().strip()
+    t = re.sub(r"[^\w\s-]","",t)
+    t = re.sub(r"[\s_-]+","-",t)
+    return t[:50]
+def strategy_md(rec):
+    name    = rec.get("name","Unknown")
+    sources = rec.get("sources",[])
+    layers  = rec.get("layers",[])
+    lines   = [f"# {name}", "",
+               f"> **Category:** {rec.get('category','')}  ",
+               f"> **Sources:** {', '.join(sources)[:80]}  ",
+               f"> **ID:** `{rec.get('canonical_id','')}`",
+               "", "---", "", "## Description", "", rec.get("description",""), ""]
+    for label, key in [("## Entry Rules","entry_rules"),("## Exit Rules","exit_rules"),("## Filters","filters")]:
+        items = rec.get(key,[])
+        if items:
+            lines += [label, ""]
+            for i,r in enumerate(items,1): lines.append(f"{i}. {r}")
+            lines.append("")
+    if rec.get("mathematical_basis"):
+        lines += ["## Mathematical Basis", "", rec["mathematical_basis"], ""]
+    if layers:
+        lines += [f"## Variants ({len(layers)} sources)", ""]
+        for i,l in enumerate(layers,1):
+            d = l.get("data",{}); lines.append(f"### Variant {i} — {l.get('source_file','')}")
+            if d.get("description"): lines.append(d["description"])
+            lines.append("")
+    lines += ["---","","*Generated by Quant Knowledge Extractor — Julia Engine*"]
+    return "\n".join(lines)
+def formula_md(rec):
+    lines = [f"# {rec.get('name','Unknown')}", "",
+             f"> **Category:** {rec.get('category','')}  ",
+             f"> **Sources:** {', '.join(rec.get('sources',[]))[:80]}",
+             "", "---", ""]
+    if rec.get("latex"):
+        lines += ["## Formula (LaTeX)", "", "$$", rec["latex"], "$$", ""]
+    if rec.get("plain_text"):
+        lines += ["## Plain Text", "", "```", rec["plain_text"], "```", ""]
+    if rec.get("purpose"):
+        lines += ["## Purpose", "", rec["purpose"], ""]
+    if rec.get("variables"):
+        lines += ["## Variables", "", "| Symbol | Description |", "|--------|-------------|"]
+        for s,d in rec["variables"].items(): lines.append(f"| `{s}` | {d} |")
+        lines.append("")
+    lines += ["---","","*Generated by Quant Knowledge Extractor — Julia Engine*"]
+    return "\n".join(lines)
+def backtest_report_md(result: dict, record: dict) -> str:
+    name   = result.get("strategy","Unknown")
+    sym    = result.get("symbol","")
+    tf     = result.get("timeframe","")
+    viable = result.get("is_viable", False)
+    status = "✅ VIABLE" if viable else "❌ NOT VIABLE"
+    now    = datetime.now().strftime("%Y-%m-%d %H:%M UTC")
+    lines  = [
+        f"# {name}", f"## Backtest Report — {sym} {tf}", "",
+        f"> **Status:** {status}  ",
+        f"> **Robustness:** {result.get('robustness',0):.0f}/100  ",
+        f"> **Engine:** Julia 1.10  ",
+        f"> **Generated:** {now}", "", "---", "", "## Viability", "",
+    ]
+    for r in result.get("reasons",[]): lines.append(f"- {r}")
+    lines += [""]
+    if result.get("optimal_params"):
+        lines += ["## Optimal Parameters (Julia Walk-Forward)", "",
+                  "| Parameter | Value |", "|-----------|-------|"]
+        for k,v in result["optimal_params"].items():
+            lines.append(f"| `{k}` | `{v:.4g}` |")
+        lines += [""]
+    lines += [
+        "## OOS Performance (Walk-Forward Aggregate)", "",
+        "| Metric | Value |", "|--------|-------|",
+        f"| Sharpe (mean±std) | `{result.get('oos_sharpe_mean',0):.3f} ± {result.get('oos_sharpe_std',0):.3f}` |",
+        f"| Win Rate | `{result.get('oos_win_rate',0):.1f}%` |",
+        f"| Max Drawdown (mean) | `{result.get('oos_max_dd',0):.2f}%` |",
+        f"| Profit Factor | `{result.get('oos_pf_mean',0):.3f}` |",
+        f"| Total OOS Trades | `{result.get('oos_trades',0)}` |",
+        f"| WF Efficiency Ratio | `{result.get('wf_efficiency',0):.3f}` |",
+        f"| Robustness Score | `{result.get('robustness',0):.0f}/100` |",
+    ]
+    if record.get("description"):
+        lines += ["", "## Strategy Description", "", record["description"]]
+    lines += ["", "---", "", "*Backtested using Julia 1.10 — BacktestEngine.jl + Optimizer.jl*"]
+    return "\n".join(lines)
+def optimal_json(result: dict, record: dict) -> dict:
+    return {
+        "metadata": {
+            "strategy": result.get("strategy"), "symbol": result.get("symbol"),
+            "timeframe": result.get("timeframe"),
+            "generated": datetime.now().isoformat(),
+            "engine": "Julia 1.10",
+            "is_viable": result.get("is_viable"), "robustness": result.get("robustness"),
+        },
+        "optimal_parameters": result.get("optimal_params",{}),
+        "performance": {
+            "oos_sharpe_mean": round(result.get("oos_sharpe_mean",0),4),
+            "oos_sharpe_std":  round(result.get("oos_sharpe_std",0),4),
+            "oos_win_rate":    round(result.get("oos_win_rate",0),2),
+            "oos_max_dd_pct":  round(result.get("oos_max_dd",0),2),
+            "oos_pf_mean":     round(result.get("oos_pf_mean",0),4),
+            "oos_total_trades":result.get("oos_trades",0),
+            "wf_efficiency":   round(result.get("wf_efficiency",0),4),
+        },
+        "viability": {"is_viable": result.get("is_viable"), "reasons": result.get("reasons",[])},
+    }
+def mt5_set(result: dict, record: dict) -> str:
+    name   = result.get("strategy","Unknown")
+    sym    = result.get("symbol",""); tf = result.get("timeframe","")
+    params = result.get("optimal_params",{})
+    lines  = [
+        f"; MT5 Strategy Tester Set File — Generated by Julia Engine",
+        f"; Strategy: {name}", f"; Symbol: {sym}  Timeframe: {tf}",
+        f"; OOS Sharpe: {result.get('oos_sharpe_mean',0):.3f}  Score: {result.get('robustness',0):.0f}/100",
+        f"; Viable: {result.get('is_viable', False)}",
+        f";", f"; 1. Open MT5 → View → Strategy Tester",
+        f"; 2. Select Expert Advisor", f"; 3. Inputs → Open → select this file", "",
+    ]
+    for k, v in params.items():
+        mt5k = "".join(w.capitalize() for w in k.replace("-","_").split("_"))
+        try:
+            fv   = float(v); step = max(1.0, abs(fv)*0.1)
+            lines.append(f"{mt5k}={fv:.4g}||{max(0,fv-step*3):.4g}||{step:.4g}||{fv+step*3:.4g}||1")
+        except: lines.append(f"{mt5k}={v}")
+    lines += ["","RiskPercent=1.0||0.5||0.5||3.0||1","LotSize=0.1||0.01||0.01||1.0||1"]
+    return "\n".join(lines)
+def julia_config(result: dict) -> str:
+    name   = result.get("strategy","Unknown")
+    sym    = result.get("symbol",""); tf = result.get("timeframe","")
+    struct = "".join(w.capitalize() for w in name.split()[:4])
+    params = result.get("optimal_params",{})
+    plines = "\n    ".join(
+        f'{k.lower().replace("-","_")} = {float(v):.6g}'
+        for k,v in params.items()
+    ) or "# no parameters"
+    return f'''# Optimal Parameters — {name}
+# Engine: Julia 1.10 BacktestEngine.jl + Optimizer.jl
+# Symbol: {sym}  Timeframe: {tf}
+# OOS Sharpe: {result.get("oos_sharpe_mean",0):.3f}  Robustness: {result.get("robustness",0):.0f}/100
+# Viable: {result.get("is_viable",False)}
+# Generated: {datetime.now().strftime("%Y-%m-%d")}
+function optimal_params()::{struct}Params
+    return {struct}Params(
+        {plines}
+    )
+end
+const BACKTEST_METADATA = (
+    strategy   = "{name}",
+    symbol     = "{sym}",
+    timeframe  = "{tf}",
+    engine     = "Julia 1.10",
+    oos_sharpe = {result.get("oos_sharpe_mean",0):.4f},
+    max_dd_pct = {result.get("oos_max_dd",0):.2f},
+    win_rate   = {result.get("oos_win_rate",0):.1f},
+    wf_eff     = {result.get("wf_efficiency",0):.4f},
+    viable     = {str(result.get("is_viable",False)).lower()},
+)
+'''
+def index_md(all_results: list) -> str:
+    viable = sorted([r for r in all_results if r.get("is_viable")],
+                    key=lambda r: r.get("oos_sharpe_mean",0), reverse=True)
+    not_v  = [r for r in all_results if not r.get("is_viable")]
+    now    = datetime.now().strftime("%Y-%m-%d %H:%M UTC")
+    lines  = [
+        "# Backtest Results Index — Julia Engine", "",
+        f"> Generated: {now}  Engine: Julia 1.10  ",
+        f"> Total: {len(all_results)}  Viable: {len(viable)}", "", "---", "",
+        "## ✅ Viable Strategies (by OOS Sharpe)", "",
+        "| # | Strategy | Symbol | TF | Sharpe | DD% | Win% | PF | Score |",
+        "|---|----------|--------|----|--------|-----|------|----|-------|",
+    ]
+    for i,r in enumerate(viable,1):
+        lines.append(
+            f"| {i} | {r.get('strategy','')[:35]} | {r.get('symbol','')} | {r.get('timeframe','')} | "
+            f"{r.get('oos_sharpe_mean',0):.2f} | {r.get('oos_max_dd',0):.1f} | "
+            f"{r.get('oos_win_rate',0):.1f} | {r.get('oos_pf_mean',0):.2f} | {r.get('robustness',0):.0f} |")
+    lines += ["", "## ❌ Not Viable", "", "| Strategy | Symbol | TF | Reason |", "|----------|--------|----|--------|"]
+    for r in not_v[:30]:
+        reasons = r.get("reasons",[])
+        reason = reasons[0][:60] if reasons else ""
+        lines.append(f"| {r.get('strategy','')[:35]} | {r.get('symbol','')} | {r.get('timeframe','')} | {reason} |")
+    return "\n".join(lines)

pipeline/extractor.py ADDED Viewed

	@@ -0,0 +1,153 @@

+"""pipeline/extractor.py — Claude API extraction + 3-layer deduplication."""
+import json, time, hashlib
+from typing import Optional
+import numpy as np
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+import anthropic
+from loguru import logger
+import utils.config as cfg
+class AIExtractor:
+    MODEL = "claude-sonnet-4-20250514"
+    def __init__(self):
+        self.client = anthropic.Anthropic(api_key=cfg.ANTHROPIC_API_KEY)
+        self.tokens_used = 0
+    def extract(self, chunk) -> dict:
+        if chunk.word_count < 20:
+            return {"strategies":[],"formulas":[],"systems":[]}
+        prompt = cfg.EXTRACTION_PROMPT.format(
+            source_file=chunk.source_file, page_start=chunk.page_start,
+            page_end=chunk.page_end, text=chunk.text)
+        raw = self._call(prompt)
+        if not raw: return {"strategies":[],"formulas":[],"systems":[]}
+        return self._parse(raw, chunk)
+    def _call(self, prompt, retries=3):
+        delay = 2.0
+        for attempt in range(retries):
+            try:
+                resp = self.client.messages.create(
+                    model=self.MODEL, max_tokens=4096,
+                    messages=[{"role":"user","content":prompt}])
+                self.tokens_used += resp.usage.input_tokens + resp.usage.output_tokens
+                return resp.content[0].text if resp.content else ""
+            except anthropic.RateLimitError:
+                logger.warning(f"Rate limit — {delay}s")
+                time.sleep(delay); delay *= 2
+            except Exception as e:
+                logger.error(f"API: {e}")
+                if attempt == retries-1: return ""
+                time.sleep(delay); delay *= 2
+        return ""
+    def _parse(self, raw, chunk):
+        raw = raw.strip()
+        if raw.startswith("```"): raw = "\n".join(raw.split("\n")[1:]).rstrip("`").strip()
+        try: data = json.loads(raw)
+        except:
+            try:
+                s=raw.find("{"); e=raw.rfind("}")
+                data=json.loads(raw[s:e+1]) if s!=-1 else {}
+            except: return {"strategies":[],"formulas":[],"systems":[]}
+        result = {}
+        for kind in ("strategies","formulas","systems"):
+            result[kind] = []
+            for item in data.get(kind,[]):
+                if isinstance(item,dict) and item.get("name"):
+                    item.update({"source_file":chunk.source_file,
+                                 "source_pages":f"{chunk.page_start}-{chunk.page_end}"})
+                    item["content_hash"] = _hash(
+                        item.get("description","") + item.get("plain_text","") +
+                        item.get("entry_system","") + item.get("name",""))
+                    result[kind].append(item)
+        return result
+    def compile_strategy_code(self, record: dict) -> str:
+        """Ask Claude to generate Julia signal code for this strategy."""
+        compact = {k: record.get(k) for k in
+            ("name","category","description","entry_rules","exit_rules",
+             "filters","parameters","mathematical_basis")}
+        prompt = cfg.COMPILER_PROMPT.format(
+            strategy_json=json.dumps(compact, indent=2))
+        code = self._call(prompt)
+        if not code: return ""
+        if "```" in code:
+            lines = code.split("\n")
+            in_block = False; out = []
+            for line in lines:
+                if line.strip().startswith("```"): in_block = not in_block; continue
+                if in_block: out.append(line)
+            code = "\n".join(out)
+        return code.strip()
+class Deduplicator:
+    def __init__(self, threshold=None):
+        self.threshold = threshold or cfg.SIMILARITY_THRESHOLD
+        self._vec = TfidfVectorizer(ngram_range=(1,2), max_features=5000, stop_words="english")
+    def process(self, extracted, kb):
+        stats = {k:{"added":0,"merged":0,"skipped":0} for k in ("strategies","formulas","systems")}
+        for kind in ("strategies","formulas","systems"):
+            for item in extracted.get(kind,[]):
+                stats[kind][self._process_one(item, kb[kind], kind)] += 1
+        return stats
+    def _process_one(self, item, store, kind):
+        h = item.get("content_hash","")
+        for e in store.values():
+            if e.get("content_hash") == h:
+                self._add_src(item, e); return "skipped"
+        sid = self._similar(item, store, kind)
+        if sid: self._merge(item, store[sid]); return "merged"
+        cid = _cid(item["name"], h, kind)
+        item["canonical_id"] = cid
+        item["sources"]  = [item.get("source_file","")]
+        item["layers"]   = []
+        store[cid]       = item
+        return "added"
+    def _similar(self, item, store, kind):
+        if not store: return None
+        texts = [_text(v,kind) for v in store.values()] + [_text(item,kind)]
+        try:
+            mat  = self._vec.fit_transform(texts)
+            sims = cosine_similarity(mat[-1], mat[:-1])[0]
+            idx  = int(np.argmax(sims))
+            if sims[idx] >= self.threshold:
+                return list(store.keys())[idx]
+        except: pass
+        return None
+    @staticmethod
+    def _add_src(item, existing):
+        s = item.get("source_file","")
+        if s and s not in existing.get("sources",[]):
+            existing.setdefault("sources",[]).append(s)
+    @staticmethod
+    def _merge(item, existing):
+        Deduplicator._add_src(item, existing)
+        layers = existing.setdefault("layers",[])
+        if item.get("content_hash") not in {l.get("content_hash") for l in layers}:
+            layers.append({"source_file":item.get("source_file"),
+                           "content_hash":item.get("content_hash"),
+                           "data":{k:v for k,v in item.items()
+                                   if k not in ("sources","layers","canonical_id")}})
+def _hash(text):
+    return hashlib.sha256(" ".join(text.lower().split()).encode()).hexdigest()[:16]
+def _cid(name, h, kind):
+    return hashlib.md5(f"{kind}_{name}_{h}".encode()).hexdigest()[:12]
+def _text(item, kind):
+    if kind=="strategies":
+        return f"{item.get('name','')} {item.get('description','')} {' '.join(item.get('entry_rules',[]))}"
+    if kind=="formulas":
+        return f"{item.get('name','')} {item.get('plain_text','')} {item.get('purpose','')}"
+    return f"{item.get('name','')} {item.get('entry_system','')} {item.get('exit_system','')}"

pipeline/julia_bridge.py ADDED Viewed

	@@ -0,0 +1,212 @@

+"""
+pipeline/julia_bridge.py
+─────────────────────────────────────────────────────
+Python bridge to Julia via juliacall.
+Julia is loaded ONCE on first use (lazy init) to avoid
+slowing down Space startup. Subsequent calls are instant.
+juliacall converts:
+  Python list/np.ndarray  →  Julia Vector{Float64}
+  Python dict             →  Julia Dict
+  Julia Dict{String,Any}  →  Python dict
+  Julia Vector            →  Python list
+All heavy computation — indicators, backtest, optimizer —
+runs in Julia. Python only calls this bridge.
+"""
+from __future__ import annotations
+import os
+from pathlib import Path
+from typing import Optional
+import numpy as np
+from loguru import logger
+# ── Julia runtime (loaded once) ───────────────────────
+_jl = None
+_QE = None   # QuantEngine module
+JULIA_SRC = Path(__file__).parent.parent / "src"
+def _init_julia():
+    """Lazy-load Julia + QuantEngine on first call."""
+    global _jl, _QE
+    if _jl is not None:
+        return
+    logger.info("Initializing Julia runtime…")
+    try:
+        from juliacall import Main as jl
+        _jl = jl
+        # Load the QuantEngine module
+        _jl.seval(f'push!(LOAD_PATH, "{JULIA_SRC}")')
+        _jl.seval(f'include("{JULIA_SRC / "QuantEngine.jl"}")')
+        _jl.seval("using .QuantEngine")
+        _QE = _jl.QuantEngine
+        logger.info("Julia QuantEngine loaded ✓")
+    except Exception as e:
+        logger.error(f"Julia init failed: {e}")
+        raise RuntimeError(f"Julia init failed: {e}") from e
+def _arr(x) -> "jl.Vector":
+    """Convert Python list/ndarray to Julia Vector{Float64}."""
+    _init_julia()
+    import numpy as np
+    arr = np.asarray(x, dtype=np.float64)
+    return _jl.convert(_jl.Vector[_jl.Float64], arr.tolist())
+def _signals(x) -> "jl.Vector":
+    """Convert signal array to Julia Vector{Int}."""
+    _init_julia()
+    arr = [int(v) for v in x]
+    return _jl.convert(_jl.Vector[_jl.Int64], arr)
+def _pydict(jl_dict) -> dict:
+    """Recursively convert Julia Dict to Python dict."""
+    out = {}
+    for k in jl_dict.keys():
+        v = jl_dict[k]
+        k_py = str(k)
+        if hasattr(v, "keys"):
+            out[k_py] = _pydict(v)
+        elif hasattr(v, "__iter__") and not isinstance(v, str):
+            out[k_py] = list(v)
+        elif isinstance(v, bool):
+            out[k_py] = bool(v)
+        elif hasattr(v, "__float__"):
+            out[k_py] = float(v)
+        elif hasattr(v, "__int__"):
+            out[k_py] = int(v)
+        else:
+            out[k_py] = v
+    return out
+# ── Public API ────────────────────────────────────────
+def full_backtest_pipeline(
+    strategy_code:  str,
+    strategy_name:  str,
+    open_p:         list | np.ndarray,
+    high:           list | np.ndarray,
+    low:            list | np.ndarray,
+    close:          list | np.ndarray,
+    volume:         list | np.ndarray,
+    timeframe:      str,
+    symbol:         str,
+    n_windows:      int   = 5,
+    is_ratio:       float = 0.70,
+    min_trades:     int   = 30,
+    min_sharpe:     float = 0.5,
+    max_combos:     int   = 300,
+    initial_equity: float = 10_000.0,
+    commission_pct: float = 0.0002,
+    risk_per_trade: float = 0.01,
+) -> dict:
+    """
+    Run full Julia backtest pipeline.
+    Compile strategy code → walk-forward optimize → return results dict.
+    """
+    _init_julia()
+    try:
+        result = _QE.full_backtest_pipeline(
+            strategy_code, strategy_name,
+            _arr(open_p), _arr(high), _arr(low),
+            _arr(close),  _arr(volume),
+            timeframe, symbol,
+            n_windows      = n_windows,
+            is_ratio       = is_ratio,
+            min_trades     = min_trades,
+            min_sharpe     = min_sharpe,
+            max_combos     = max_combos,
+            initial_equity = initial_equity,
+            commission_pct = commission_pct,
+            risk_per_trade = risk_per_trade,
+        )
+        return _pydict(result)
+    except Exception as e:
+        logger.error(f"Julia pipeline error [{strategy_name}]: {e}")
+        return {
+            "is_valid": False,
+            "error": str(e),
+            "strategy": strategy_name,
+            "symbol": symbol,
+            "timeframe": timeframe,
+        }
+def run_backtest_only(
+    open_p: np.ndarray, high: np.ndarray, low: np.ndarray,
+    close: np.ndarray, volume: np.ndarray,
+    signals: list | np.ndarray,
+    timeframe: str = "1h",
+    initial_equity: float = 10_000.0,
+    commission_pct: float = 0.0002,
+    risk_per_trade: float = 0.01,
+) -> dict:
+    """Run a single backtest with pre-computed signals."""
+    _init_julia()
+    try:
+        cfg = _QE.BacktestConfig(
+            initial_equity=initial_equity,
+            commission_pct=commission_pct,
+            risk_per_trade=risk_per_trade,
+        )
+        r = _QE.run_backtest(
+            _arr(open_p), _arr(high), _arr(low),
+            _arr(close),  _arr(volume),
+            _signals(signals), timeframe, cfg,
+        )
+        return {
+            "total_return":  float(r.total_return),
+            "cagr":          float(r.cagr),
+            "sharpe":        float(r.sharpe),
+            "sortino":       float(r.sortino),
+            "calmar":        float(r.calmar),
+            "max_dd":        float(r.max_dd),
+            "n_trades":      int(r.n_trades),
+            "win_rate":      float(r.win_rate),
+            "profit_factor": float(r.profit_factor),
+            "final_equity":  float(r.final_equity),
+            "equity_curve":  list(r.equity_curve),
+            "is_valid":      bool(r.is_valid),
+            "error":         str(r.error_msg),
+        }
+    except Exception as e:
+        return {"is_valid": False, "error": str(e)}
+def indicator(name: str, *args, **kwargs):
+    """
+    Call any indicator by name from Python.
+    Returns numpy array.
+    Example:
+        rsi_values = indicator("rsi", close_array, 14)
+        upper, mid, lower = indicator("bbands", close_array, 20, 2.0)
+    """
+    _init_julia()
+    fn = getattr(_QE, name, None)
+    if fn is None:
+        raise ValueError(f"Unknown indicator: {name}")
+    jl_args = [_arr(a) if isinstance(a, (list, np.ndarray)) else a
+               for a in args]
+    result = fn(*jl_args, **kwargs)
+    if isinstance(result, tuple):
+        return tuple(np.array(list(r)) for r in result)
+    return np.array(list(result))
+def julia_available() -> bool:
+    """Check if Julia runtime is available."""
+    try:
+        _init_julia()
+        return _QE is not None
+    except Exception:
+        return False

pipeline/pdf_processor.py ADDED Viewed

	@@ -0,0 +1,88 @@

+"""pipeline/pdf_processor.py — PDF text extraction + OCR fallback."""
+import re
+from pathlib import Path
+from typing import Generator
+from dataclasses import dataclass
+import fitz, pdfplumber, pytesseract
+from pdf2image import convert_from_path
+from loguru import logger
+import utils.config as cfg
+@dataclass
+class Chunk:
+    source_file: str
+    page_start:  int
+    page_end:    int
+    text:        str
+    was_ocr:     bool = False
+    @property
+    def word_count(self): return len(self.text.split())
+class PDFProcessor:
+    MIN_CHARS   = 80
+    CHUNK_WORDS = cfg.MAX_TOKENS_PER_CHUNK // 2
+    def process(self, pdf_path: Path) -> Generator[Chunk, None, None]:
+        pages = self._extract_pages(pdf_path)
+        yield from self._chunk(pages, pdf_path.name)
+    def _extract_pages(self, path):
+        mu = {}
+        try:
+            doc = fitz.open(str(path))
+            for i, pg in enumerate(doc): mu[i+1] = self._clean(pg.get_text("text"))
+            doc.close()
+        except: pass
+        pl = {}
+        try:
+            with pdfplumber.open(str(path)) as pdf:
+                for i, pg in enumerate(pdf.pages):
+                    try: pl[i+1] = self._clean(pg.extract_text() or "")
+                    except: pl[i+1] = ""
+        except: pass
+        total = max(len(mu), len(pl), 1)
+        results = []; ocr_needed = []
+        for pnum in range(1, total+1):
+            best = mu.get(pnum,"") if len(mu.get(pnum,"")) > len(pl.get(pnum,"")) else pl.get(pnum,"")
+            if len(best) >= self.MIN_CHARS: results.append((pnum, best, False))
+            else: results.append((pnum, best, False)); ocr_needed.append(pnum)
+        if ocr_needed:
+            ocr = self._ocr(path, ocr_needed)
+            for i,(pnum,_,_) in enumerate(results):
+                if pnum in ocr: results[i] = (pnum, ocr[pnum], True)
+        return results
+    def _ocr(self, path, pages):
+        out = {}
+        try:
+            imgs = convert_from_path(str(path), dpi=cfg.OCR_DPI,
+                first_page=min(pages), last_page=max(pages))
+            for i, pnum in enumerate(range(min(pages), max(pages)+1)):
+                if pnum in pages and i < len(imgs):
+                    out[pnum] = self._clean(pytesseract.image_to_string(imgs[i],lang="eng",config="--psm 6"))
+        except Exception as e: logger.warning(f"OCR: {e}")
+        return out
+    def _chunk(self, pages, source):
+        buf, words, p_start, any_ocr = [], 0, 1, False
+        for pnum, text, ocr in pages:
+            if not text: continue
+            buf.append(text); words += len(text.split())
+            if ocr: any_ocr = True
+            if words >= self.CHUNK_WORDS:
+                yield Chunk(source, p_start, pnum, "\n\n".join(buf), any_ocr)
+                buf, words, p_start, any_ocr = [text], len(text.split()), pnum, ocr
+        if buf:
+            last = pages[-1][0] if pages else p_start
+            yield Chunk(source, p_start, last, "\n\n".join(buf), any_ocr)
+    @staticmethod
+    def _clean(text):
+        if not text: return ""
+        text = re.sub(r"(\w)-\n(\w)", r"\1\2", text)
+        text = re.sub(r"\n{3,}", "\n\n", text)
+        text = re.sub(r"[ \t]+", " ", text)
+        return text.strip()

requirements.txt ADDED Viewed

	@@ -0,0 +1,32 @@

+# Julia↔Python bridge
+juliacall>=0.9.23
+# Gradio UI
+gradio>=5.25.0
+# Claude API
+anthropic>=0.30.0
+# HuggingFace — no version pin, Spaces base image controls this
+huggingface_hub
+# PDF processing
+pypdf>=4.2.0
+pdfplumber>=0.11.0
+pymupdf>=1.24.0
+pdf2image>=1.17.0
+pytesseract>=0.3.10
+Pillow>=10.3.0
+# Data
+numpy>=1.26.0
+pandas>=2.2.0
+pyarrow>=16.0.0
+# Deduplication (used in extractor.py)
+scikit-learn>=1.5.0
+# Utilities
+loguru>=0.7.2
+pydantic>=2.7.0
+tqdm>=4.66.0

src/BacktestEngine.jl ADDED Viewed

	@@ -0,0 +1,148 @@

+"""
+BacktestEngine.jl — Vectorized backtest engine.
+No includes. Receives Indicators module via QuantEngine parent scope.
+"""
+module BacktestEngine
+using Statistics
+export run_backtest, BacktestResult, BacktestConfig
+# Indicators injected by QuantEngine before this module is used
+# atr() is accessed via the parent module's scope at call time
+# Number of bars per year for each supported timeframe label; used to
+# annualise returns. Intraday entries equal 365 days of round-the-clock bars
+# (e.g. "1h" => 365*24), while "1d" uses 252 and "1w" uses 52.
+# NOTE(review): intraday and daily entries follow different calendars — confirm
+# this is intended for the instruments being tested.
+const BARS_PER_YEAR = Dict(
+    "1m"=>525_600,"3m"=>175_200,"5m"=>105_120,"15m"=>35_040,"30m"=>17_520,
+    "1h"=>8_760,"2h"=>4_380,"4h"=>2_190,"6h"=>1_460,"12h"=>730,
+    "1d"=>252,"1w"=>52,
+)
+# Keyword-constructible settings for run_backtest.
+Base.@kwdef struct BacktestConfig
+    initial_equity :: Float64 = 10_000.0   # starting account equity
+    commission_pct :: Float64 = 0.0002     # fraction of notional, charged on entry and exit price
+    slippage_pct   :: Float64 = 0.0001     # fractional price adjustment applied to fills
+    risk_per_trade :: Float64 = 0.01       # fraction of equity risked between entry and stop
+    atr_mult       :: Float64 = 2.0        # stop distance = atr_mult * ATR
+    max_pos_pct    :: Float64 = 0.20       # cap on position notional as a fraction of equity
+    atr_period     :: Int     = 14         # period passed to the ATR function
+end
+# Result of one backtest. Mutable because _metrics fills the fields in place.
+# Percent-valued fields (total_return, cagr, max_dd, win_rate, avg_*_pct) are
+# stored multiplied by 100.
+mutable struct BacktestResult
+    total_return   :: Float64; cagr          :: Float64
+    sharpe         :: Float64; sortino       :: Float64; calmar :: Float64
+    max_dd         :: Float64; max_dd_bars   :: Int
+    n_trades       :: Int;     n_wins        :: Int;     win_rate       :: Float64
+    profit_factor  :: Float64; avg_win_pct   :: Float64; avg_loss_pct   :: Float64
+    expectancy     :: Float64; avg_bars_held :: Float64
+    max_consec_wins:: Int;     max_consec_loss:: Int
+    final_equity   :: Float64; total_comm    :: Float64
+    equity_curve   :: Vector{Float64}
+    n_bars         :: Int;     is_valid      :: Bool;    error_msg :: String
+end
+# Keyword constructor for an "empty" result: every metric is zero, the equity
+# curve is empty and final_equity is the literal 10_000.0 (not taken from any
+# BacktestConfig). Used both for error returns and as the blank that _metrics fills.
+BacktestResult(; n_bars=0, is_valid=false, error_msg="") = BacktestResult(
+    0.0,0.0,0.0,0.0,0.0,0.0,0, 0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,
+    10_000.0,0.0, Float64[], n_bars,is_valid,error_msg)
+# Fallback ATR for callers that do not inject an indicator function: an all-NaN
+# series, which makes the entry logic below use its 1%-of-price proxy instead.
+_nan_atr(high, low, close, period) = fill(NaN, length(close))
+# Bar-by-bar backtest with an ATR-based protective stop.
+#
+# signals[i] is the desired position on bar i; it is used as a sign multiplier,
+# so it is expected to be 1 (long), -1 (short) or 0 (flat).
+# atr_fn(high, low, close, period) must return one value per bar.
+# open_p and volume are accepted for interface symmetry but not read.
+#
+# Per bar, in order: (1) stop check against the bar's low/high, (2) exit when the
+# signal is flat or opposite, (3) entry when flat and no trade closed on this
+# bar, (4) mark-to-market of the equity curve. A position still open on the last
+# bar is closed at the final close.
+#
+# Returns a BacktestResult; is_valid=false with error_msg set when there are
+# fewer than 50 bars or the input lengths disagree.
+function run_backtest(
+    open_p::Vector{Float64}, high::Vector{Float64}, low::Vector{Float64},
+    close::Vector{Float64},  volume::Vector{Float64}, signals::Vector{Int},
+    timeframe::String="1h",  cfg::BacktestConfig=BacktestConfig(),
+    atr_fn::Function=_nan_atr,  # passed from QuantEngine to avoid circular dep
+)::BacktestResult
+    n = length(close)
+    n < 50 && return BacktestResult(; n_bars=n, error_msg="Need ≥50 bars, got $n")
+    # The main loop runs under @inbounds, so lengths must be verified up front.
+    (length(high)==n && length(low)==n && length(signals)==n) ||
+        return BacktestResult(; n_bars=n, error_msg="high, low, close and signals must have equal length")
+    atr_v = atr_fn(high, low, close, cfg.atr_period)
+    length(atr_v)==n ||
+        return BacktestResult(; n_bars=n, error_msg="ATR function returned $(length(atr_v)) values for $n bars")
+    equity = cfg.initial_equity
+    eq     = fill(cfg.initial_equity, n)
+    # Trade log buffers; a trade spans at least two bars, so n÷2+1 slots suffice.
+    tpnls = Vector{Float64}(undef, n÷2+1)
+    twins = Vector{Bool}(undef,    n÷2+1)
+    tbars = Vector{Int}(undef,     n÷2+1)
+    tents = Vector{Float64}(undef, n÷2+1)
+    tszs  = Vector{Float64}(undef, n÷2+1)
+    nt    = 0; tcomm = 0.0
+    # pos: current direction, epx: entry price, psz: size, spx: stop price,
+    # ebar: entry bar, ltrade: bar of the most recent exit.
+    pos=0; epx=0.0; psz=0.0; spx=0.0; ebar=1; ltrade=0
+    @inbounds for i in 2:n
+        px=close[i]; sig=signals[i]
+        if pos != 0
+            hit = (pos==1 && low[i]<=spx) || (pos==-1 && high[i]>=spx)
+            if hit
+                # Exit fill moves against the position: longs sell lower, shorts buy higher.
+                ep = spx*(1.0-cfg.slippage_pct*pos)
+                pnl = pos*(ep-epx)*psz; comm=(epx+ep)*psz*cfg.commission_pct
+                nt+=1; tpnls[nt]=pnl-comm; twins[nt]=pnl>comm
+                tbars[nt]=i-ebar; tents[nt]=epx; tszs[nt]=psz
+                tcomm+=comm; equity+=pnl-comm; pos=0; ltrade=i
+            end
+        end
+        if pos!=0 && (sig==0 || sig==-pos)
+            ep=px*(1.0-cfg.slippage_pct*pos)
+            pnl=pos*(ep-epx)*psz; comm=(epx+ep)*psz*cfg.commission_pct
+            nt+=1; tpnls[nt]=pnl-comm; twins[nt]=pnl>comm
+            tbars[nt]=i-ebar; tents[nt]=epx; tszs[nt]=psz
+            tcomm+=comm; equity+=pnl-comm; pos=0; ltrade=i
+        end
+        # No re-entry on the bar a trade was closed (i - ltrade must be >= 1).
+        if pos==0 && sig!=0 && (i-ltrade)>=1
+            # Entry fill moves against the trader: longs buy higher, shorts sell lower.
+            ep=px*(1.0+cfg.slippage_pct*sig)
+            av = isnan(atr_v[i]) ? px*0.01 : atr_v[i]
+            dist=cfg.atr_mult*av
+            # Size is the smaller of the risk-based size and the notional cap.
+            sz=min(equity*cfg.risk_per_trade/max(dist,1e-8), equity*cfg.max_pos_pct/ep)
+            sz=max(sz,1e-8)
+            pos=sig; epx=ep; psz=sz; spx=ep-sig*dist; ebar=i
+        end
+        eq[i] = equity + (pos!=0 ? pos*(close[i]-epx)*psz : 0.0)
+    end
+    if pos!=0
+        ep=close[n]; pnl=pos*(ep-epx)*psz; comm=(epx+ep)*psz*cfg.commission_pct
+        nt+=1; tpnls[nt]=pnl-comm; twins[nt]=pnl>comm
+        tbars[nt]=n-ebar; tents[nt]=epx; tszs[nt]=psz
+        tcomm+=comm; equity+=pnl-comm; eq[n]=equity
+    end
+    return _metrics(eq, tpnls[1:nt], twins[1:nt], tbars[1:nt],
+                    tents[1:nt], tszs[1:nt], tcomm, n, timeframe, cfg)
+end
+# Turn an equity curve and a trade log into a filled BacktestResult.
+#   eq     equity per bar          pnls  net P&L per trade
+#   wins   win flag per trade      bars  bars held per trade
+#   ents   entry price per trade   szs   position size per trade
+# Unknown timeframe labels fall back to 252 bars per year.
+function _metrics(eq,pnls,wins,bars,ents,szs,tcomm,n_bars,tf,cfg)
+    init=cfg.initial_equity; final=eq[end]; bpy=get(BARS_PER_YEAR,tf,252)
+    r=BacktestResult(;n_bars,is_valid=true)
+    r.equity_curve=eq; r.final_equity=final; r.total_comm=tcomm
+    r.total_return=(final-init)/init*100.0
+    yrs=n_bars/bpy
+    r.cagr = yrs>0&&final>0 ? ((final/init)^(1.0/yrs)-1.0)*100.0 : 0.0
+    # Single pass for max drawdown (fraction of running peak) and the longest
+    # run of consecutive bars spent below the peak.
+    peak=eq[1]; mxdd=0.0; ddr=0; mxddb=0
+    for v in eq
+        peak=max(peak,v); dd=(peak-v)/peak; mxdd=max(mxdd,dd)
+        v<peak ? (ddr+=1; mxddb=max(mxddb,ddr)) : (ddr=0)
+    end
+    r.max_dd=mxdd*100.0; r.max_dd_bars=mxddb
+    # Per-bar simple returns; only NaN entries are dropped.
+    rets=diff(eq)./eq[1:end-1]; filter!(!isnan,rets)
+    if length(rets)>1
+        mu=mean(rets); sg=std(rets)
+        # Downside deviation = std of negative returns; falls back to sg when
+        # there are fewer than two of them.
+        ds_v=filter(x->x<0,rets); ds=length(ds_v)>1 ? std(ds_v) : sg
+        af=sqrt(Float64(bpy))
+        r.sharpe=sg>0 ? mu/sg*af : 0.0; r.sortino=ds>0 ? mu/ds*af : 0.0
+        r.calmar=r.max_dd>0 ? r.cagr/r.max_dd : 0.0
+    end
+    r.n_trades=length(pnls)
+    # With no trades the per-trade statistics keep their zero defaults.
+    r.n_trades==0 && return r
+    nw=count(wins); r.n_wins=nw; r.win_rate=nw/r.n_trades*100.0
+    gw=sum(pnls[wins]); gl=abs(sum(pnls[.!wins]))
+    # No losing P&L: Inf if there were gains, otherwise 0.
+    r.profit_factor=gl>0 ? gw/gl : (gw>0 ? Inf : 0.0)
+    # Trade P&L as a percentage of entry notional (1e-10 guards a zero notional).
+    pct=pnls./(ents.*szs.+1e-10).*100.0
+    r.avg_win_pct  = nw>0            ? mean(pct[wins])   : 0.0
+    r.avg_loss_pct = (r.n_trades-nw)>0 ? mean(pct[.!wins]) : 0.0
+    r.expectancy=r.win_rate/100.0*r.avg_win_pct+(1-r.win_rate/100.0)*r.avg_loss_pct
+    r.avg_bars_held=mean(Float64.(bars))
+    r.max_consec_wins=_maxrun(wins); r.max_consec_loss=_maxrun(.!wins)
+    return r
+end
+# Length of the longest run of consecutive `true` values in `b`.
+# Accepts any Bool vector: the caller passes `.!wins`, and a broadcast that
+# yields Bool produces a BitVector, not a Vector{Bool}.
+function _maxrun(b::AbstractVector{Bool})::Int
+    longest = 0
+    current = 0
+    for flag in b
+        if flag
+            current += 1
+            longest = max(longest, current)
+        else
+            current = 0
+        end
+    end
+    return longest
+end
+end # module BacktestEngine

src/Indicators.jl ADDED Viewed

	@@ -0,0 +1,223 @@

+"""
+Indicators.jl — Vectorized technical indicator library.
+Standalone module. No includes. No external deps beyond Statistics.
+"""
+module Indicators
+using Statistics
+export sma, ema, wma, tema, dema,
+       rsi, macd, stoch, cci, williams_r,
+       atr, bbands, keltner, donchian, adx,
+       vwap, obv, cmf, zscore, std_dev,
+       momentum, roc, highest, lowest,
+       crossover, crossunder
+# ── Trend ─────────────────────────────────────────────
+# Simple moving average over the last n values.
+# out[i] is NaN for i < n and for every window that contains a NaN input.
+# NaN inputs are kept out of the running sum, so the average recovers as soon
+# as the NaN has left the window (e.g. when smoothing a series that itself
+# starts with NaN warm-up values).
+function sma(x::Vector{Float64}, n::Int)::Vector{Float64}
+    len = length(x); out = fill(NaN, len)
+    s = 0.0          # running sum of the non-NaN values in the window
+    nan_count = 0    # number of NaN values currently in the window
+    for i in 1:len
+        isnan(x[i]) ? (nan_count += 1) : (s += x[i])
+        if i > n
+            leaving = x[i-n]
+            isnan(leaving) ? (nan_count -= 1) : (s -= leaving)
+        end
+        if i >= n && nan_count == 0
+            out[i] = s / n
+        end
+    end
+    return out
+end
+# Exponential moving average with smoothing factor 2/(n+1).
+# The first output is placed at the n-th non-NaN input and equals the mean of
+# those n values; NaN inputs are skipped (their output stays NaN) both while
+# seeding and afterwards.
+function ema(x::Vector{Float64}, n::Int)::Vector{Float64}
+    len = length(x)
+    out = fill(NaN, len)
+    k = 2.0 / (n + 1.0)
+    total = 0.0
+    seen = 0
+    for i in 1:len
+        isnan(x[i]) && continue
+        total += x[i]
+        seen += 1
+        seen == n || continue
+        # Seed reached: start the recursion from the plain average.
+        prev = total / n
+        out[i] = prev
+        for j in (i+1):len
+            isnan(x[j]) && continue
+            prev = x[j] * k + prev * (1.0 - k)
+            out[j] = prev
+        end
+        break
+    end
+    return out
+end
+# Linearly weighted moving average: within each n-bar window the oldest value
+# has weight 1 and the newest weight n. The first n-1 outputs are NaN.
+function wma(x::Vector{Float64}, n::Int)::Vector{Float64}
+    total = length(x)
+    result = fill(NaN, total)
+    weight_sum = n * (n+1) / 2.0
+    for stop in n:total
+        base = stop - n
+        acc = 0.0
+        for w in 1:n
+            acc += x[base+w] * w
+        end
+        result[stop] = acc / weight_sum
+    end
+    return result
+end
+# Triple EMA: 3*EMA1 - 3*EMA2 + EMA3, where each EMA smooths the previous one.
+function tema(x::Vector{Float64}, n::Int)
+    pass1 = ema(x, n)
+    pass2 = ema(pass1, n)
+    pass3 = ema(pass2, n)
+    return 3.0 .* pass1 .- 3.0 .* pass2 .+ pass3
+end
+# Double EMA: 2*EMA1 - EMA2, where EMA2 smooths EMA1.
+function dema(x::Vector{Float64}, n::Int)
+    pass1 = ema(x, n)
+    pass2 = ema(pass1, n)
+    return 2.0 .* pass1 .- pass2
+end
+# ── Oscillators ───────────────────────────────────────
+# Relative Strength Index over n bars.
+# The first value appears at index n+1, seeded from the plain average gain and
+# loss of the first n price changes; later values update both averages as
+# (previous*(n-1) + current)/n. Outputs before index n+1 are NaN, and the
+# whole result is NaN when the series has n or fewer values.
+function rsi(close::Vector{Float64}, n::Int=14)::Vector{Float64}
+    len = length(close); out = fill(NaN, len)
+    ag = 0.0; al = 0.0
+    for i in 2:(n+1)
+        i > len && break
+        d = close[i] - close[i-1]
+        d > 0 ? (ag += d) : (al += abs(d))
+    end
+    ag /= n; al /= n
+    # A zero average loss is replaced by a ratio of 1e10, which puts RSI just below 100.
+    n+1 <= len && (out[n+1] = 100.0 - 100.0/(1.0 + (al==0 ? 1e10 : ag/al)))
+    for i in (n+2):len
+        d = close[i] - close[i-1]
+        g = d > 0 ? d : 0.0; l = d < 0 ? abs(d) : 0.0
+        ag = (ag*(n-1)+g)/n; al = (al*(n-1)+l)/n
+        out[i] = 100.0 - 100.0/(1.0 + (al==0 ? 1e10 : ag/al))
+    end
+    return out
+end
+# MACD: returns (macd line, signal line, histogram), where the macd line is
+# EMA(fast) - EMA(slow), the signal line is its EMA(sig) and the histogram is
+# their difference.
+function macd(close::Vector{Float64}; fast::Int=12, slow::Int=26, sig::Int=9)
+    fast_avg = ema(close, fast)
+    slow_avg = ema(close, slow)
+    line = fast_avg .- slow_avg
+    signal = ema(line, sig)
+    histogram = line .- signal
+    return line, signal, histogram
+end
+# Stochastic oscillator: returns (%K, %D).
+# %K[i] locates close[i] inside the high/low range of the last k bars on a
+# 0-100 scale (50 when the range is zero); %D is sma(%K, d).
+# The first k-1 values of %K are NaN.
+function stoch(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64};
+               k::Int=14, d::Int=3)
+    len = length(close)
+    K = fill(NaN, len)
+    for i in k:len
+        window = (i-k+1):i
+        top = maximum(high[window])
+        bottom = minimum(low[window])
+        if top == bottom
+            K[i] = 50.0
+        else
+            K[i] = 100.0*(close[i]-bottom)/(top-bottom)
+        end
+    end
+    D = sma(K, d)
+    return K, D
+end
+# Commodity Channel Index over n bars, computed on the typical price
+# (high+low+close)/3: (tp - window mean) / (0.015 * window mean absolute
+# deviation). Zero deviation yields 0; the first n-1 outputs are NaN.
+function cci(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64}, n::Int=20)::Vector{Float64}
+    len = length(close)
+    tp = (high.+low.+close)./3.0
+    out = fill(NaN, len)
+    for i in n:len
+        window = tp[i-n+1:i]
+        center = mean(window)
+        mean_dev = mean(abs.(window.-center))
+        if mean_dev == 0
+            out[i] = 0.0
+        else
+            out[i] = (tp[i]-center)/(0.015*mean_dev)
+        end
+    end
+    return out
+end
+# Williams %R over n bars: -100 * (highest high - close) / (highest high -
+# lowest low), i.e. a value in [-100, 0]. A zero range yields -50; the first
+# n-1 outputs are NaN.
+function williams_r(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64}, n::Int=14)::Vector{Float64}
+    len = length(close)
+    out = fill(NaN, len)
+    for i in n:len
+        window = (i-n+1):i
+        top = maximum(high[window])
+        bottom = minimum(low[window])
+        if top == bottom
+            out[i] = -50.0
+        else
+            out[i] = -100.0*(top-close[i])/(top-bottom)
+        end
+    end
+    return out
+end
+# Momentum: x[i] - x[i-n]. The first n outputs are NaN.
+function momentum(x::Vector{Float64}, n::Int=10)
+    len = length(x)
+    out = fill(NaN, len)
+    for i in (n+1):len
+        out[i] = x[i] - x[i-n]
+    end
+    return out
+end
+# Rate of change in percent: 100 * (x[i] - x[i-n]) / x[i-n].
+# A zero reference value yields 0; the first n outputs are NaN.
+function roc(x::Vector{Float64}, n::Int=10)
+    len = length(x)
+    out = fill(NaN, len)
+    for i in (n+1):len
+        ref = x[i-n]
+        if ref == 0
+            out[i] = 0.0
+        else
+            out[i] = 100.0*(x[i]-ref)/ref
+        end
+    end
+    return out
+end
+# ── Volatility ────────────────────────────────────────
+# True range per bar: the largest of high-low, |high - previous close| and
+# |low - previous close|. The first bar has no previous close and uses high-low.
+function _tr(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64})::Vector{Float64}
+    len = length(close)
+    tr = fill(NaN, len)
+    tr[1] = high[1]-low[1]
+    for i in 2:len
+        prev_close = close[i-1]
+        tr[i] = max(high[i]-low[i], abs(high[i]-prev_close), abs(low[i]-prev_close))
+    end
+    return tr
+end
+# Average true range: the true range smoothed with ema (factor 2/(n+1)).
+function atr(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64}, n::Int=14)
+    true_range = _tr(high, low, close)
+    return ema(true_range, n)
+end
+# Bollinger bands: returns (upper, middle, lower) where middle is sma(close, n)
+# and the bands lie k population standard deviations (corrected=false) away.
+function bbands(close::Vector{Float64}, n::Int=20, k::Float64=2.0)
+    len = length(close)
+    mid = sma(close, n)
+    sd = fill(NaN, len)
+    for i in n:len
+        window = close[i-n+1:i]
+        sd[i] = std(window; corrected=false)
+    end
+    upper = mid.+k.*sd
+    lower = mid.-k.*sd
+    return upper, mid, lower
+end
+# Keltner channel: returns (upper, middle, lower) where middle is ema(close, n)
+# and the bands lie k * atr(n) away.
+function keltner(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64}, n::Int=20, k::Float64=2.0)
+    mid = ema(close, n)
+    width = atr(high, low, close, n)
+    upper = mid.+k.*width
+    lower = mid.-k.*width
+    return upper, mid, lower
+end
+# Donchian channel: returns (upper, middle, lower) = (highest high, midpoint,
+# lowest low) of the last n bars. The first n-1 outputs are NaN.
+function donchian(high::Vector{Float64}, low::Vector{Float64}, n::Int=20)
+    len = length(high)
+    upper = fill(NaN, len)
+    lower = fill(NaN, len)
+    for i in n:len
+        window = (i-n+1):i
+        upper[i] = maximum(high[window])
+        lower[i] = minimum(low[window])
+    end
+    middle = (upper.+lower)./2.0
+    return upper, middle, lower
+end
+# Rolling population standard deviation (corrected=false) over n values.
+# The first n-1 outputs are NaN.
+function std_dev(x::Vector{Float64}, n::Int=20)::Vector{Float64}
+    len = length(x)
+    out = fill(NaN, len)
+    for i in n:len
+        window = x[i-n+1:i]
+        out[i] = std(window; corrected=false)
+    end
+    return out
+end
+# Rolling z-score: (x - sma(x, n)) / std_dev(x, n).
+# Output is NaN wherever the mean or deviation is NaN or the deviation is not
+# strictly positive.
+function zscore(x::Vector{Float64}, n::Int=20)::Vector{Float64}
+    mu = sma(x, n)
+    sd = std_dev(x, n)
+    out = fill(NaN, length(x))
+    for i in eachindex(x)
+        m = mu[i]
+        s = sd[i]
+        (isnan(m) || isnan(s) || !(s > 0)) && continue
+        out[i] = (x[i]-m)/s
+    end
+    return out
+end
+# ── Trend strength ────────────────────────────────────
+# Average Directional Index: returns (adx, +DI, -DI).
+# Up-move is high[i]-high[i-1]; down-move is low[i-1]-low[i] (positive when the
+# low falls). +DM takes the up-move when it is the larger and positive, -DM the
+# down-move likewise. True range, +DM, -DM and DX are all smoothed with ema(n);
+# 1e-10 guards the divisions.
+function adx(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64}, n::Int=14)
+    tr = _tr(high,low,close)
+    # Prepending the first value makes both move series start with 0.
+    up = diff(vcat(high[1],high))
+    dn = -diff(vcat(low[1],low))
+    pdm = map((u,d)->u>d&&u>0 ? u : 0.0, up, dn)
+    ndm = map((u,d)->d>u&&d>0 ? d : 0.0, up, dn)
+    sm = ema(tr,n)
+    pdi = 100.0.*ema(pdm,n)./(sm.+1e-10)
+    ndi = 100.0.*ema(ndm,n)./(sm.+1e-10)
+    dx = 100.0.*abs.(pdi.-ndi)./(pdi.+ndi.+1e-10)
+    return ema(dx,n), pdi, ndi
+end
+# ── Volume ────────────────────────────────────────────
+# Cumulative volume-weighted average of the typical price (high + low + close) / 3,
+# accumulated from the first bar. A 1e-10 term in the denominator avoids division by zero.
+function vwap(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64}, volume::Vector{Float64})::Vector{Float64}
+    typical = (high .+ low .+ close) ./ 3.0
+    traded_value = cumsum(typical .* volume)
+    traded_volume = cumsum(volume) .+ 1e-10
+    return traded_value ./ traded_volume
+end
+# On-balance volume: starts at volume[1], then adds the bar's volume when the close
+# rises, subtracts it when the close falls, and carries the total forward otherwise.
+function obv(close::Vector{Float64}, volume::Vector{Float64})::Vector{Float64}
+    nbars = length(close)
+    running = zeros(Float64, nbars)
+    running[1] = volume[1]
+    for t in 2:nbars
+        change = close[t] - close[t-1]
+        step = 0.0
+        if change > 0
+            step = volume[t]
+        elseif change < 0
+            step = -volume[t]
+        end
+        running[t] = running[t-1] + step
+    end
+    return running
+end
+# Chaikin money flow: sum of money-flow volume over the trailing `n` bars divided by
+# the summed volume of the same bars. The per-bar multiplier is (2*close - low - high) /
+# (high - low), taken as 0.0 on zero-range bars; a zero volume sum also yields 0.0.
+# Entries before index `n` are NaN.
+function cmf(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64},
+             volume::Vector{Float64}, n::Int=20)::Vector{Float64}
+    nbars = length(close)
+    flow = fill(NaN, nbars)
+    bar_range = high .- low
+    multiplier = map(close, low, high, bar_range) do c, lo, hi, rng
+        rng == 0 ? 0.0 : (2c - lo - hi) / rng
+    end
+    money_flow = multiplier .* volume
+    for stop in n:nbars
+        span = (stop-n+1):stop
+        vol_sum = sum(volume[span])
+        flow[stop] = vol_sum == 0 ? 0.0 : sum(money_flow[span]) / vol_sum
+    end
+    return flow
+end
+# ── Utilities ─────────────────────────────────────────
+# Rolling maximum over the trailing `n` values; entries before index `n` are NaN.
+function highest(x::Vector{Float64}, n::Int)
+    total = length(x)
+    peaks = fill(NaN, total)
+    for stop in n:total
+        peaks[stop] = maximum(x[(stop-n+1):stop])
+    end
+    return peaks
+end
+# Rolling minimum over the trailing `n` values; entries before index `n` are NaN.
+function lowest(x::Vector{Float64}, n::Int)
+    total = length(x)
+    troughs = fill(NaN, total)
+    for stop in n:total
+        troughs[stop] = minimum(x[(stop-n+1):stop])
+    end
+    return troughs
+end
+# True at bar i when `a` is above `b` now but was at or below it on the previous bar.
+# The first bar is always false.
+function crossover(a::Vector{Float64}, b::Vector{Float64})::Vector{Bool}
+    nbars = length(a)
+    crossed = fill(false, nbars)
+    for t in 2:nbars
+        was_at_or_below = a[t-1] <= b[t-1]
+        crossed[t] = a[t] > b[t] && was_at_or_below
+    end
+    return crossed
+end
+# True at bar i when `a` is below `b` now but was at or above it on the previous bar.
+# The first bar is always false.
+function crossunder(a::Vector{Float64}, b::Vector{Float64})::Vector{Bool}
+    nbars = length(a)
+    crossed = fill(false, nbars)
+    for t in 2:nbars
+        was_at_or_above = a[t-1] >= b[t-1]
+        crossed[t] = a[t] < b[t] && was_at_or_above
+    end
+    return crossed
+end
+end # module Indicators

src/Manifest.toml ADDED Viewed

	@@ -0,0 +1,19 @@

+# This file is machine-generated - editing it directly is not recommended
+julia_version = "1.10.7"
+manifest_format = "2.0"
+project_hash = "abc123"
+[[deps.JSON3]]
+deps = ["Dates", "Mmap", "Parsers", "PrecompileTools", "StructTypes", "UUIDs"]
+git-tree-sha1 = "eb3edce0ed4fa32f75a0a11217433c31d56bd48b"
+uuid = "0f8b85d8-7e73-4b43-9b43-f8e4f07d6bcd"
+version = "1.14.0"
+[[deps.Random]]
+deps = ["SHA"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+[[deps.Statistics]]
+deps = ["LinearAlgebra", "SparseArrays"]
+uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+version = "1.10.0"

src/Optimizer.jl ADDED Viewed

	@@ -0,0 +1,162 @@

+"""
+Optimizer.jl — Walk-forward optimization engine.
+No includes. BacktestConfig/run_backtest/BacktestResult received via QuantEngine.
+"""
+module Optimizer
+using Statistics, Random
+export walk_forward_optimize, OptimResult
+# Outcome of one walk-forward optimisation run. Created with neutral defaults by the
+# three-argument constructor and filled in by `walk_forward_optimize`.
+mutable struct OptimResult
+    strategy_name::String
+    symbol::String
+    timeframe::String
+    optimal_params::Dict{String,Float64}   # consensus parameters chosen by `_vote`
+    oos_sharpe_mean::Float64               # mean Sharpe over valid out-of-sample windows
+    oos_sharpe_std::Float64                # spread of those Sharpe values
+    oos_win_rate::Float64                  # mean win rate over valid OOS windows
+    oos_max_dd::Float64                    # mean max drawdown over valid OOS windows
+    oos_pf_mean::Float64                   # mean profit factor (values >= 100 excluded)
+    oos_trades::Int                        # total trades across valid OOS windows
+    wf_efficiency::Float64                 # mean OOS Sharpe / mean in-sample Sharpe
+    robustness::Float64                    # score computed by `_robustness`
+    is_viable::Bool                        # verdict from `_viability`
+    reasons::Vector{String}                # rejection reasons, or a summary when viable
+    oos_sharpes::Vector{Float64}           # OOS Sharpe of every evaluated window
+end
+# Empty result: zeroed metrics, no parameters, not viable.
+function OptimResult(n, s, t)
+    return OptimResult(n, s, t, Dict{String,Float64}(),
+        0.0, 0.0, 0.0, 0.0, 0.0, 0, 0.0, 0.0, false, String[], Float64[])
+end
+# Walk-forward optimisation.
+#
+# For every window from `_windows`, each parameter combination is backtested on the
+# in-sample slice; the best valid combination (highest Sharpe with at least `min_trades`
+# trades) is then re-run on the out-of-sample slice. OOS statistics are aggregated into
+# an `OptimResult`, the final parameters are chosen by `_vote`, and the result is scored
+# by `_robustness` / `_viability`.
+#
+# Arguments
+#   signal_fn   (o,h,l,c,v,params) -> signal vector
+#   param_grid  parameter name => candidate values (empty grid = one empty combination)
+#   open_p..volume  OHLCV series, all of the same length
+#   run_bt_fn   (o,h,l,c,v,signals,timeframe,cfg) -> backtest result
+#   bt_cfg_fn   zero-argument factory for the backtest configuration
+#   is_ratio    accepted for interface compatibility; the current window layout does not use it
+#
+# Returns an `OptimResult`; on early exit its `reasons` explain why nothing was evaluated.
+function walk_forward_optimize(
+    signal_fn::Function,
+    param_grid::Dict{String,Vector{Float64}},
+    open_p::Vector{Float64}, high::Vector{Float64},
+    low::Vector{Float64},    close::Vector{Float64},
+    volume::Vector{Float64}, timeframe::String,
+    strategy_name::String,   symbol::String;
+    run_bt_fn::Function,          # run_backtest injected from QuantEngine
+    bt_cfg_fn::Function,          # BacktestConfig() constructor injected
+    n_windows::Int=5, is_ratio::Float64=0.70,
+    min_trades::Int=30, min_sharpe::Float64=0.5,
+    max_combos::Int=300,
+)::OptimResult
+    result = OptimResult(strategy_name, symbol, timeframe)
+    n = length(close)
+    n < 200 && (push!(result.reasons,"Need ≥200 bars, got $n"); return result)
+    # Slicing below indexes every series with ranges derived from `close`, so a shorter
+    # series would otherwise surface as a BoundsError deep inside the loop.
+    if !(length(open_p) == length(high) == length(low) == length(volume) == n)
+        push!(result.reasons,"OHLCV series lengths differ")
+        return result
+    end
+    cfg      = bt_cfg_fn()
+    combos   = _build_combos(param_grid, max_combos)
+    windows  = _windows(n, n_windows)
+    isempty(windows) && (push!(result.reasons,"No WF windows"); return result)
+    win_params=Vector{Dict{String,Float64}}()
+    is_sharpes=Float64[]; oos_sharpes=Float64[]
+    oos_results=[]
+    for (is_s,is_e,oos_s,oos_e) in windows
+        # In-sample search: strict `>` keeps the first combination on ties.
+        best_p=nothing; best_sh=-Inf
+        for p in combos
+            r = _run(signal_fn,run_bt_fn,cfg,
+                     open_p[is_s:is_e],high[is_s:is_e],
+                     low[is_s:is_e],close[is_s:is_e],
+                     volume[is_s:is_e],p,timeframe)
+            r.is_valid && r.n_trades>=min_trades && r.sharpe>best_sh && (best_sh=r.sharpe; best_p=p)
+        end
+        # No combination qualified in-sample: the window contributes nothing.
+        best_p===nothing && continue
+        push!(win_params,best_p); push!(is_sharpes,best_sh)
+        oos_r = _run(signal_fn,run_bt_fn,cfg,
+                     open_p[oos_s:oos_e],high[oos_s:oos_e],
+                     low[oos_s:oos_e],close[oos_s:oos_e],
+                     volume[oos_s:oos_e],best_p,timeframe)
+        push!(oos_results,oos_r); push!(oos_sharpes,oos_r.sharpe)
+    end
+    isempty(oos_results) && (push!(result.reasons,"No valid WF windows"); return result)
+    result.oos_sharpes = oos_sharpes
+    valid = filter(r->r.is_valid && r.n_trades>=min_trades, oos_results)
+    if !isempty(valid)
+        sh=[r.sharpe for r in valid]
+        result.oos_sharpe_mean=mean(sh)
+        # The sample standard deviation of a single value is NaN, which would turn the
+        # robustness score into NaN; one window simply has no measurable spread.
+        result.oos_sharpe_std = length(sh)>1 ? std(sh) : 0.0
+        result.oos_win_rate=mean([r.win_rate for r in valid])
+        result.oos_max_dd=mean([r.max_dd for r in valid])
+        # Profit factors of 100 or more are left out of the average.
+        pfs=filter(x->x<100,[r.profit_factor for r in valid])
+        result.oos_pf_mean=isempty(pfs) ? 0.0 : mean(pfs)
+        result.oos_trades=sum(r.n_trades for r in valid)
+    end
+    if !isempty(is_sharpes) && !isempty(oos_sharpes)
+        mis=mean(is_sharpes); mos=mean(oos_sharpes)
+        result.wf_efficiency = mis>0 ? mos/mis : 0.0
+    end
+    result.optimal_params = _vote(win_params, oos_sharpes)
+    result.robustness = _robustness(result, min_trades)
+    result.is_viable, result.reasons = _viability(result, min_trades, min_sharpe)
+    return result
+end
+# Generate signals for one parameter set and backtest them. If anything throws, a
+# backtest of all-zero signals is used as a placeholder result, marked invalid and
+# tagged with the stringified exception.
+function _run(sig_fn,run_bt,cfg,o,h,l,c,v,params,tf)
+    try
+        signals = sig_fn(o,h,l,c,v,params)
+        return run_bt(o,h,l,c,v,signals,tf,cfg)
+    catch err
+        flat = zeros(Int,length(c))
+        fallback = run_bt(o,h,l,c,v,flat,tf,cfg)
+        fallback.is_valid = false
+        fallback.error_msg = string(err)
+        return fallback
+    end
+end
+# Expand a parameter grid into every combination of values (first key varies slowest).
+# An empty grid yields a single empty combination. When more than `max_c` combinations
+# exist, a random subset of `max_c` is returned.
+function _build_combos(grid::Dict{String,Vector{Float64}}, max_c::Int)::Vector{Dict{String,Float64}}
+    isempty(grid) && return [Dict{String,Float64}()]
+    param_names = collect(keys(grid))
+    partials = [Dict{String,Float64}()]
+    # Extend every partial combination with each candidate value of the next key.
+    for pname in param_names
+        extended = Dict{String,Float64}[]
+        for base in partials, val in grid[pname]
+            combo = copy(base)
+            combo[pname] = val
+            push!(extended, combo)
+        end
+        partials = extended
+    end
+    if length(partials) > max_c
+        keep = randperm(length(partials))[1:max_c]
+        partials = partials[keep]
+    end
+    return partials
+end
+# Lay out up to `nw` anchored walk-forward windows over `n` bars.
+#
+# Each window is (is_start, is_end, oos_start, oos_end). Out-of-sample blocks are
+# `max(50, n ÷ (2nw))` bars long and tile backwards from the end of the data; the
+# in-sample part always starts at bar 1 and ends right before its OOS block. Windows
+# with fewer than 100 in-sample bars or fewer than 50 OOS bars are dropped. The result
+# is in chronological order.
+#
+# The original guard read `ie-1<100||oe-os<50 && continue`. Because `&&` binds tighter
+# than `||`, a too-short in-sample window was never skipped (letting zero and negative
+# indices through), and `oe-os` is one less than the OOS length, which rejected the
+# 50-bar minimum block size. Both are corrected here by comparing actual lengths.
+function _windows(n::Int,nw::Int)::Vector{Tuple{Int,Int,Int,Int}}
+    wins = Tuple{Int,Int,Int,Int}[]
+    nw < 1 && return wins          # also avoids dividing by zero below
+    osz = max(50, n ÷ (nw*2))
+    for i in 0:(nw-1)
+        oe = n - i*osz
+        os = oe - osz + 1
+        ie = os - 1
+        is_len  = ie               # in-sample spans 1:ie
+        oos_len = oe - os + 1
+        (is_len < 100 || oos_len < 50) && continue
+        push!(wins, (1, ie, os, oe))
+    end
+    return reverse(wins)
+end
+# Pick consensus parameters across windows: for every key of the first parameter set,
+# take the weighted median of the per-window values. Weights are the windows' OOS
+# Sharpe ratios floored at zero and normalised; if none is positive, all windows
+# weigh the same. A single window is returned unchanged.
+function _vote(pl::Vector{Dict{String,Float64}}, oos::Vector{Float64})::Dict{String,Float64}
+    isempty(pl) && return Dict{String,Float64}()
+    length(pl)==1 && return pl[1]
+    nsets = length(pl)
+    weights = max.(0.0, oos[1:nsets])
+    total = sum(weights)
+    weights = total > 0 ? weights ./ total : fill(1.0/nsets, nsets)
+    consensus = Dict{String,Float64}()
+    for key in collect(keys(pl[1]))
+        candidates = [p[key] for p in pl if haskey(p,key)]
+        key_weights = weights[1:length(candidates)]
+        order = sortperm(candidates)
+        running = cumsum(key_weights[order])
+        # First sorted position where the cumulative weight reaches one half;
+        # fall back to the largest value if rounding keeps it just below.
+        pos = findfirst(x -> x >= 0.5, running)
+        pick = pos === nothing ? order[end] : order[pos]
+        consensus[key] = candidates[pick]
+    end
+    return consensus
+end
+# Robustness score, rounded to one decimal. Components:
+#   up to 40  walk-forward efficiency clamped to [0, 1]
+#   up to 30  Sharpe consistency, 1 - std/mean (only when the mean is positive)
+#   up to 20  trade count relative to ten times the minimum
+#   up to 10  profit factor above 1 (full marks at 3)
+function _robustness(r::OptimResult, mt::Int)::Float64
+    score = clamp(r.wf_efficiency, 0.0, 1.0) * 40.0
+    if r.oos_sharpe_mean > 0
+        consistency = 1.0 - r.oos_sharpe_std / (r.oos_sharpe_mean + 1e-9)
+        score += clamp(consistency, 0.0, 1.0) * 30.0
+    end
+    trade_depth = r.oos_trades / max(1, mt*10)
+    score += clamp(trade_depth, 0.0, 1.0) * 20.0
+    if r.oos_pf_mean > 1
+        score += clamp((r.oos_pf_mean - 1) / 2, 0.0, 1.0) * 10.0
+    end
+    return round(score; digits=1)
+end
+# Apply the acceptance thresholds to an optimisation result. Returns (viable, reasons):
+# every failed check adds a reason; when all checks pass, `reasons` holds a single
+# summary line instead.
+function _viability(r::OptimResult,mt::Int,ms::Float64)::Tuple{Bool,Vector{String}}
+    reasons = String[]
+    if r.oos_sharpe_mean < ms
+        push!(reasons,"OOS Sharpe $(round(r.oos_sharpe_mean;digits=2)) < $ms")
+    end
+    if r.oos_trades < mt
+        push!(reasons,"Too few OOS trades: $(r.oos_trades) < $mt")
+    end
+    if r.oos_max_dd > 30.0
+        push!(reasons,"High avg DD: $(round(r.oos_max_dd;digits=1))%")
+    end
+    if r.wf_efficiency < 0.3
+        push!(reasons,"Low WFE: $(round(r.wf_efficiency;digits=2))")
+    end
+    if r.oos_pf_mean < 1.1
+        push!(reasons,"PF $(round(r.oos_pf_mean;digits=2)) < 1.1")
+    end
+    viable = isempty(reasons)
+    if viable
+        push!(reasons,"✅ Sharpe=$(round(r.oos_sharpe_mean;digits=2)) DD=$(round(r.oos_max_dd;digits=1))% WFE=$(round(r.wf_efficiency;digits=2)) Score=$(r.robustness)/100")
+    end
+    return viable, reasons
+end
+end # module Optimizer

src/Project.toml ADDED Viewed

	@@ -0,0 +1,7 @@

+[deps]
+JSON3    = "0f8b85d8-7e73-4b43-9b43-f8e4f07d6bcd"
+Random   = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+[compat]
+julia = "1.10"

src/QuantEngine.jl ADDED Viewed

	@@ -0,0 +1,103 @@

+"""
+QuantEngine.jl — Top-level module. Only file that uses include().
+Wires all submodules together by injecting dependencies explicitly.
+Python imports this via juliacall.
+"""
+module QuantEngine
+using Statistics, Random
+# ── Include all submodules (ONLY here) ───────────────
+include("Indicators.jl")
+include("BacktestEngine.jl")
+include("Optimizer.jl")
+include("SignalCompiler.jl")
+using .Indicators
+using .BacktestEngine
+using .Optimizer
+using .SignalCompiler
+export
+    # Indicators
+    sma, ema, wma, tema, dema,
+    rsi, macd, stoch, cci, williams_r,
+    atr, bbands, keltner, donchian, adx,
+    vwap, obv, cmf, zscore, std_dev,
+    momentum, roc, highest, lowest,
+    crossover, crossunder,
+    # Engine
+    BacktestConfig,
+    # High-level
+    full_backtest_pipeline
+"""
+    full_backtest_pipeline(...) -> Dict{String,Any}
+End-to-end: compile Julia strategy code → walk-forward optimize
+→ return plain Dict that crosses to Python cleanly.
+"""
+function full_backtest_pipeline(
+    strategy_code::String, strategy_name::String,
+    open_p::Vector{Float64}, high::Vector{Float64},
+    low::Vector{Float64},    close::Vector{Float64},
+    volume::Vector{Float64}, timeframe::String, symbol::String;
+    n_windows::Int=5, is_ratio::Float64=0.70,
+    min_trades::Int=30, min_sharpe::Float64=0.5,
+    max_combos::Int=300,
+    initial_equity::Float64=10_000.0,
+    commission_pct::Float64=0.0002,
+    risk_per_trade::Float64=0.01,
+)::Dict{String,Any}
+    # Every failure is reported in the same shape: is_valid=false plus an error message
+    # and the identifying fields. Note that this shape carries no "is_viable" key.
+    failure(msg) = Dict{String,Any}("is_valid"=>false,"error"=>msg,
+        "strategy"=>strategy_name,"symbol"=>symbol,"timeframe"=>timeframe)
+    # 1. Compile strategy — pass Indicators module explicitly
+    compiled = SignalCompiler.compile_strategy(strategy_name, strategy_code, Indicators)
+    compiled.is_valid || return failure(compiled.error)
+    # 2. Normalise the parameter grid. Generated code may return any Dict (for example
+    #    integer vectors or scalar values); the optimizer only accepts
+    #    Dict{String,Vector{Float64}}, so convert here and report a clean error instead
+    #    of letting a MethodError escape to the Python caller.
+    param_grid = Dict{String,Vector{Float64}}()
+    try
+        # invokelatest: the grid function was defined at runtime by the compiler.
+        for (k, v) in Base.invokelatest(compiled.param_grid_fn)
+            param_grid[String(k)] = v isa AbstractVector ? collect(Float64, v) : [Float64(v)]
+        end
+    catch e
+        return failure("Param grid: $(sprint(showerror,e))")
+    end
+    # 3. Walk-forward optimize — inject BacktestEngine functions to avoid circular deps
+    cfg_fn = () -> BacktestConfig(
+        initial_equity=initial_equity,
+        commission_pct=commission_pct,
+        risk_per_trade=risk_per_trade,
+    )
+    # Wrap run_backtest to inject atr function from Indicators
+    run_bt_fn = (o,h,l,c,v,sigs,tf,cfg) ->
+        BacktestEngine.run_backtest(o,h,l,c,v,sigs,tf,cfg, Indicators.atr)
+    opt = Optimizer.walk_forward_optimize(
+        compiled.generate_fn,
+        param_grid,
+        open_p, high, low, close, volume,
+        timeframe, strategy_name, symbol;
+        run_bt_fn=run_bt_fn,
+        bt_cfg_fn=cfg_fn,
+        n_windows=n_windows, is_ratio=is_ratio,
+        min_trades=min_trades, min_sharpe=min_sharpe,
+        max_combos=max_combos,
+    )
+    # 4. Flatten the result into a plain Dict for the Python side.
+    return Dict{String,Any}(
+        "is_valid"        => true,
+        "strategy"        => opt.strategy_name,
+        "symbol"          => opt.symbol,
+        "timeframe"       => opt.timeframe,
+        "optimal_params"  => opt.optimal_params,
+        "oos_sharpe_mean" => opt.oos_sharpe_mean,
+        "oos_sharpe_std"  => opt.oos_sharpe_std,
+        "oos_win_rate"    => opt.oos_win_rate,
+        "oos_max_dd"      => opt.oos_max_dd,
+        "oos_pf_mean"     => opt.oos_pf_mean,
+        "oos_trades"      => opt.oos_trades,
+        "wf_efficiency"   => opt.wf_efficiency,
+        "robustness"      => opt.robustness,
+        "is_viable"       => opt.is_viable,
+        "reasons"         => opt.reasons,
+        "oos_sharpes"     => opt.oos_sharpes,
+    )
+end
+end # module QuantEngine

src/SignalCompiler.jl ADDED Viewed

	@@ -0,0 +1,101 @@

+"""
+SignalCompiler.jl — Compile AI-generated Julia strategy code.
+No includes. Indicators functions injected explicitly into sandbox.
+"""
+module SignalCompiler
+using Statistics, Random
+export compile_strategy, CompiledStrategy
+# Result of compiling one strategy: the two callables it must define plus a validity
+# flag and an error message (empty when compilation succeeded).
+struct CompiledStrategy
+    name::String
+    generate_fn::Function      # (o,h,l,c,v,params) -> signal vector
+    param_grid_fn::Function    # () -> parameter grid
+    is_valid::Bool
+    error::String
+end
+# Failed compilation: inert stand-in functions (all-flat signals, empty grid),
+# is_valid=false and the given error text.
+function CompiledStrategy(name::String; error::String="")
+    flat_signals = (o,h,l,c,v,p) -> zeros(Int, length(c))
+    empty_grid = () -> Dict{String,Vector{Float64}}()
+    return CompiledStrategy(name, flat_signals, empty_grid, false, error)
+end
+"""
+    compile_strategy(name, code, indicator_module) -> CompiledStrategy
+indicator_module is the Indicators module, passed from QuantEngine.
+"""
+function compile_strategy(name::String, code::String, ind_mod::Module)::CompiledStrategy
+    # Evaluates `code` inside a fresh module and returns the `generate_signals` /
+    # `get_param_grid` functions it defines. Any parse, eval, missing-function or
+    # smoke-test failure yields an invalid CompiledStrategy carrying the error text.
+    #
+    # NOTE(review): `Module(name)` imports Base by default and `Core.eval` runs whatever
+    # the code contains, so the "sandbox" scopes names but is not a security boundary.
+    # Generated code should be treated as trusted-or-reviewed input.
+    safe = replace(replace(name," "=>"_"), r"[^\w]"=>"x")
+    # Random suffix keeps repeated compilations of the same strategy in separate modules.
+    sandbox = Module(Symbol("S_"*safe*"_"*string(rand(UInt16),base=16)))
+    # Inject all exported Indicators functions
+    for fn_name in names(ind_mod; all=false)
+        fn_name === :Indicators && continue
+        try
+            Core.eval(sandbox,
+                Expr(:const, Expr(:(=), fn_name, getfield(ind_mod, fn_name))))
+        catch; end
+    end
+    # Inject Statistics
+    for sym in (:mean,:std,:var,:median,:cor,:cov)
+        try Core.eval(sandbox, Expr(:const, Expr(:(=),sym,getfield(Statistics,sym)))); catch; end
+    end
+    # Inject safe Base (best effort: names that cannot be bound are skipped silently)
+    for sym in (:length,:size,:zeros,:ones,:fill,:similar,
+                :sum,:prod,:diff,:cumsum,:cumprod,
+                :max,:min,:abs,:sqrt,:log,:exp,:floor,:ceil,:round,:clamp,
+                :isnan,:isinf,:isfinite,:sign,
+                :sort,:sortperm,:reverse,:unique,:findall,:findfirst,
+                :push!,:append!,:pop!,:first,:last,:eachindex,
+                :map,:filter,:any,:all,:count,
+                :Int,:Int64,:Float64,:Bool,
+                :Dict,:Vector,:Tuple,:Set,
+                :NaN,:Inf,:pi,:true,:false,
+                :println,:string,:get)
+        try Core.eval(sandbox, Expr(:const, Expr(:(=),sym,getfield(Base,sym)))); catch
+            try Core.eval(sandbox, Expr(:const, Expr(:(=),sym,eval(sym)))); catch; end
+        end
+    end
+    parsed = try Meta.parseall(code)
+    catch e; return CompiledStrategy(name; error="Parse: $(sprint(showerror,e))"); end
+    try Core.eval(sandbox, parsed)
+    catch e; return CompiledStrategy(name; error="Eval: $(sprint(showerror,e))"); end
+    isdefined(sandbox,:get_param_grid) ||
+        return CompiledStrategy(name; error="Missing: get_param_grid()")
+    isdefined(sandbox,:generate_signals) ||
+        return CompiledStrategy(name; error="Missing: generate_signals(o,h,l,c,v,params)")
+    raw_gen  = getfield(sandbox, :generate_signals)
+    raw_grid = getfield(sandbox, :get_param_grid)
+    # The methods above were defined by `Core.eval` during this very call, so they are
+    # newer than the world age this function (and its callers) run in. Calling them
+    # directly raises "method too new to be called from this world context", which made
+    # every strategy fail the smoke test. `Base.invokelatest` dispatches in the latest
+    # world; wrapping here fixes the smoke test and every later caller at once.
+    gen_fn  = (o,h,l,c,v,p) -> Base.invokelatest(raw_gen, o,h,l,c,v,p)
+    grid_fn = () -> Base.invokelatest(raw_grid)
+    err = _smoke(gen_fn, grid_fn)
+    err != "" && return CompiledStrategy(name; error=err)
+    return CompiledStrategy(name, gen_fn, grid_fn, true, "")
+end
+# Smoke-test a compiled strategy on 200 bars of synthetic random-walk data.
+# Returns "" when the grid is a Dict and the signals are a length-200 Vector with
+# values in {-1, 0, 1}; otherwise returns a description of the first problem found.
+# Exceptions are reported as "Smoke: <message>".
+function _smoke(gen_fn, grid_fn)::String
+    try
+        grid = grid_fn()
+        if !(grid isa Dict)
+            return "get_param_grid() must return Dict"
+        end
+        # Use the first candidate of every parameter (0 when it is empty or not a Vector).
+        params = Dict{String,Float64}()
+        for (key, candidates) in grid
+            params[key] = Float64(candidates isa Vector && !isempty(candidates) ? candidates[1] : 0)
+        end
+        n = 200
+        c = 100.0 .* exp.(cumsum(randn(n) .* 0.005))
+        h = c .* (1.0 .+ abs.(randn(n)) .* 0.003)
+        l = c .* (1.0 .- abs.(randn(n)) .* 0.003)
+        o = c .* (1.0 .+ randn(n) .* 0.001)
+        v = abs.(randn(n)) .* 1000.0 .+ 500.0
+        sigs = gen_fn(o,h,l,c,v,params)
+        if !(sigs isa Vector)
+            return "generate_signals must return Vector, got $(typeof(sigs))"
+        end
+        if length(sigs) != n
+            return "Signal length $(length(sigs)) ≠ $n"
+        end
+        if any(s -> !(s in (-1,0,1)), sigs)
+            return "Values must be in {-1,0,1}"
+        end
+    catch e
+        return "Smoke: $(sprint(showerror,e))"
+    end
+    return ""
+end
+end # module SignalCompiler

src/strategy_template.jl ADDED Viewed

	@@ -0,0 +1,118 @@

+# ═══════════════════════════════════════════════════════════════════
+#  JULIA STRATEGY TEMPLATE
+#  This is the exact format Claude generates for each strategy.
+#  Two functions required. No module/using declarations needed —
+#  all Indicators functions are pre-injected by SignalCompiler.jl.
+# ═══════════════════════════════════════════════════════════════════
+# ── Example: EMA Crossover Strategy ─────────────────────────────────
+"""
+Return parameter ranges for walk-forward grid search.
+Keys must be valid Julia identifiers. Values are Float64 ranges.
+"""
+function get_param_grid() :: Dict{String, Vector{Float64}}
+    # One entry per tunable parameter; each value lists the candidates to search.
+    grid = Dict{String, Vector{Float64}}()
+    grid["fast_period"] = [10.0, 15.0, 20.0, 25.0]
+    grid["slow_period"] = [40.0, 50.0, 60.0, 80.0]
+    grid["atr_filter"]  = [14.0]                     # single value = no optimization
+    return grid
+end
+"""
+Generate trading signals from OHLCV arrays.
+Arguments (all same length n):
+  open_p, high, low, close, volume :: Vector{Float64}
+  params :: Dict{String,Float64}   — one value per key from get_param_grid()
+Returns Vector{Int} of length n:
+  1  = enter/hold long
+  -1 = enter/hold short
+  0  = flat / no position
+Rules:
+  - Return 0 for the first ~slow_period bars (warmup / NaN period)
+  - Always use isnan() checks before comparisons
+  - Signals are position signals, not entry triggers
+    (engine manages entries/exits from signal transitions)
+"""
+function generate_signals(
+    open_p :: Vector{Float64},
+    high   :: Vector{Float64},
+    low    :: Vector{Float64},
+    close  :: Vector{Float64},
+    volume :: Vector{Float64},
+    params :: Dict{String, Float64},
+) :: Vector{Int}
+    n = length(close)
+    # Periods arrive as Float64 and are rounded to whole bars.
+    as_period(key, fallback) = Int(round(get(params, key, fallback)))
+    fast_p = as_period("fast_period", 20.0)
+    slow_p = as_period("slow_period", 50.0)
+    atr_p  = as_period("atr_filter",  14.0)
+    fast_ema = ema(close, fast_p)
+    slow_ema = ema(close, slow_p)
+    atr_vals = atr(high, low, close, atr_p)
+    signals = zeros(Int, n)
+    for i in (slow_p + 1):n
+        # Still in warmup if any indicator is NaN: stay flat.
+        (isnan(fast_ema[i]) || isnan(slow_ema[i]) || isnan(atr_vals[i])) && continue
+        # Volatility filter: stay flat while ATR is below 0.1% of price.
+        atr_vals[i] < close[i] * 0.001 && continue
+        # Position follows the sign of the EMA spread: long above, short below, flat when equal.
+        signals[i] = fast_ema[i] > slow_ema[i] ? 1 : (fast_ema[i] < slow_ema[i] ? -1 : 0)
+    end
+    return signals
+end
+# ═══════════════════════════════════════════════════════════════════
+#  Example 2: RSI Mean Reversion
+# ═══════════════════════════════════════════════════════════════════
+# function get_param_grid()
+#     return Dict(
+#         "rsi_period"    => [7.0, 10.0, 14.0, 21.0],
+#         "oversold"      => [25.0, 30.0, 35.0],
+#         "overbought"    => [65.0, 70.0, 75.0],
+#         "ma_period"     => [20.0, 50.0],
+#     )
+# end
+#
+# function generate_signals(open_p, high, low, close, volume, params)
+#     n          = length(close)
+#     rsi_p      = Int(round(get(params, "rsi_period",  14.0)))
+#     oversold   = get(params, "oversold",  30.0)
+#     overbought = get(params, "overbought", 70.0)
+#     ma_p       = Int(round(get(params, "ma_period",   50.0)))
+#
+#     rsi_vals   = rsi(close, rsi_p)
+#     trend_ma   = sma(close, ma_p)
+#     signals    = zeros(Int, n)
+#
+#     for i in (ma_p + rsi_p + 1):n
+#         isnan(rsi_vals[i]) && continue
+#         isnan(trend_ma[i]) && continue
+#
+#         # Mean reversion: buy oversold in uptrend, sell overbought in downtrend
+#         if rsi_vals[i] < oversold && close[i] > trend_ma[i]
+#             signals[i] = 1
+#         elseif rsi_vals[i] > overbought && close[i] < trend_ma[i]
+#             signals[i] = -1
+#         end
+#     end
+#     return signals
+# end

src/warmup.jl ADDED Viewed

	@@ -0,0 +1,47 @@

+# Build-time warmup script: loads QuantEngine, calls a selection of indicators and then
+# the full pipeline once on synthetic data so those code paths get compiled.
+push!(LOAD_PATH, @__DIR__)
+include(joinpath(@__DIR__, "QuantEngine.jl"))
+using .QuantEngine
+using Statistics, Random
+println("Warming up all Julia hot paths...")
+# Synthetic random-walk OHLCV series (500 bars).
+n=500; c=100.0.*exp.(cumsum(randn(n).*0.005))
+h=c.*(1.0.+abs.(randn(n)).*0.005); l=c.*(1.0.-abs.(randn(n)).*0.005)
+o=c.*(1.0.+randn(n).*0.002);       v=abs.(randn(n)).*2000.0.+1000.0
+_=sma(c,20);         println("  sma ✓")
+_=ema(c,20);         println("  ema ✓")
+_=rsi(c,14);         println("  rsi ✓")
+_=macd(c);           println("  macd ✓")
+_=atr(h,l,c,14);    println("  atr ✓")
+_=bbands(c,20,2.0);  println("  bbands ✓")
+_=donchian(h,l,20);  println("  donchian ✓")
+_=adx(h,l,c,14);    println("  adx ✓")
+_=stoch(h,l,c);      println("  stoch ✓")
+_=zscore(c,20);      println("  zscore ✓")
+println("All indicators warmed ✓")
+# Minimal strategy used only to drive the compile -> optimize -> backtest path.
+code = """
+function get_param_grid() :: Dict{String, Vector{Float64}}
+    return Dict("period" => [10.0, 20.0, 30.0])
+end
+function generate_signals(open_p, high, low, close, volume, params)
+    n = length(close)
+    p = Int(round(get(params, "period", 20.0)))
+    ma = sma(close, p)
+    signals = zeros(Int, n)
+    for i in (p+1):n
+        isnan(ma[i]) && continue
+        signals[i] = close[i] > ma[i] ? 1 : -1
+    end
+    return signals
+end
+"""
+result = full_backtest_pipeline(
+    code, "WarmupTest",
+    o, h, l, c, v, "1h", "TEST";
+    n_windows=2, max_combos=3, min_trades=1,
+)
+# A failed compilation returns a dict without "is_viable", so read it with a default
+# instead of indexing. Values are pulled into locals first, which also avoids quoting
+# string keys inside an interpolation.
+is_valid = result["is_valid"]
+viable   = get(result, "is_viable", false)
+println("full_backtest_pipeline: is_valid=$is_valid viable=$viable")
+is_valid || println("  pipeline error: ", get(result, "error", "unknown"))
+println("\n✅ Julia warmup complete — all hot paths compiled.")

src/warmup_bridge.py ADDED Viewed

	@@ -0,0 +1,30 @@

+"""
+warmup_bridge.py
+Pre-warms the juliacall Python↔Julia bridge at build time.
+Called from Dockerfile Step 3 — runs once, caches the Julia
+session location so runtime startup is instant.
+"""
+import os
+import sys
+os.environ["JULIA_PROJECT"]     = "/app/src"
+os.environ["JULIA_DEPOT_PATH"]  = "/app/.julia"
+print("Pre-warming juliacall bridge...")
+try:
+    from juliacall import Main as jl
+    jl.seval('push!(LOAD_PATH, "/app/src")')
+    jl.seval('include("/app/src/QuantEngine.jl")')
+    jl.seval("using .QuantEngine")
+    # Quick sanity check — call one indicator through the bridge
+    import numpy as np
+    c = (100.0 * np.exp(np.cumsum(np.random.randn(100) * 0.005))).tolist()
+    result = jl.QuantEngine.sma(jl.convert(jl.Vector[jl.Float64], c), 20)
+    assert len(result) == 100
+    print("juliacall bridge warmed up ✓")
+    sys.exit(0)
+except Exception as e:
+    print(f"WARNING: juliacall warmup failed: {e}")
+    print("App will still work — Julia initialises on first request instead.")
+    sys.exit(0)  # Non-fatal — don't break the build

utils/__init__.py ADDED Viewed

File without changes

utils/config.py ADDED Viewed

	@@ -0,0 +1,125 @@

+"""utils/config.py — reads from HF Spaces Secrets (env vars)."""
+import os
+from pathlib import Path
+TMP = Path("/tmp/quant")
+for d in ["pdfs","tick_cache","compiled","exports"]:
+    (TMP / d).mkdir(parents=True, exist_ok=True)
+def get(k, default=""): return os.environ.get(k, default)
+ANTHROPIC_API_KEY = get("ANTHROPIC_API_KEY")
+HF_TOKEN          = get("HF_TOKEN")
+HF_DATASET_REPO   = get("HF_DATASET_REPO")
+HF_TICK_REPO      = get("HF_TICK_REPO")
+SIMILARITY_THRESHOLD = float(get("SIMILARITY_THRESHOLD", "0.85"))
+MAX_TOKENS_PER_CHUNK = int(get("MAX_TOKENS_PER_CHUNK", "3000"))
+OCR_DPI              = int(get("OCR_DPI", "300"))
+INITIAL_EQUITY   = float(get("INITIAL_EQUITY",   "10000"))
+COMMISSION_PCT   = float(get("COMMISSION_PCT",   "0.0002"))
+RISK_PER_TRADE   = float(get("RISK_PER_TRADE",   "0.01"))
+WF_WINDOWS       = int(get("WF_WINDOWS",         "5"))
+WF_IS_RATIO      = float(get("WF_IS_RATIO",      "0.70"))
+MAX_PARAM_COMBOS = int(get("MAX_PARAM_COMBOS",   "300"))
+MIN_TRADES       = int(get("MIN_TRADES",         "30"))
+MIN_SHARPE       = float(get("MIN_SHARPE",       "0.5"))
+BACKTEST_TFS     = get("BACKTEST_TIMEFRAMES", "1h,4h,1d").split(",")
+# Fixed list of strategy/topic category labels, ending with a catch-all "Other".
+# NOTE(review): presumably the allowed values for the "category" fields requested by the
+# prompts below — confirm against the extractor that consumes this list.
+CATEGORIES = [
+    "Trend Following","Mean Reversion","Statistical Arbitrage",
+    "Momentum","Breakout","Volatility Trading","Market Making",
+    "Pattern Recognition","Machine Learning","Options Strategy",
+    "High Frequency","Pairs Trading","Carry Trade",
+    "Seasonal / Calendar","Risk Management","Position Sizing",
+    "Portfolio Construction","Market Microstructure","Other",
+]
+# Prompt template for knowledge extraction. Placeholders: {source_file}, {page_start},
+# {page_end}, {text}.
+# NOTE(review): the literal JSON braces in this template are NOT doubled (COMPILER_PROMPT
+# doubles its braces), so rendering it with str.format() would fail on the schema block.
+# Presumably the placeholders are filled with str.replace() — confirm against the caller.
+EXTRACTION_PROMPT = """
+You are a quantitative finance knowledge extraction engine.
+Extract ALL trading strategies, mathematical formulas, and complete trading systems
+from the text below (taken from an algorithmic trading book).
+Output ONLY valid JSON — no markdown fences, no preamble:
+{
+  "strategies": [{
+    "name": "string", "category": "string", "description": "string",
+    "entry_rules": ["string"], "exit_rules": ["string"],
+    "filters": ["string"], "timeframes": ["string"], "instruments": ["string"],
+    "parameters": {"name": "description with typical value"},
+    "mathematical_basis": "string", "source_context": "string"
+  }],
+  "formulas": [{
+    "name": "string", "category": "string",
+    "latex": "LaTeX string", "plain_text": "string",
+    "variables": {"symbol": "description"},
+    "purpose": "string", "usage_context": "string", "source_context": "string"
+  }],
+  "systems": [{
+    "name": "string", "components": ["string"],
+    "entry_system": "string", "exit_system": "string",
+    "risk_management": "string", "position_sizing": "string",
+    "backtesting_notes": "string", "source_context": "string"
+  }]
+}
+Rules: empty arrays [] if nothing found. Preserve exact math. Include LaTeX.
+Source: {source_file} | Pages: {page_start}–{page_end}
+--- TEXT ---
+{text}
+--- END ---
+""".strip()
+# Prompt template for turning one strategy JSON into Julia code. The only placeholder is
+# {strategy_json}; every literal brace in the Julia type signatures is doubled, which is
+# the escaping str.format() expects.
+# NOTE(review): the function list below must stay in sync with what the Julia side
+# injects into the strategy module — verify when indicators are added or renamed.
+COMPILER_PROMPT = """
+You are a Julia algorithmic trading code generator.
+Convert the strategy JSON below into executable Julia code.
+Output ONLY the Julia code — no markdown fences, no explanation, no module/using declarations.
+EXACT REQUIRED FORMAT (two functions, nothing else):
+function get_param_grid() :: Dict{{String, Vector{{Float64}}}}
+    return Dict(
+        "param_name" => [val1, val2, val3],
+    )
+end
+function generate_signals(
+    open_p :: Vector{{Float64}},
+    high   :: Vector{{Float64}},
+    low    :: Vector{{Float64}},
+    close  :: Vector{{Float64}},
+    volume :: Vector{{Float64}},
+    params :: Dict{{String, Float64}},
+) :: Vector{{Int}}
+    n = length(close)
+    signals = zeros(Int, n)
+    # ... your logic here ...
+    return signals
+end
+RULES (CRITICAL — violations cause compile failure):
+1. NO module, NO using, NO include statements
+2. ALWAYS check isnan() before using indicator values
+3. Return signals[i] = 0 during indicator warmup period
+4. Values: 1=long, -1=short, 0=flat only
+5. Get int params: Int(round(get(params, "key", default)))
+6. Get float params: get(params, "key", default)
+AVAILABLE FUNCTIONS (pre-injected, call directly without prefix):
+Trend:    sma(s,n)  ema(s,n)  wma(s,n)  tema(s,n)  dema(s,n)
+Momentum: rsi(c,n)  macd(c;fast,slow,sig)->(ml,sl,hist)  momentum(s,n)  roc(s,n)
+Bands:    bbands(c,n,k)->(up,mid,lo)  keltner(h,l,c,n,k)->(up,mid,lo)
+Channel:  donchian(h,l,n)->(up,mid,lo)  highest(s,n)  lowest(s,n)
+Volatility: atr(h,l,c,n)  std_dev(s,n)  zscore(s,n)
+Oscillators: stoch(h,l,c;k,d)->(K,D)  cci(h,l,c,n)  williams_r(h,l,c,n)
+Volume:   vwap(h,l,c,v)  obv(c,v)  cmf(h,l,c,v,n)
+Trend strength: adx(h,l,c,n)->(adx,pdi,ndi)
+Crosses:  crossover(a,b)->Bool[]  crossunder(a,b)->Bool[]
+Math:     mean(v)  std(v)  diff(v)  cumsum(v)  abs(x)  sqrt(x)
+Strategy:
+{strategy_json}
+""".strip()

utils/hf_io.py ADDED Viewed

	@@ -0,0 +1,192 @@

+"""utils/hf_io.py — All HuggingFace Hub read/write."""
+import io, json
+from pathlib import Path
+from typing import Optional
+import pandas as pd
+from huggingface_hub import HfApi, hf_hub_download, list_repo_files, CommitOperationAdd
+from loguru import logger
+import utils.config as cfg
+def _api(): return HfApi(token=cfg.HF_TOKEN)
+# ── Knowledge base ─────────────────────────────────────
+def kb_load() -> dict:
+    empty = {"strategies": {}, "formulas": {}, "systems": {}}
+    if not cfg.HF_DATASET_REPO: return empty
+    try:
+        path = hf_hub_download(
+            repo_id=cfg.HF_DATASET_REPO, filename="knowledge_base.jsonl",
+            repo_type="dataset", token=cfg.HF_TOKEN,
+            local_dir=str(cfg.TMP), force_download=True,
+        )
+        result = {"strategies": {}, "formulas": {}, "systems": {}}
+        with open(path, encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                if not line: continue
+                rec = json.loads(line)
+                kind = rec.get("_type", ""); cid = rec.get("canonical_id", "")
+                if kind in result and cid: result[kind][cid] = rec
+        logger.info(f"KB: {len(result['strategies'])} strats, {len(result['formulas'])} formulas")
+        return result
+    except Exception as e:
+        logger.warning(f"KB load (may not exist yet): {e}")
+        return empty
+def kb_save(kb: dict) -> bool:
+    if not cfg.HF_DATASET_REPO: return False
+    try:
+        lines = []
+        for kind in ("strategies","formulas","systems"):
+            for rec in kb[kind].values():
+                lines.append(json.dumps({**rec, "_type": kind}))
+        _api().upload_file(
+            path_or_fileobj=io.BytesIO("\n".join(lines).encode()),
+            path_in_repo="knowledge_base.jsonl",
+            repo_id=cfg.HF_DATASET_REPO, repo_type="dataset",
+            commit_message="Update knowledge base",
+        )
+        return True
+    except Exception as e:
+        logger.error(f"KB save: {e}"); return False
+# ── Tick data ──────────────────────────────────────────
+def tick_list_symbols() -> list[str]:
+    if not cfg.HF_TICK_REPO: return []
+    try:
+        files = list(list_repo_files(repo_id=cfg.HF_TICK_REPO,
+                                      repo_type="dataset", token=cfg.HF_TOKEN))
+        seen = set(); syms = []
+        for f in files:
+            parts = f.split("/")
+            if len(parts) >= 2 and parts[0] not in seen:
+                seen.add(parts[0]); syms.append(parts[0])
+        return sorted(syms)
+    except Exception as e:
+        logger.warning(f"Tick symbols: {e}"); return []
+def tick_load(symbol: str, timeframe: str = "1h") -> Optional[pd.DataFrame]:
+    cache = cfg.TMP / "tick_cache" / f"{symbol}_{timeframe}.parquet"
+    if cache.exists(): return pd.read_parquet(cache)
+    if not cfg.HF_TICK_REPO: return None
+    for fname in [f"{timeframe}.parquet", f"{timeframe}.csv",
+                  "ticks.parquet", "data.parquet"]:
+        df = _try_dl(symbol, fname)
+        if df is not None:
+            df = _norm_ohlcv(df, timeframe if fname.startswith("tick") or fname=="data.parquet" else None)
+            if df is not None and not df.empty:
+                df.to_parquet(cache); return df
+    return None
+def _try_dl(sym, fname):
+    try:
+        local = cfg.TMP / "tick_cache" / sym
+        local.mkdir(parents=True, exist_ok=True)
+        path = hf_hub_download(repo_id=cfg.HF_TICK_REPO,
+            filename=f"{sym}/{fname}", repo_type="dataset",
+            token=cfg.HF_TOKEN, local_dir=str(local), force_download=False)
+        return pd.read_parquet(path) if fname.endswith(".parquet") else pd.read_csv(path)
+    except Exception:
+        return None
+_TF_MAP = {"1m":"1min","5m":"5min","15m":"15min","30m":"30min",
+           "1h":"1h","4h":"4h","1d":"1D","1w":"1W"}
+def _norm_ohlcv(df: pd.DataFrame, resample_to=None) -> Optional[pd.DataFrame]:
+    import numpy as np
+    df = df.copy()
+    ts = next((c for c in df.columns if "time" in c.lower() or "date" in c.lower()), None)
+    if ts: df.index = pd.to_datetime(df[ts], utc=True); df = df.drop(columns=[ts])
+    else:
+        try: df.index = pd.to_datetime(df.index, utc=True)
+        except: return None
+    df.index = df.index.tz_convert("UTC") if df.index.tz else df.index.tz_localize("UTC")
+    df = df.sort_index()
+    if resample_to:
+        price_col = next((c for c in df.columns if c.lower() in ("bid","mid","price","close")), None)
+        if price_col is None: return None
+        if "bid" in df.columns and "ask" in df.columns:
+            df["_price"] = (df["bid"] + df["ask"]) / 2
+        else: df["_price"] = df[price_col]
+        rule = _TF_MAP.get(resample_to, "1h")
+        ohlcv = df["_price"].resample(rule).ohlc()
+        ohlcv.columns = ["open","high","low","close"]
+        vcol = next((c for c in df.columns if "vol" in c.lower()), None)
+        ohlcv["volume"] = df[vcol].resample(rule).sum() if vcol else df["_price"].resample(rule).count()
+        return ohlcv.dropna()
+    renames = {}
+    for c in df.columns:
+        lc = c.lower()
+        if lc in ("o","open"): renames[c]="open"
+        elif lc in ("h","high"): renames[c]="high"
+        elif lc in ("l","low"): renames[c]="low"
+        elif lc in ("c","close"): renames[c]="close"
+        elif lc in ("v","vol","volume","tick_volume"): renames[c]="volume"
+    df = df.rename(columns=renames)
+    for col in ["open","high","low","close"]:
+        if col not in df.columns: return None
+    if "volume" not in df.columns: df["volume"] = 0.0
+    df = df[["open","high","low","close","volume"]].astype(float).dropna(subset=["open","high","low","close"])
+    bad = df["high"] < df["low"]
+    if bad.any(): df.loc[bad,["high","low"]] = df.loc[bad,["low","high"]].values
+    return df
+# ── Batch push ─────────────────────────────────────────
+def push_batch(files: list[tuple[str, bytes]], msg="Update") -> int:
+    if not cfg.HF_DATASET_REPO or not files: return 0
+    ops = [CommitOperationAdd(path_in_repo=p, path_or_fileobj=io.BytesIO(c)) for p,c in files]
+    pushed = 0
+    for i in range(0, len(ops), 100):
+        try:
+            _api().create_commit(repo_id=cfg.HF_DATASET_REPO, repo_type="dataset",
+                operations=ops[i:i+100], commit_message=f"{msg} [{i+1}–{i+len(ops[i:i+100])}]")
+            pushed += len(ops[i:i+100])
+        except Exception as e: logger.error(f"Batch push: {e}")
+    return pushed
+def push_result(name, symbol, tf, report, opt_json, mt5_set, julia_cfg) -> bool:
+    from pipeline.exporter import slugify
+    sl = slugify(name); pre = f"{sl}_{symbol}_{tf}"
+    files = [
+        (f"backtests/{sl}/{pre}_report.md", report.encode()),
+        (f"optimal_sets/{pre}_optimal.json", json.dumps(opt_json,indent=2).encode()),
+        (f"optimal_sets/{pre}.set", mt5_set.encode()),
+        (f"optimal_sets/{pre}_config.jl", julia_cfg.encode()),
+    ]
+    return push_batch(files, f"Backtest: {name} {symbol} {tf}") == 4
+def push_index(md: str, data: dict) -> bool:
+    return push_batch([
+        ("optimal_sets/BACKTEST_INDEX.md", md.encode()),
+        ("optimal_sets/backtest_index.json", json.dumps(data,indent=2).encode()),
+    ], "Update index") == 2
+def fetch_index() -> dict:
+    try:
+        path = hf_hub_download(repo_id=cfg.HF_DATASET_REPO,
+            filename="optimal_sets/backtest_index.json",
+            repo_type="dataset", token=cfg.HF_TOKEN,
+            local_dir=str(cfg.TMP), force_download=True)
+        return json.loads(Path(path).read_text())
+    except: return {}
+def fetch_file(remote: str) -> Optional[bytes]:
+    try:
+        path = hf_hub_download(repo_id=cfg.HF_DATASET_REPO,
+            filename=remote, repo_type="dataset", token=cfg.HF_TOKEN,
+            local_dir=str(cfg.TMP/"downloads"), force_download=True)
+        return Path(path).read_bytes()
+    except: return None
+def pdf_upload(pdf_path: Path) -> str:
+    if not cfg.HF_DATASET_REPO: return ""
+    try:
+        return str(_api().upload_file(path_or_fileobj=str(pdf_path),
+            path_in_repo=f"pdfs/{pdf_path.name}",
+            repo_id=cfg.HF_DATASET_REPO, repo_type="dataset",
+            commit_message=f"Add PDF: {pdf_path.name}"))
+    except Exception as e:
+        logger.warning(f"PDF upload: {e}"); return ""