Spaces:
Sleeping
Sleeping
Upload 22 files
Browse files- Dockerfile +54 -0
- README.md +25 -0
- app.py +482 -0
- pipeline/__init__.py +0 -0
- pipeline/exporter.py +202 -0
- pipeline/extractor.py +153 -0
- pipeline/julia_bridge.py +212 -0
- pipeline/pdf_processor.py +88 -0
- requirements.txt +32 -0
- src/BacktestEngine.jl +148 -0
- src/Indicators.jl +223 -0
- src/Manifest.toml +19 -0
- src/Optimizer.jl +162 -0
- src/Project.toml +7 -0
- src/QuantEngine.jl +103 -0
- src/SignalCompiler.jl +101 -0
- src/strategy_template.jl +118 -0
- src/warmup.jl +47 -0
- src/warmup_bridge.py +30 -0
- utils/__init__.py +0 -0
- utils/config.py +125 -0
- utils/hf_io.py +192 -0
Dockerfile
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim

# ── System dependencies ───────────────────────────────
# tesseract/poppler are installed for PDF OCR and rasterisation; libgl1 and
# friends are the usual shared libraries imaging wheels expect; the compiler
# toolchain is available for any package that builds native code.
RUN apt-get update && apt-get install -y \
        curl wget git ca-certificates \
        tesseract-ocr tesseract-ocr-eng \
        poppler-utils \
        libgl1 libglib2.0-0 libsm6 libxext6 \
        build-essential gfortran \
    && rm -rf /var/lib/apt/lists/*

# ── Install Julia 1.10 LTS ────────────────────────────
# NOTE: the "1.10" path segment of the download URL must be kept in sync with
# JULIA_VERSION by hand.
ENV JULIA_VERSION=1.10.7
RUN wget -q https://julialang-s3.julialang.org/bin/linux/x64/1.10/julia-${JULIA_VERSION}-linux-x86_64.tar.gz \
    && tar -xzf julia-${JULIA_VERSION}-linux-x86_64.tar.gz \
    && mv julia-${JULIA_VERSION} /usr/local/julia \
    && ln -s /usr/local/julia/bin/julia /usr/local/bin/julia \
    && rm julia-${JULIA_VERSION}-linux-x86_64.tar.gz \
    && julia --version

# ── Julia / app environment ───────────────────────────
# ENV values persist into the final image, so these also apply at runtime and
# do not need to be declared a second time further down.
ENV JULIA_DEPOT_PATH=/app/.julia
ENV JULIA_NUM_THREADS=4
ENV JULIA_PROJECT=/app/src

WORKDIR /app

# ── Python dependencies ───────────────────────────────
# Copied on its own first so the pip layer is reused when only code changes.
COPY requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements.txt

# ── Copy project files ────────────────────────────────
COPY . .

# ── Step 1: Resolve + precompile Julia packages ───────
# The Julia program is piped to `julia` on stdin via printf, so no line of
# this Dockerfile starts with a Julia keyword (using/import) that Docker
# could misread as an instruction.
RUN printf 'import Pkg\nPkg.instantiate()\nPkg.precompile()\nprintln("Julia packages resolved")\n' \
    | julia --project=/app/src

# ── Step 2: Warmup: JIT-compile all hot paths ─────────
RUN julia --project=/app/src /app/src/warmup.jl

# ── Step 3: Pre-warm the juliacall Python-to-Julia bridge ──
RUN python3 /app/src/warmup_bridge.py

# ── Runtime ───────────────────────────────────────────
ENV GRADIO_SERVER_NAME=0.0.0.0
ENV GRADIO_SERVER_PORT=7860

EXPOSE 7860
CMD ["python", "app.py"]
|
README.md
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Quant Knowledge Extractor
|
| 3 |
+
emoji: π
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: gray
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: true
|
| 8 |
+
license: mit
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# π Quant Knowledge Extractor β Julia Engine
|
| 12 |
+
|
| 13 |
+
Upload PDFs → Extract strategies → Backtest with Julia → Download MT5 `.set` files.
|
| 14 |
+
|
| 15 |
+
**Julia handles all computation** (indicators, backtest engine, walk-forward optimizer).
|
| 16 |
+
Python handles only UI (Gradio) and API calls (Claude, HuggingFace).
|
| 17 |
+
|
| 18 |
+
## Setup — Add these Secrets in Space Settings → Variables and Secrets
|
| 19 |
+
|
| 20 |
+
| Secret | Description |
|
| 21 |
+
|--------|-------------|
|
| 22 |
+
| `ANTHROPIC_API_KEY` | Claude API key |
|
| 23 |
+
| `HF_TOKEN` | HuggingFace write token |
|
| 24 |
+
| `HF_DATASET_REPO` | `your-username/quant-knowledge-base` |
|
| 25 |
+
| `HF_TICK_REPO` | `your-username/tick-data` |
|
app.py
ADDED
|
@@ -0,0 +1,482 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
app.py — HuggingFace Spaces entry point.
|
| 3 |
+
|
| 4 |
+
Architecture:
|
| 5 |
+
Python : Gradio UI, Claude API calls, HF I/O, PDF processing
|
| 6 |
+
Julia : Indicators, BacktestEngine, WalkForwardOptimizer, SignalCompiler
|
| 7 |
+
|
| 8 |
+
Python NEVER does numerical computation. It only:
|
| 9 |
+
1. Calls Claude API (extraction + strategy code generation)
|
| 10 |
+
2. Calls Julia via juliacall for all math
|
| 11 |
+
3. Reads/writes HuggingFace datasets
|
| 12 |
+
4. Renders Gradio UI
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import io, json, zipfile, tempfile
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
from datetime import datetime
|
| 18 |
+
|
| 19 |
+
import gradio as gr
|
| 20 |
+
from loguru import logger
|
| 21 |
+
|
| 22 |
+
import utils.config as cfg
|
| 23 |
+
import utils.hf_io as hf
|
| 24 |
+
from pipeline.pdf_processor import PDFProcessor
|
| 25 |
+
from pipeline.extractor import AIExtractor, Deduplicator
|
| 26 |
+
from pipeline.julia_bridge import full_backtest_pipeline, julia_available
|
| 27 |
+
from pipeline.exporter import (
|
| 28 |
+
slugify, strategy_md, formula_md,
|
| 29 |
+
backtest_report_md, optimal_json, mt5_set,
|
| 30 |
+
julia_config, index_md,
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
# ββ Lazy KB βββββββββββββββββββββββββββββββββββββββββββ
|
| 34 |
+
_kb = None
|
| 35 |
+
def get_kb():
    """Return the module-level knowledge base, loading it on first use.

    The result of ``hf.kb_load()`` is cached in the module global ``_kb`` so
    later calls reuse the same object.
    """
    global _kb
    if _kb is None:
        _kb = hf.kb_load()
    return _kb
|
| 39 |
+
def reset_kb():
    """Replace the cached knowledge base with a fresh ``hf.kb_load()`` result."""
    global _kb
    _kb = hf.kb_load()
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 44 |
+
# TAB 1 β UPLOAD & EXTRACT
|
| 45 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 46 |
+
|
| 47 |
+
def run_extraction(pdf_files, progress=gr.Progress()):
    """Extract knowledge from uploaded PDFs and persist it to the knowledge base.

    For every PDF: split it into chunks, run the AI extractor on each chunk,
    and merge the result into the knowledge base through the deduplicator.
    Afterwards the knowledge base is saved and, when ``cfg.HF_TOKEN`` is set,
    the rendered strategy/formula Markdown files are pushed in one batch.

    Args:
        pdf_files: Uploaded files from the Gradio ``File`` component. Each item
            may be an object with a ``.name`` path attribute or a plain path
            string.
        progress: Gradio progress callback.

    Returns:
        ``(summary, log_tail)`` where ``log_tail`` is the last 40 log lines.

    NOTE(review): this body was reconstructed from a listing whose indentation
    was lost; the nesting follows the statement order and variable usage.
    Glyphs such as ``β`` / ``π`` in the messages are mis-decoded originals
    that could not be identified and are kept unchanged.
    """
    if not pdf_files:
        return "β οΈ No PDFs uploaded.", ""
    if not cfg.ANTHROPIC_API_KEY:
        return "β ANTHROPIC_API_KEY secret not set.", ""
    if not cfg.HF_DATASET_REPO:
        return "β HF_DATASET_REPO secret not set.", ""

    kinds = ("strategies", "formulas", "systems")
    actions = ("added", "merged", "skipped")

    proc = PDFProcessor()
    ai = AIExtractor()
    dedup = Deduplicator()
    kb = get_kb()
    log = []
    totals = {kind: dict.fromkeys(actions, 0) for kind in kinds}
    hf_files = []

    for i, pdf_file in enumerate(pdf_files):
        # Accept both file wrappers exposing `.name` and bare path strings.
        path = Path(getattr(pdf_file, "name", pdf_file))
        progress(i / len(pdf_files), desc=f"{path.name}")
        log.append(f"\nπ [{i+1}/{len(pdf_files)}] {path.name}")
        try:
            chunks = list(proc.process(path))
        except Exception as e:
            # Best effort: one unreadable PDF must not abort the whole batch.
            log.append(f" β {e}")
            continue
        log.append(f" β {len(chunks)} chunks")

        for chunk in chunks:
            extracted = ai.extract(chunk)
            stats = dedup.process(extracted, kb)
            for kind in kinds:
                for act in actions:
                    totals[kind][act] += stats[kind][act]

        # Running totals across all PDFs processed so far.
        log.append(f" β New: {totals['strategies']['added']} strats, {totals['formulas']['added']} formulas")
        if cfg.HF_TOKEN:
            hf.pdf_upload(path)

    for rec in kb["strategies"].values():
        hf_files.append((f"extracted/strategies/{slugify(rec.get('name',''))}.md",
                         strategy_md(rec).encode()))
    for rec in kb["formulas"].values():
        hf_files.append((f"extracted/formulas/{slugify(rec.get('name',''))}.md",
                         formula_md(rec).encode()))

    progress(0.9, desc="Saving to HuggingFaceβ¦")
    hf.kb_save(kb)
    if hf_files and cfg.HF_TOKEN:
        pushed = hf.push_batch(hf_files, "Update extracted knowledge")
        log.append(f"\nβοΈ Pushed {pushed} files to HuggingFace")
    # NOTE(review): assumed to run unconditionally after the save — confirm.
    reset_kb()

    counts = {k: len(kb[k]) for k in kb}
    summary = f"""✅ Extraction Complete

PDFs processed: {len(pdf_files)}
Strategies β added: {totals['strategies']['added']} merged: {totals['strategies']['merged']} skipped: {totals['strategies']['skipped']}
Formulas β added: {totals['formulas']['added']} merged: {totals['formulas']['merged']} skipped: {totals['formulas']['skipped']}
Systems β added: {totals['systems']['added']} merged: {totals['systems']['merged']} skipped: {totals['systems']['skipped']}

KB totals: {counts['strategies']} strategies Β· {counts['formulas']} formulas Β· {counts['systems']} systems
Tokens used: {ai.tokens_used:,}"""
    return summary, "\n".join(log[-40:])
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 108 |
+
# TAB 2 β BROWSE KB
|
| 109 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 110 |
+
|
| 111 |
+
def search_strategies(query, category):
    """Filter knowledge-base strategies for the browse table.

    Args:
        query: Case-insensitive substring matched against each strategy's
            name and description; falsy means no text filter.
        category: Exact category to keep; falsy or ``"All"`` keeps everything.

    Returns:
        ``(rows, label)``: at most 100 table rows
        ``[name, category, description, sources, layer_count]`` (text fields
        truncated for display) and a label carrying the total match count.
    """
    items = list(get_kb()["strategies"].values())
    if category and category != "All":
        items = [x for x in items if x.get("category") == category]
    if query:
        q = query.lower()
        # `or ""` keeps records with an explicit None field from raising.
        items = [x for x in items
                 if q in (x.get("name") or "").lower()
                 or q in (x.get("description") or "").lower()]
    rows = [[(x.get("name") or "")[:50],
             x.get("category", ""),
             (x.get("description") or "")[:100],
             ", ".join(x.get("sources") or [])[:40],
             len(x.get("layers") or [])]
            for x in items[:100]]
    return rows, f"{len(items)} strategies"
|
| 123 |
+
|
| 124 |
+
def search_formulas(query):
    """Filter knowledge-base formulas for the browse table.

    Args:
        query: Case-insensitive substring matched against each formula's name
            and purpose; falsy means no filter.

    Returns:
        At most 100 rows ``[name, category, purpose, latex_flag, sources]``
        with text fields truncated for display.
    """
    items = list(get_kb()["formulas"].values())
    if query:
        q = query.lower()
        # `or ""` keeps records with an explicit None field from raising.
        items = [x for x in items
                 if q in (x.get("name") or "").lower()
                 or q in (x.get("purpose") or "").lower()]
    # NOTE(review): the "β" flag is a mis-decoded glyph kept from the original.
    return [[(x.get("name") or "")[:50],
             x.get("category", ""),
             (x.get("purpose") or "")[:80],
             "✅" if x.get("latex") else "β",
             ", ".join(x.get("sources") or [])[:40]]
            for x in items[:100]]
|
| 133 |
+
|
| 134 |
+
def dl_strategy(name):
    """Write one strategy's Markdown to a temporary file and return its path.

    Args:
        name: Strategy name; compared case-insensitively after trimming
            whitespace. ``None`` is treated as an empty name.

    Returns:
        Path of the generated ``.md`` file, or ``None`` when no strategy has
        that name.
    """
    wanted = (name or "").strip().lower()
    for rec in get_kb()["strategies"].values():
        if (rec.get("name") or "").lower() == wanted:
            # NamedTemporaryFile creates the file atomically; the deprecated
            # tempfile.mktemp() only returns a name and is open to races.
            with tempfile.NamedTemporaryFile(
                "w", suffix=".md", delete=False, encoding="utf-8"
            ) as fh:
                fh.write(strategy_md(rec))
            return fh.name
    return None
|
| 142 |
+
|
| 143 |
+
def dl_all_strategies_zip(category):
    """Bundle strategy Markdown files into a temporary ZIP and return its path.

    Args:
        category: Exact category to include; falsy or ``"All"`` includes
            every strategy.

    Returns:
        Path of the generated ``.zip`` file (possibly an empty archive).
    """
    items = list(get_kb()["strategies"].values())
    if category and category != "All":
        items = [x for x in items if x.get("category") == category]
    # Create the file safely instead of using the deprecated, race-prone
    # tempfile.mktemp(); ZipFile writes straight into the open handle.
    with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as fh:
        with zipfile.ZipFile(fh, "w", zipfile.ZIP_DEFLATED) as zf:
            for rec in items:
                zf.writestr(f"{slugify(rec.get('name','unknown'))}.md", strategy_md(rec))
    return fh.name
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 155 |
+
# TAB 3 β BACKTEST (Julia Engine)
|
| 156 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 157 |
+
|
| 158 |
+
def load_symbols():
    """Refresh the symbol checkbox group from ``hf.tick_list_symbols()``.

    Returns:
        A ``gr.update`` setting the available choices and pre-selecting the
        first two symbols (or fewer when fewer exist).
    """
    syms = hf.tick_list_symbols()
    # Slicing already yields the whole list when it has fewer than two items.
    return gr.update(choices=syms, value=syms[:2])
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def run_backtests(selected_symbols, selected_timeframes,
                  strategy_filter, max_strategies, viable_only,
                  progress=gr.Progress()):
    """Run the backtest pipeline for every strategy/symbol/timeframe combination.

    For each selected strategy, signal code is generated with
    ``ai.compile_strategy_code``; each (symbol, timeframe) dataset loaded via
    ``hf.tick_load`` is then passed to ``full_backtest_pipeline``. Per-run
    output files and a master index are pushed when the HF settings are set.

    Args:
        selected_symbols: Symbols to test; falsy falls back to the first two
            from ``hf.tick_list_symbols()``.
        selected_timeframes: Timeframes to test; falsy falls back to ``["1h"]``.
        strategy_filter: Optional case-insensitive substring of the strategy name.
        max_strategies: Upper bound on strategies to run; ``0`` means all.
        viable_only: When true, push only results whose ``is_viable`` is truthy.
        progress: Gradio progress callback.

    Returns:
        ``(summary, log_tail)`` where ``log_tail`` is the last 60 log lines.

    NOTE(review): this body was reconstructed from a listing whose indentation
    was lost; the nesting follows the statement order and variable usage.
    Glyphs such as ``β`` / ``π`` in the messages are mis-decoded originals
    that could not be identified and are kept unchanged.
    """
    if not cfg.HF_TICK_REPO:
        return "β HF_TICK_REPO not set.", ""
    if not cfg.ANTHROPIC_API_KEY:
        return "β ANTHROPIC_API_KEY not set.", ""
    if not julia_available():
        return "β Julia runtime not available. Check build logs.", ""

    ai = AIExtractor()
    kb = get_kb()
    strats = list(kb["strategies"].values())
    if strategy_filter:
        strats = [s for s in strats if strategy_filter.lower() in s.get("name","").lower()]
    if max_strategies > 0:
        strats = strats[:int(max_strategies)]
    if not strats:
        return "β οΈ No strategies. Run extraction first.", ""

    symbols = selected_symbols or hf.tick_list_symbols()[:2]
    timeframes = selected_timeframes or ["1h"]

    log, all_results, viable_count = [], [], 0

    for si, rec in enumerate(strats):
        name = rec.get("name","?")
        progress(si / len(strats), desc=f"[{si+1}/{len(strats)}] {name[:35]}")

        # 1. Generate Julia signal code via Claude
        jl_code = ai.compile_strategy_code(rec)
        if not jl_code:
            log.append(f"β Code gen failed: {name[:40]}")
            continue
        log.append(f"✅ Julia code generated: {name[:40]}")

        for sym in symbols:
            for tf in timeframes:
                df = hf.tick_load(sym, tf)
                # Missing or too-short series (< 200 rows) are skipped.
                if df is None or len(df) < 200:
                    log.append(f" β οΈ {sym} {tf}: no data")
                    continue

                # 2. Full Julia pipeline (compile, optimize, backtest)
                result = full_backtest_pipeline(
                    strategy_code=jl_code,
                    strategy_name=name,
                    open_p=df["open"].values,
                    high=df["high"].values,
                    low=df["low"].values,
                    close=df["close"].values,
                    volume=df["volume"].values,
                    timeframe=tf,
                    symbol=sym,
                    n_windows=cfg.WF_WINDOWS,
                    is_ratio=cfg.WF_IS_RATIO,
                    min_trades=cfg.MIN_TRADES,
                    min_sharpe=cfg.MIN_SHARPE,
                    max_combos=cfg.MAX_PARAM_COMBOS,
                    initial_equity=cfg.INITIAL_EQUITY,
                    commission_pct=cfg.COMMISSION_PCT,
                    risk_per_trade=cfg.RISK_PER_TRADE,
                )
                all_results.append(result)
                viable = result.get("is_viable")

                # 3. Build + push output files
                if cfg.HF_TOKEN and cfg.HF_DATASET_REPO:
                    if not viable_only or viable:
                        hf.push_result(
                            name, sym, tf,
                            backtest_report_md(result, rec),
                            optimal_json(result, rec),
                            mt5_set(result, rec),
                            julia_config(result),
                        )

                status = "✅" if viable else "β"
                log.append(
                    f" {status} {sym} {tf}: "
                    f"Sharpe={result.get('oos_sharpe_mean',0):.2f} "
                    f"DD={result.get('oos_max_dd',0):.1f}% "
                    f"Score={result.get('robustness',0):.0f}")
                if viable:
                    viable_count += 1

    # 4. Push master index
    if all_results and cfg.HF_TOKEN:
        hf.push_index(index_md(all_results), {
            "generated": datetime.now().isoformat(),
            "engine": "Julia 1.10",
            "total_strategies": len(all_results),
            "viable_count": viable_count,
            "strategies": all_results,
        })

    summary = f"""π Julia Backtest Complete

Engine: Julia 1.10 BacktestEngine.jl
Strategies compiled: {len(strats)}
Combinations tested: {len(all_results)}
Viable strategies: {viable_count}
Pass rate: {viable_count/max(len(all_results),1)*100:.1f}%

Results on HuggingFace:
{cfg.HF_DATASET_REPO}/optimal_sets/BACKTEST_INDEX.md"""
    return summary, "\n".join(log[-60:])
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 266 |
+
# TAB 4 β RESULTS
|
| 267 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 268 |
+
|
| 269 |
+
def load_results():
    """Build the results table from the index returned by ``hf.fetch_index()``.

    Returns:
        ``(rows, status)``. ``rows`` lists only entries whose ``is_viable`` is
        truthy, sorted by ``oos_sharpe_mean`` descending, with metrics
        formatted as strings. ``status`` summarises viable/tested counts and
        the index timestamp. When no index exists, ``([], "No results yet.")``.
    """
    data = hf.fetch_index()
    if not data:
        return [], "No results yet."
    strats = data.get("strategies", [])
    viable = sorted((s for s in strats if s.get("is_viable")),
                    key=lambda x: x.get("oos_sharpe_mean", 0), reverse=True)
    rows = [[s.get("strategy","")[:45], s.get("symbol",""), s.get("timeframe",""),
             f'{s.get("oos_sharpe_mean",0):.2f}', f'{s.get("oos_max_dd",0):.1f}%',
             f'{s.get("oos_win_rate",0):.1f}%', f'{s.get("oos_pf_mean",0):.2f}',
             f'{s.get("robustness",0):.0f}']
            for s in viable]
    # [:16] trims the ISO timestamp to minute precision.
    count = (f"✅ {len(viable)} viable / {len(strats)} tested | "
             f"Engine: Julia | {data.get('generated','')[:16]}")
    return rows, count
|
| 282 |
+
|
| 283 |
+
def dl_result_file(name, symbol, tf, ftype):
    """Fetch one stored result file and save it to a temporary local file.

    Args:
        name: Strategy name; slugified to build the remote file name.
        symbol: Symbol; upper-cased and trimmed.
        tf: Timeframe; trimmed.
        ftype: One of ``"MT5 .set file"``, ``"Optimal JSON"``,
            ``"Julia config"`` or ``"Full report"``.

    Returns:
        Local path of the downloaded copy, or ``None`` when ``ftype`` is
        unknown or ``hf.fetch_file`` returns nothing.
    """
    sl = slugify(name or "")
    sym = (symbol or "").upper().strip()
    pre = f"{sl}_{sym}_{(tf or '').strip()}"
    ext_map = {
        "MT5 .set file": f"optimal_sets/{pre}.set",
        "Optimal JSON": f"optimal_sets/{pre}_optimal.json",
        "Julia config": f"optimal_sets/{pre}_config.jl",
        "Full report": f"backtests/{sl}/{pre}_report.md",
    }
    remote = ext_map.get(ftype, "")
    if not remote:
        return None
    data = hf.fetch_file(remote)
    if not data:
        return None
    # NamedTemporaryFile creates the file atomically; the deprecated
    # tempfile.mktemp() only returns a name and is open to races.
    with tempfile.NamedTemporaryFile(suffix=Path(remote).suffix, delete=False) as fh:
        fh.write(data)
    return fh.name
|
| 297 |
+
|
| 298 |
+
def dl_all_sets():
    """Zip the ``.set`` files of all viable strategies listed in the index.

    Returns:
        Path of the generated ``.zip`` file, or ``None`` when
        ``hf.fetch_index()`` returns nothing. Entries whose file cannot be
        fetched are left out of the archive.
    """
    data = hf.fetch_index()
    if not data:
        return None
    # Create the file safely instead of using the deprecated, race-prone
    # tempfile.mktemp(); ZipFile writes straight into the open handle.
    with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as fh:
        with zipfile.ZipFile(fh, "w", zipfile.ZIP_DEFLATED) as zf:
            for s in data.get("strategies", []):
                if not s.get("is_viable"):
                    continue
                entry = f"{slugify(s['strategy'])}_{s['symbol']}_{s['timeframe']}.set"
                content = hf.fetch_file(f"optimal_sets/{entry}")
                if content:
                    zf.writestr(entry, content)
    return fh.name
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 312 |
+
# TAB 5 β SETUP
|
| 313 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 314 |
+
|
| 315 |
+
def check_config():
    """Render a Markdown status report for the Setup tab.

    The report covers which required settings are present, whether
    ``julia_available()`` reports a usable runtime, knowledge-base and tick
    data counts, and the backtest parameters read from ``cfg``.

    Returns:
        The report as a single newline-joined Markdown string.

    NOTE(review): ``β`` and ``Β·`` in the output are mis-decoded glyphs kept
    from the original because the intended characters could not be identified.
    """
    checks = [
        ("ANTHROPIC_API_KEY", cfg.ANTHROPIC_API_KEY, "Claude API"),
        ("HF_TOKEN", cfg.HF_TOKEN, "HF write access"),
        ("HF_DATASET_REPO", cfg.HF_DATASET_REPO, "Results storage"),
        ("HF_TICK_REPO", cfg.HF_TICK_REPO, "Tick data source"),
    ]
    kb = get_kb()
    # Only query the tick repo when one is configured.
    symbols = hf.tick_list_symbols() if cfg.HF_TICK_REPO else []
    jl_ok = julia_available()

    lines = ["## Configuration Status", ""]
    for name, val, desc in checks:
        icon = "✅" if val else "β"
        lines.append(f"{icon} `{name}` β {desc}")

    lines += [
        "", "## Julia Engine", "",
        f"{'✅' if jl_ok else 'β'} Julia runtime: {'available' if jl_ok else 'not available (check build logs)'}",
        "", "## Data Status", "",
        f"- Tick symbols: **{len(symbols)}** β {', '.join(symbols[:8])}",
        f"- Strategies in KB: **{len(kb['strategies'])}**",
        f"- Formulas in KB: **{len(kb['formulas'])}**",
        "", "## Backtest Settings", "",
        f"- WF Windows: `{cfg.WF_WINDOWS}` Β· IS Ratio: `{cfg.WF_IS_RATIO}`",
        f"- Min Trades: `{cfg.MIN_TRADES}` Β· Min Sharpe: `{cfg.MIN_SHARPE}`",
        f"- Commission: `{cfg.COMMISSION_PCT*100:.3f}%` Β· Risk/trade: `{cfg.RISK_PER_TRADE*100:.1f}%`",
        f"- Timeframes: `{', '.join(cfg.BACKTEST_TFS)}`",
    ]
    return "\n".join(lines)
|
| 343 |
+
|
| 344 |
+
|
| 345 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 346 |
+
# BUILD APP
|
| 347 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 348 |
+
|
| 349 |
+
CATS = ["All"] + cfg.CATEGORIES
|
| 350 |
+
|
| 351 |
+
with gr.Blocks(
|
| 352 |
+
title="Quant Knowledge Extractor β Julia Engine",
|
| 353 |
+
theme=gr.themes.Base(primary_hue="green", neutral_hue="gray"),
|
| 354 |
+
css=".status-box{font-family:monospace;font-size:.82em}"
|
| 355 |
+
) as demo:
|
| 356 |
+
|
| 357 |
+
gr.HTML("""
|
| 358 |
+
<div style="text-align:center;padding:1.2em 0 .3em">
|
| 359 |
+
<h1 style="font-size:2em;color:#16a34a;margin:0">π Quant Knowledge Extractor</h1>
|
| 360 |
+
<p style="color:#6b7280;margin:.4em 0 0">
|
| 361 |
+
Julia 1.10 Engine Β· BacktestEngine.jl Β· WalkForward Optimizer Β· MT5 .set Output
|
| 362 |
+
</p>
|
| 363 |
+
</div>""")
|
| 364 |
+
|
| 365 |
+
with gr.Tabs():
|
| 366 |
+
|
| 367 |
+
# Tab 1 β Extract
|
| 368 |
+
with gr.Tab("π€ Upload & Extract"):
|
| 369 |
+
gr.Markdown("### Upload algorithmic trading PDFs β OCR applied automatically")
|
| 370 |
+
with gr.Row():
|
| 371 |
+
with gr.Column(scale=2):
|
| 372 |
+
pdf_in = gr.File(label="Drop PDFs here", file_count="multiple", file_types=[".pdf"])
|
| 373 |
+
ext_btn = gr.Button("π Extract Knowledge", variant="primary", size="lg")
|
| 374 |
+
with gr.Column(scale=1):
|
| 375 |
+
ext_out = gr.Textbox(label="Result", lines=14, interactive=False, elem_classes=["status-box"])
|
| 376 |
+
ext_log = gr.Textbox(label="Log", lines=8, interactive=False, elem_classes=["status-box"])
|
| 377 |
+
ext_btn.click(fn=run_extraction, inputs=[pdf_in], outputs=[ext_out, ext_log])
|
| 378 |
+
|
| 379 |
+
# Tab 2 β Browse
|
| 380 |
+
with gr.Tab("π Knowledge Base"):
|
| 381 |
+
with gr.Tabs():
|
| 382 |
+
with gr.Tab("π Strategies"):
|
| 383 |
+
with gr.Row():
|
| 384 |
+
sq = gr.Textbox(label="Search", placeholder="RSI, breakout, Kellyβ¦")
|
| 385 |
+
sc = gr.Dropdown(choices=CATS, value="All", label="Category")
|
| 386 |
+
sb = gr.Button("π Search", variant="primary")
|
| 387 |
+
st = gr.Dataframe(headers=["Name","Category","Description","Sources","Variants"],
|
| 388 |
+
datatype=["str"]*4+["number"], interactive=False)
|
| 389 |
+
sn = gr.Markdown("")
|
| 390 |
+
with gr.Row():
|
| 391 |
+
sni = gr.Textbox(label="Name to download")
|
| 392 |
+
sdb = gr.Button("β¬οΈ Download MD"); sdf = gr.File(label="")
|
| 393 |
+
szb = gr.Button("π¦ Category ZIP"); szf = gr.File(label="")
|
| 394 |
+
sb.click(fn=search_strategies, inputs=[sq,sc], outputs=[st,sn])
|
| 395 |
+
sdb.click(fn=dl_strategy, inputs=[sni], outputs=[sdf])
|
| 396 |
+
szb.click(fn=dl_all_strategies_zip, inputs=[sc], outputs=[szf])
|
| 397 |
+
with gr.Tab("β Formulas"):
|
| 398 |
+
with gr.Row():
|
| 399 |
+
fq = gr.Textbox(label="Search", placeholder="Sharpe, Kelly, ATRβ¦")
|
| 400 |
+
fb = gr.Button("π Search", variant="primary")
|
| 401 |
+
ft = gr.Dataframe(headers=["Name","Category","Purpose","LaTeX","Sources"],
|
| 402 |
+
datatype=["str"]*5, interactive=False)
|
| 403 |
+
fb.click(fn=search_formulas, inputs=[fq], outputs=[ft])
|
| 404 |
+
|
| 405 |
+
# Tab 3 β Backtest
|
| 406 |
+
with gr.Tab("π¬ Julia Backtest"):
|
| 407 |
+
gr.Markdown(
|
| 408 |
+
"### Walk-Forward Backtest β Julia Engine\n"
|
| 409 |
+
"Claude generates Julia signal code β Julia compiles + optimizes β "
|
| 410 |
+
"MT5 `.set` files pushed to HuggingFace."
|
| 411 |
+
)
|
| 412 |
+
with gr.Row():
|
| 413 |
+
with gr.Column(scale=2):
|
| 414 |
+
bt_load = gr.Button("π Load Symbols from HF")
|
| 415 |
+
bt_syms = gr.CheckboxGroup(label="Symbols", choices=[], value=[])
|
| 416 |
+
bt_tfs = gr.CheckboxGroup(
|
| 417 |
+
label="Timeframes", value=["1h","4h"],
|
| 418 |
+
choices=["1m","5m","15m","30m","1h","4h","1d"])
|
| 419 |
+
bt_filt = gr.Textbox(label="Strategy filter (optional)")
|
| 420 |
+
bt_max = gr.Slider(0, 500, value=0, step=10, label="Max strategies (0=all)")
|
| 421 |
+
bt_viable= gr.Checkbox(label="Push only VIABLE to HuggingFace", value=True)
|
| 422 |
+
bt_run = gr.Button("π Run Julia Backtests", variant="primary", size="lg")
|
| 423 |
+
with gr.Column(scale=1):
|
| 424 |
+
bt_out = gr.Textbox(label="Summary", lines=12, interactive=False, elem_classes=["status-box"])
|
| 425 |
+
bt_log = gr.Textbox(label="Log", lines=12, interactive=False, elem_classes=["status-box"])
|
| 426 |
+
bt_load.click(fn=load_symbols, outputs=[bt_syms])
|
| 427 |
+
bt_run.click(fn=run_backtests,
|
| 428 |
+
inputs=[bt_syms, bt_tfs, bt_filt, bt_max, bt_viable],
|
| 429 |
+
outputs=[bt_out, bt_log])
|
| 430 |
+
|
| 431 |
+
# Tab 4 β Results
|
| 432 |
+
with gr.Tab("π Results"):
|
| 433 |
+
gr.Markdown("### Viable Strategies β Download MT5 `.set` & Julia Configs")
|
| 434 |
+
res_ref = gr.Button("π Refresh from HuggingFace", variant="primary")
|
| 435 |
+
res_tbl = gr.Dataframe(
|
| 436 |
+
headers=["Strategy","Symbol","TF","Sharpe","Max DD","Win%","PF","Score"],
|
| 437 |
+
datatype=["str"]*8, interactive=False)
|
| 438 |
+
res_cnt = gr.Markdown("")
|
| 439 |
+
gr.Markdown("#### Download individual file")
|
| 440 |
+
with gr.Row():
|
| 441 |
+
rn = gr.Textbox(label="Strategy name"); rs = gr.Textbox(label="Symbol")
|
| 442 |
+
rt = gr.Textbox(label="Timeframe")
|
| 443 |
+
rf = gr.Dropdown(choices=["MT5 .set file","Optimal JSON",
|
| 444 |
+
"Julia config","Full report"],
|
| 445 |
+
value="MT5 .set file", label="File type")
|
| 446 |
+
rdb = gr.Button("β¬οΈ Download", variant="primary"); rdf = gr.File(label="")
|
| 447 |
+
gr.Markdown("#### Batch download all viable strategies")
|
| 448 |
+
with gr.Row():
|
| 449 |
+
rsb = gr.Button("π― All MT5 .set (ZIP)"); rsf = gr.File(label="")
|
| 450 |
+
res_ref.click(fn=load_results, outputs=[res_tbl, res_cnt])
|
| 451 |
+
rdb.click(fn=dl_result_file, inputs=[rn,rs,rt,rf], outputs=[rdf])
|
| 452 |
+
rsb.click(fn=dl_all_sets, outputs=[rsf])
|
| 453 |
+
demo.load(fn=load_results, outputs=[res_tbl, res_cnt])
|
| 454 |
+
|
| 455 |
+
# Tab 5 β Setup
|
| 456 |
+
with gr.Tab("βοΈ Setup & Status"):
|
| 457 |
+
gr.Markdown("""### Required Secrets (Space Settings β Variables and Secrets)
|
| 458 |
+
|
| 459 |
+
| Secret | Description |
|
| 460 |
+
|--------|-------------|
|
| 461 |
+
| `ANTHROPIC_API_KEY` | Claude API key |
|
| 462 |
+
| `HF_TOKEN` | HuggingFace write token |
|
| 463 |
+
| `HF_DATASET_REPO` | `your-username/quant-knowledge-base` |
|
| 464 |
+
| `HF_TICK_REPO` | `your-username/tick-data` |
|
| 465 |
+
|
| 466 |
+
### Tick Data Format
|
| 467 |
+
Upload to your `tick-data` dataset:
|
| 468 |
+
```
|
| 469 |
+
EURUSD/ticks.parquet (columns: timestamp, bid, ask OR open,high,low,close,volume)
|
| 470 |
+
BTCUSDT/1h.parquet (pre-built OHLCV β faster)
|
| 471 |
+
```
|
| 472 |
+
""")
|
| 473 |
+
cfg_ref = gr.Button("π Check Status")
|
| 474 |
+
cfg_out = gr.Markdown(check_config())
|
| 475 |
+
cfg_ref.click(fn=check_config, outputs=[cfg_out])
|
| 476 |
+
|
| 477 |
+
gr.HTML("""<div style="text-align:center;padding:.8em;color:#9ca3af;font-size:.75em">
|
| 478 |
+
Quant Knowledge Extractor Β· Julia 1.10 Engine Β· HuggingFace Spaces
|
| 479 |
+
</div>""")
|
| 480 |
+
|
| 481 |
+
if __name__ == "__main__":
|
| 482 |
+
demo.launch()
|
pipeline/__init__.py
ADDED
|
File without changes
|
pipeline/exporter.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""pipeline/exporter.py β Generate markdown, MT5 .set, Julia config strings."""
|
| 2 |
+
import re, json
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def slugify(t):
    """Turn arbitrary text into a lowercase, hyphen-separated slug.

    Steps: lowercase and trim; drop every character that is not a word
    character, whitespace or hyphen; collapse each run of whitespace,
    underscores and hyphens into a single hyphen; truncate to 50 characters.

    Args:
        t: Text to convert.

    Returns:
        The slug, at most 50 characters long. It may be empty, and may start
        or end with a hyphen when the input does after punctuation removal.
    """
    t = t.lower().strip()
    t = re.sub(r"[^\w\s-]", "", t)
    t = re.sub(r"[\s_-]+", "-", t)
    return t[:50]
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def strategy_md(rec):
    """Render a strategy record as a Markdown document.

    Missing or ``None`` fields are rendered as empty text instead of raising,
    and a rule section given as a single string is treated as one rule.

    Args:
        rec: Strategy record (dict). Reads ``name``, ``category``, ``sources``,
            ``canonical_id``, ``description``, ``entry_rules``, ``exit_rules``,
            ``filters``, ``mathematical_basis`` and ``layers``.

    Returns:
        The Markdown text.
    """
    def text(value):
        # None would otherwise break "\n".join() or show up as the word "None".
        return "" if value is None else str(value)

    def as_list(value):
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        return list(value)

    name = rec.get("name") or "Unknown"
    sources = ", ".join(text(s) for s in as_list(rec.get("sources")))
    layers = as_list(rec.get("layers"))

    # Header lines end in two spaces: a Markdown hard line break.
    lines = [
        f"# {name}", "",
        f"> **Category:** {text(rec.get('category'))}  ",
        f"> **Sources:** {sources[:80]}  ",
        f"> **ID:** `{text(rec.get('canonical_id'))}`",
        "", "---", "", "## Description", "", text(rec.get("description")), "",
    ]

    sections = (
        ("## Entry Rules", "entry_rules"),
        ("## Exit Rules", "exit_rules"),
        ("## Filters", "filters"),
    )
    for heading, key in sections:
        rules = as_list(rec.get(key))
        if not rules:
            continue
        lines += [heading, ""]
        lines += [f"{number}. {rule}" for number, rule in enumerate(rules, 1)]
        lines.append("")

    if rec.get("mathematical_basis"):
        lines += ["## Mathematical Basis", "", text(rec["mathematical_basis"]), ""]

    if layers:
        lines += [f"## Variants ({len(layers)} sources)", ""]
        for number, layer in enumerate(layers, 1):
            data = layer.get("data") or {}
            lines.append(f"### Variant {number} — {text(layer.get('source_file'))}")
            if data.get("description"):
                lines.append(text(data["description"]))
            lines.append("")

    lines += ["---", "", "*Generated by Quant Knowledge Extractor — Julia Engine*"]
    return "\n".join(lines)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def formula_md(rec):
    """Render a formula record as a Markdown document.

    Args:
        rec: Formula record (dict). Reads ``name``, ``category``, ``sources``,
            ``latex``, ``plain_text``, ``purpose`` and ``variables``.
            ``variables`` is normally a ``{symbol: description}`` mapping. A
            list is also accepted, because records come from parsed
            model-generated JSON whose shape is not guaranteed.

    Returns:
        The Markdown text.
    """
    def text(value):
        return "" if value is None else str(value)

    def cell(value):
        # A raw "|" or newline inside a cell would break the Markdown table.
        return text(value).replace("|", "\\|").replace("\n", " ")

    def variable_rows(variables):
        """Normalise ``variables`` into (symbol, description) pairs."""
        if isinstance(variables, dict):
            return list(variables.items())
        if isinstance(variables, str):
            return [(variables, "")]
        rows = []
        for entry in variables:
            if isinstance(entry, dict) and "symbol" in entry:
                rows.append((entry["symbol"], entry.get("description", "")))
            elif isinstance(entry, dict):
                rows.extend(entry.items())
            else:
                rows.append((entry, ""))
        return rows

    sources = rec.get("sources") or []
    if isinstance(sources, str):
        sources = [sources]

    lines = [
        f"# {rec.get('name') or 'Unknown'}", "",
        # Two trailing spaces: a Markdown hard line break.
        f"> **Category:** {text(rec.get('category'))}  ",
        f"> **Sources:** {', '.join(text(s) for s in sources)[:80]}",
        "", "---", "",
    ]
    if rec.get("latex"):
        lines += ["## Formula (LaTeX)", "", "$$", text(rec["latex"]), "$$", ""]
    if rec.get("plain_text"):
        lines += ["## Plain Text", "", "```", text(rec["plain_text"]), "```", ""]
    if rec.get("purpose"):
        lines += ["## Purpose", "", text(rec["purpose"]), ""]
    if rec.get("variables"):
        lines += ["## Variables", "", "| Symbol | Description |", "|--------|-------------|"]
        lines += [
            f"| `{cell(symbol)}` | {cell(description)} |"
            for symbol, description in variable_rows(rec["variables"])
        ]
        lines.append("")
    lines += ["---", "", "*Generated by Quant Knowledge Extractor — Julia Engine*"]
    return "\n".join(lines)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def backtest_report_md(result: dict, record: dict) -> str:
    """Render one backtest result as a Markdown report.

    Args:
        result: Backtest result dict. Reads ``strategy``, ``symbol``,
            ``timeframe``, ``is_viable``, ``robustness``, ``reasons``,
            ``optimal_params`` and the ``oos_*`` / ``wf_efficiency`` metrics.
        record: Strategy record. Only ``description`` is read.

    Returns:
        The Markdown text.
    """
    # Local import: the module-level import only brings in `datetime`.
    from datetime import timezone

    def num(key: str) -> float:
        """Return ``result[key]`` as a float; 0.0 if missing, None or non-numeric."""
        try:
            return float(result.get(key, 0))
        except (TypeError, ValueError):
            return 0.0

    def param(value) -> str:
        # Optimised parameters are usually numeric, but a string-valued one
        # must not crash the report.
        if isinstance(value, (int, float)):
            return f"{value:.4g}"
        return str(value)

    name = result.get("strategy", "Unknown")
    sym = result.get("symbol", "")
    tf = result.get("timeframe", "")
    status = "✅ VIABLE" if result.get("is_viable", False) else "❌ NOT VIABLE"
    # The timestamp is labelled UTC, so it has to be taken in UTC rather than
    # in the server's local zone.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    # Lines ending in two spaces are Markdown hard line breaks.
    lines = [
        f"# {name}", f"## Backtest Report — {sym} {tf}", "",
        f"> **Status:** {status}  ",
        f"> **Robustness:** {num('robustness'):.0f}/100  ",
        "> **Engine:** Julia 1.10  ",
        f"> **Generated:** {now}", "", "---", "", "## Viability", "",
    ]
    lines += [f"- {reason}" for reason in (result.get("reasons") or [])]
    lines.append("")

    params = result.get("optimal_params")
    if params:
        lines += ["## Optimal Parameters (Julia Walk-Forward)", "",
                  "| Parameter | Value |", "|-----------|-------|"]
        lines += [f"| `{key}` | `{param(value)}` |" for key, value in params.items()]
        lines.append("")

    lines += [
        "## OOS Performance (Walk-Forward Aggregate)", "",
        "| Metric | Value |", "|--------|-------|",
        f"| Sharpe (mean±std) | `{num('oos_sharpe_mean'):.3f} ± {num('oos_sharpe_std'):.3f}` |",
        f"| Win Rate | `{num('oos_win_rate'):.1f}%` |",
        f"| Max Drawdown (mean) | `{num('oos_max_dd'):.2f}%` |",
        f"| Profit Factor | `{num('oos_pf_mean'):.3f}` |",
        f"| Total OOS Trades | `{result.get('oos_trades', 0)}` |",
        f"| WF Efficiency Ratio | `{num('wf_efficiency'):.3f}` |",
        f"| Robustness Score | `{num('robustness'):.0f}/100` |",
    ]
    description = (record or {}).get("description")
    if description:
        lines += ["", "## Strategy Description", "", str(description)]
    lines += ["", "---", "", "*Backtested using Julia 1.10 — BacktestEngine.jl + Optimizer.jl*"]
    return "\n".join(lines)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def optimal_json(result: dict, record: dict) -> dict:
    """Build the JSON-serialisable summary of a backtest result.

    Args:
        result: Backtest result dict (metadata, ``optimal_params``, ``oos_*``
            metrics, ``wf_efficiency``, ``is_viable``, ``reasons``).
        record: Strategy record. Accepted for signature parity with the other
            exporters; not read here.

    Returns:
        A dict with ``metadata``, ``optimal_parameters``, ``performance`` and
        ``viability`` sections.
    """
    def rounded(key: str, digits: int):
        # A key that is present but null would make round() raise a TypeError,
        # so treat None like a missing metric.
        value = result.get(key)
        return round(0 if value is None else value, digits)

    return {
        "metadata": {
            "strategy": result.get("strategy"),
            "symbol": result.get("symbol"),
            "timeframe": result.get("timeframe"),
            "generated": datetime.now().isoformat(),
            "engine": "Julia 1.10",
            "is_viable": result.get("is_viable"),
            "robustness": result.get("robustness"),
        },
        "optimal_parameters": result.get("optimal_params") or {},
        "performance": {
            "oos_sharpe_mean": rounded("oos_sharpe_mean", 4),
            "oos_sharpe_std": rounded("oos_sharpe_std", 4),
            "oos_win_rate": rounded("oos_win_rate", 2),
            "oos_max_dd_pct": rounded("oos_max_dd", 2),
            "oos_pf_mean": rounded("oos_pf_mean", 4),
            "oos_total_trades": result.get("oos_trades", 0),
            "wf_efficiency": rounded("wf_efficiency", 4),
        },
        "viability": {
            "is_viable": result.get("is_viable"),
            "reasons": result.get("reasons") or [],
        },
    }
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def mt5_set(result: dict, record: dict) -> str:
    """Build the text of an MT5 Strategy Tester ``.set`` file.

    Each numeric parameter becomes ``Name=value||start||step||stop||1``. The
    step is 10% of the value (at least 1.0) and the range spans three steps
    either side. Non-numeric parameters are written as ``Name=value``.

    Args:
        result: Backtest result dict. Reads ``strategy``, ``symbol``,
            ``timeframe``, ``optimal_params``, ``oos_sharpe_mean``,
            ``robustness`` and ``is_viable``.
        record: Strategy record. Accepted for signature parity; not read here.

    Returns:
        The file content as a single string.
    """
    def num(key: str) -> float:
        """Return ``result[key]`` as a float; 0.0 if missing, None or non-numeric."""
        try:
            return float(result.get(key, 0))
        except (TypeError, ValueError):
            return 0.0

    name = result.get("strategy", "Unknown")
    sym = result.get("symbol", "")
    tf = result.get("timeframe", "")
    lines = [
        "; MT5 Strategy Tester Set File — Generated by Julia Engine",
        f"; Strategy: {name}", f"; Symbol: {sym} Timeframe: {tf}",
        f"; OOS Sharpe: {num('oos_sharpe_mean'):.3f} Score: {num('robustness'):.0f}/100",
        f"; Viable: {result.get('is_viable', False)}",
        ";", "; 1. Open MT5 → View → Strategy Tester",
        "; 2. Select Expert Advisor", "; 3. Inputs → Open → select this file", "",
    ]
    for key, value in (result.get("optimal_params") or {}).items():
        # snake_case / kebab-case -> PascalCase input name.
        mt5_key = "".join(w.capitalize() for w in str(key).replace("-", "_").split("_"))
        try:
            fv = float(value)
        except (TypeError, ValueError):
            lines.append(f"{mt5_key}={value}")
            continue
        step = max(1.0, abs(fv) * 0.1)
        # Non-negative values keep the floor at 0. Clamping a negative value to
        # 0 would put the range start above the value itself.
        start = fv - step * 3 if fv < 0 else max(0.0, fv - step * 3)
        lines.append(f"{mt5_key}={fv:.4g}||{start:.4g}||{step:.4g}||{fv + step * 3:.4g}||1")
    lines += ["", "RiskPercent=1.0||0.5||0.5||3.0||1", "LotSize=0.1||0.01||0.01||1.0||1"]
    return "\n".join(lines)
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def julia_config(result: dict) -> str:
    """Generate a Julia source snippet holding the optimal parameters.

    The snippet defines ``optimal_params()`` returning ``<Struct>Params(...)``
    and a ``BACKTEST_METADATA`` named tuple. ``<Struct>`` is built from the
    first four whitespace-separated words of the strategy name.

    Args:
        result: Backtest result dict. Reads ``strategy``, ``symbol``,
            ``timeframe``, ``optimal_params``, ``is_viable`` and the
            ``oos_*`` / ``wf_efficiency`` / ``robustness`` metrics.

    Returns:
        Julia source code as a string, ending with a newline.
    """
    def num(key: str) -> float:
        """Return ``result[key]`` as a float; 0.0 if missing, None or non-numeric."""
        try:
            return float(result.get(key, 0))
        except (TypeError, ValueError):
            return 0.0

    def jl_str(value) -> str:
        # Escape backslash, double quote and `$` (string interpolation) so
        # arbitrary text stays a plain Julia string literal.
        escaped = str(value).replace("\\", "\\\\").replace('"', '\\"').replace("$", "\\$")
        return f'"{escaped}"'

    def jl_value(value) -> str:
        try:
            return f"{float(value):.6g}"
        except (TypeError, ValueError):
            return jl_str(value)

    def jl_ident(key) -> str:
        ident = re.sub(r"\W", "_", str(key).lower())
        return f"_{ident}" if ident[:1].isdigit() else ident

    name = str(result.get("strategy") or "Unknown")
    sym = result.get("symbol", "")
    tf = result.get("timeframe", "")

    # Keep only word characters so names like "Mean-Reversion (v2)" still
    # yield a valid Julia type name.
    struct = "".join(
        part.capitalize()
        for word in name.split()[:4]
        for part in re.findall(r"\w+", word)
    )
    if struct[:1].isdigit():
        struct = f"S{struct}"

    params = result.get("optimal_params") or {}
    # Every keyword argument ends with a comma: Julia does not accept
    # newline-separated arguments inside a call.
    param_lines = [f"        {jl_ident(k)} = {jl_value(v)}," for k, v in params.items()]
    if not param_lines:
        param_lines = ["        # no parameters"]

    viable = "true" if result.get("is_viable", False) else "false"
    lines = [
        f"# Optimal Parameters — {name}",
        "# Engine: Julia 1.10 BacktestEngine.jl + Optimizer.jl",
        f"# Symbol: {sym} Timeframe: {tf}",
        f"# OOS Sharpe: {num('oos_sharpe_mean'):.3f} Robustness: {num('robustness'):.0f}/100",
        f"# Viable: {result.get('is_viable', False)}",
        f"# Generated: {datetime.now().strftime('%Y-%m-%d')}",
        "",
        f"function optimal_params()::{struct}Params",
        f"    return {struct}Params(",
        *param_lines,
        "    )",
        "end",
        "",
        "const BACKTEST_METADATA = (",
        f"    strategy = {jl_str(name)},",
        f"    symbol = {jl_str(sym)},",
        f"    timeframe = {jl_str(tf)},",
        '    engine = "Julia 1.10",',
        f"    oos_sharpe = {num('oos_sharpe_mean'):.4f},",
        f"    max_dd_pct = {num('oos_max_dd'):.2f},",
        f"    win_rate = {num('oos_win_rate'):.1f},",
        f"    wf_eff = {num('wf_efficiency'):.4f},",
        f"    viable = {viable},",
        ")",
        "",
    ]
    return "\n".join(lines)
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
def index_md(all_results: list) -> str:
    """Render the Markdown index of all backtest results.

    Viable strategies are listed first, sorted by out-of-sample Sharpe in
    descending order. Up to 30 non-viable ones follow with their first reason.

    Args:
        all_results: List of backtest result dicts.

    Returns:
        The Markdown text.
    """
    # Local import: the module-level import only brings in `datetime`.
    from datetime import timezone

    max_not_viable = 30

    def num(res: dict, key: str) -> float:
        """Return ``res[key]`` as a float; 0.0 if missing, None or non-numeric."""
        try:
            return float(res.get(key, 0))
        except (TypeError, ValueError):
            return 0.0

    def cell(value, limit=None) -> str:
        # Truncate first, then escape, so an escape sequence is never cut in
        # half. "|" and newlines would otherwise break the table row.
        text = "" if value is None else str(value)
        if limit is not None:
            text = text[:limit]
        return text.replace("|", "\\|").replace("\n", " ")

    viable = sorted(
        (r for r in all_results if r.get("is_viable")),
        key=lambda r: num(r, "oos_sharpe_mean"),
        reverse=True,
    )
    not_viable = [r for r in all_results if not r.get("is_viable")]
    # The timestamp is labelled UTC, so it has to be taken in UTC.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    lines = [
        "# Backtest Results Index — Julia Engine", "",
        # Two trailing spaces: a Markdown hard line break.
        f"> Generated: {now} Engine: Julia 1.10  ",
        f"> Total: {len(all_results)} Viable: {len(viable)}", "", "---", "",
        "## ✅ Viable Strategies (by OOS Sharpe)", "",
        "| # | Strategy | Symbol | TF | Sharpe | DD% | Win% | PF | Score |",
        "|---|----------|--------|----|--------|-----|------|----|-------|",
    ]
    for rank, r in enumerate(viable, 1):
        lines.append(
            f"| {rank} | {cell(r.get('strategy'), 35)} | {cell(r.get('symbol'))} | "
            f"{cell(r.get('timeframe'))} | "
            f"{num(r, 'oos_sharpe_mean'):.2f} | {num(r, 'oos_max_dd'):.1f} | "
            f"{num(r, 'oos_win_rate'):.1f} | {num(r, 'oos_pf_mean'):.2f} | "
            f"{num(r, 'robustness'):.0f} |"
        )

    lines += ["", "## ❌ Not Viable", "",
              "| Strategy | Symbol | TF | Reason |",
              "|----------|--------|----|--------|"]
    for r in not_viable[:max_not_viable]:
        reasons = r.get("reasons") or []
        reason = cell(reasons[0], 60) if reasons else ""
        lines.append(
            f"| {cell(r.get('strategy'), 35)} | {cell(r.get('symbol'))} | "
            f"{cell(r.get('timeframe'))} | {reason} |"
        )
    hidden = len(not_viable) - max_not_viable
    if hidden > 0:
        # Make the truncation visible instead of silently dropping rows.
        lines += ["", f"*… and {hidden} more not shown*"]
    return "\n".join(lines)
|
pipeline/extractor.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""pipeline/extractor.py β Claude API extraction + 3-layer deduplication."""
|
| 2 |
+
import json, time, hashlib
|
| 3 |
+
from typing import Optional
|
| 4 |
+
import numpy as np
|
| 5 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 6 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 7 |
+
import anthropic
|
| 8 |
+
from loguru import logger
|
| 9 |
+
import utils.config as cfg
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class AIExtractor:
    """Extract strategies, formulas and systems from text chunks via the Claude API.

    ``tokens_used`` accumulates input + output tokens over every successful
    API call made through this instance.
    """

    MODEL = "claude-sonnet-4-20250514"
    _KINDS = ("strategies", "formulas", "systems")
    # Fields concatenated (in this order) to fingerprint an extracted item.
    _HASH_FIELDS = ("description", "plain_text", "entry_system", "name")

    def __init__(self):
        self.client = anthropic.Anthropic(api_key=cfg.ANTHROPIC_API_KEY)
        self.tokens_used = 0

    @classmethod
    def _empty(cls) -> dict:
        """Return a fresh result with an empty list for every kind."""
        return {kind: [] for kind in cls._KINDS}

    def extract(self, chunk) -> dict:
        """Run extraction on one chunk.

        Chunks shorter than 20 words are skipped without calling the API.

        Returns:
            ``{"strategies": [...], "formulas": [...], "systems": [...]}``;
            every list is empty when the chunk is skipped or the call fails.
        """
        if chunk.word_count < 20:
            return self._empty()
        prompt = cfg.EXTRACTION_PROMPT.format(
            source_file=chunk.source_file, page_start=chunk.page_start,
            page_end=chunk.page_end, text=chunk.text)
        raw = self._call(prompt)
        if not raw:
            return self._empty()
        return self._parse(raw, chunk)

    def _call(self, prompt, retries=3):
        """Send ``prompt`` to the model, retrying with exponential back-off.

        Returns:
            The text of the first content block, or ``""`` when the response
            has no content or every attempt failed.
        """
        delay = 2.0
        for attempt in range(retries):
            try:
                resp = self.client.messages.create(
                    model=self.MODEL, max_tokens=4096,
                    messages=[{"role": "user", "content": prompt}])
                self.tokens_used += resp.usage.input_tokens + resp.usage.output_tokens
                return resp.content[0].text if resp.content else ""
            except anthropic.RateLimitError:
                logger.warning(f"Rate limit — {delay}s")
            except Exception as e:
                # Best-effort boundary: a failed chunk must not abort the run.
                logger.error(f"API: {e}")
            # Only back off when another attempt follows. Sleeping after the
            # final failure would just delay returning "".
            if attempt < retries - 1:
                time.sleep(delay)
                delay *= 2
        return ""

    def _parse(self, raw, chunk):
        """Parse the model's JSON reply and tag each item with source and hash.

        Tolerates a Markdown code fence around the JSON and stray text around
        the outermost ``{...}``. Anything that is not a JSON object, or a
        section that is not a list, yields an empty section.
        """
        raw = raw.strip()
        if raw.startswith("```"):
            # Drop the opening fence line (e.g. ```json) and trailing backticks.
            raw = "\n".join(raw.split("\n")[1:]).rstrip("`").strip()
        try:
            data = json.loads(raw)
        except ValueError:  # json.JSONDecodeError is a ValueError
            start, end = raw.find("{"), raw.rfind("}")
            try:
                data = json.loads(raw[start:end + 1]) if start != -1 else {}
            except ValueError:
                return self._empty()
        if not isinstance(data, dict):
            return self._empty()

        def as_text(value):
            # Model output is untyped: a field may be null, a list, a number…
            if value is None:
                return ""
            return value if isinstance(value, str) else str(value)

        result = self._empty()
        for kind in self._KINDS:
            items = data.get(kind)
            if not isinstance(items, list):
                continue
            for item in items:
                if not (isinstance(item, dict) and item.get("name")):
                    continue
                item.update({"source_file": chunk.source_file,
                             "source_pages": f"{chunk.page_start}-{chunk.page_end}"})
                item["content_hash"] = _hash(
                    "".join(as_text(item.get(field)) for field in self._HASH_FIELDS))
                result[kind].append(item)
        return result

    def compile_strategy_code(self, record: dict) -> str:
        """Ask Claude to generate Julia signal code for this strategy.

        Returns:
            The generated code with Markdown fences removed (only the lines
            inside fenced blocks are kept), or ``""`` when the call fails.
        """
        fields = ("name", "category", "description", "entry_rules", "exit_rules",
                  "filters", "parameters", "mathematical_basis")
        compact = {key: record.get(key) for key in fields}
        prompt = cfg.COMPILER_PROMPT.format(
            strategy_json=json.dumps(compact, indent=2))
        code = self._call(prompt)
        if not code:
            return ""
        if "```" in code:
            in_block = False
            kept = []
            for line in code.split("\n"):
                if line.strip().startswith("```"):
                    in_block = not in_block
                    continue
                if in_block:
                    kept.append(line)
            code = "\n".join(kept)
        return code.strip()
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
class Deduplicator:
    """Merge newly extracted items into a knowledge base without duplicates.

    Each item goes through three stages: an exact ``content_hash`` match
    (skipped; only its source is recorded), a TF-IDF cosine-similarity match
    at or above ``threshold`` (merged as a layer), otherwise it is added as a
    new canonical entry.
    """

    _KINDS = ("strategies", "formulas", "systems")

    def __init__(self, threshold=None):
        # Compare against None so an explicit threshold of 0.0 is respected.
        self.threshold = cfg.SIMILARITY_THRESHOLD if threshold is None else threshold
        self._vec = TfidfVectorizer(ngram_range=(1, 2), max_features=5000, stop_words="english")

    def process(self, extracted, kb):
        """Fold ``extracted`` into ``kb`` and return per-kind counters.

        Args:
            extracted: ``{kind: [item, ...]}`` as produced by the extractor.
            kb: ``{kind: {canonical_id: item}}``. Mutated in place; a missing
                store is created when there are items to file under it.

        Returns:
            ``{kind: {"added": n, "merged": n, "skipped": n}}``.
        """
        stats = {kind: {"added": 0, "merged": 0, "skipped": 0} for kind in self._KINDS}
        for kind in self._KINDS:
            items = extracted.get(kind) or []
            if not items:
                continue
            store = kb.setdefault(kind, {})
            for item in items:
                stats[kind][self._process_one(item, store, kind)] += 1
        return stats

    def _process_one(self, item, store, kind):
        """File one item and return ``"skipped"``, ``"merged"`` or ``"added"``."""
        h = item.get("content_hash", "")
        for existing in store.values():
            if existing.get("content_hash") == h:
                self._add_src(item, existing)
                return "skipped"
        similar_id = self._similar(item, store, kind)
        if similar_id:
            self._merge(item, store[similar_id])
            return "merged"
        cid = _cid(item["name"], h, kind)
        item["canonical_id"] = cid
        item["sources"] = [item.get("source_file", "")]
        item["layers"] = []
        store[cid] = item
        return "added"

    def _similar(self, item, store, kind):
        """Return the id of the most similar stored item, or None.

        The vectorizer is refitted on the stored texts plus the candidate on
        every call.
        """
        if not store:
            return None
        texts = [_text(v, kind) for v in store.values()] + [_text(item, kind)]
        try:
            mat = self._vec.fit_transform(texts)
            sims = cosine_similarity(mat[-1], mat[:-1])[0]
            best = int(np.argmax(sims))
            if sims[best] >= self.threshold:
                return list(store.keys())[best]
        except Exception as e:
            # Still best-effort (the item is then treated as new), but no
            # longer silent: e.g. texts made only of stop words raise here.
            logger.warning(f"Similarity check skipped: {e}")
        return None

    @staticmethod
    def _add_src(item, existing):
        """Record the item's source file on ``existing`` if not already listed."""
        src = item.get("source_file", "")
        if src and src not in existing.get("sources", []):
            existing.setdefault("sources", []).append(src)

    @staticmethod
    def _merge(item, existing):
        """Attach ``item`` to ``existing`` as a variant layer (once per hash)."""
        Deduplicator._add_src(item, existing)
        layers = existing.setdefault("layers", [])
        seen = {layer.get("content_hash") for layer in layers}
        if item.get("content_hash") in seen:
            return
        bookkeeping = ("sources", "layers", "canonical_id")
        layers.append({
            "source_file": item.get("source_file"),
            "content_hash": item.get("content_hash"),
            "data": {k: v for k, v in item.items() if k not in bookkeeping},
        })
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def _hash(text):
    """Return a 16-hex-char fingerprint of ``text``.

    Case and whitespace layout are normalised first, so texts that differ only
    in capitalisation or spacing share a fingerprint.
    """
    normalized = " ".join(text.lower().split())
    digest = hashlib.sha256(normalized.encode())
    return digest.hexdigest()[:16]
|
| 144 |
+
|
| 145 |
+
def _cid(name, h, kind):
    """Return the 12-hex-char canonical id derived from kind, name and hash."""
    seed = f"{kind}_{name}_{h}"
    # MD5 serves only as a short, stable identifier here, not for security.
    return hashlib.md5(seed.encode()).hexdigest()[:12]
|
| 147 |
+
|
| 148 |
+
def _text(item, kind):
    """Build the text used to compare ``item`` with others of the same kind.

    Strategies use name, description and entry rules. Formulas use name,
    plain text and purpose. Anything else uses name, entry system and exit
    system.

    Returns:
        The three fields joined by single spaces. Missing or ``None`` fields
        contribute an empty string.
    """
    def field(key):
        value = item.get(key)
        if value is None:
            # Keep the literal token "None" out of the similarity text.
            return ""
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple)):
            return " ".join(str(part) for part in value if part is not None)
        return str(value)

    if kind == "strategies":
        keys = ("name", "description", "entry_rules")
    elif kind == "formulas":
        keys = ("name", "plain_text", "purpose")
    else:
        keys = ("name", "entry_system", "exit_system")
    return " ".join(field(key) for key in keys)
|
pipeline/julia_bridge.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
pipeline/julia_bridge.py
|
| 3 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 4 |
+
Python bridge to Julia via juliacall.
|
| 5 |
+
|
| 6 |
+
Julia is loaded ONCE on first use (lazy init) to avoid
|
| 7 |
+
slowing down Space startup. Subsequent calls are instant.
|
| 8 |
+
|
| 9 |
+
juliacall converts:
|
| 10 |
+
Python list/np.ndarray → Julia Vector{Float64}
|
| 11 |
+
Python dict → Julia Dict
|
| 12 |
+
Julia Dict{String,Any} → Python dict
|
| 13 |
+
Julia Vector → Python list
|
| 14 |
+
|
| 15 |
+
All heavy computation — indicators, backtest, optimizer —
|
| 16 |
+
runs in Julia. Python only calls this bridge.
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
from __future__ import annotations
|
| 20 |
+
import os
|
| 21 |
+
from pathlib import Path
|
| 22 |
+
from typing import Optional
|
| 23 |
+
import numpy as np
|
| 24 |
+
from loguru import logger
|
| 25 |
+
|
| 26 |
+
# ββ Julia runtime (loaded once) βββββββββββββββββββββββ
|
| 27 |
+
_jl = None
|
| 28 |
+
_QE = None # QuantEngine module
|
| 29 |
+
|
| 30 |
+
JULIA_SRC = Path(__file__).parent.parent / "src"
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _init_julia():
    """Lazy-load Julia + QuantEngine on first call.

    Subsequent calls return immediately once the engine is loaded. The module
    globals ``_jl`` and ``_QE`` are only assigned after loading has fully
    succeeded, so a failed attempt is retried on the next call.

    Raises:
        RuntimeError: If juliacall cannot be imported or QuantEngine fails to
            load. The original exception is chained.
    """
    global _jl, _QE
    # Gate on _QE, the last thing set up. Gating on _jl would treat a
    # half-finished init (runtime up, module failed to load) as done and leave
    # callers with _QE = None.
    if _QE is not None:
        return

    logger.info("Initializing Julia runtime…")
    try:
        from juliacall import Main as jl

        # Forward slashes plus raw"..." keep backslashes and `$` in the path
        # from being interpreted by Julia's string parser.
        src_dir = JULIA_SRC.as_posix()
        engine_file = (JULIA_SRC / "QuantEngine.jl").as_posix()
        jl.seval(f'push!(LOAD_PATH, raw"{src_dir}")')
        jl.seval(f'include(raw"{engine_file}")')
        jl.seval("using .QuantEngine")
        engine = jl.QuantEngine
    except Exception as e:
        logger.error(f"Julia init failed: {e}")
        raise RuntimeError(f"Julia init failed: {e}") from e

    _jl, _QE = jl, engine
    logger.info("Julia QuantEngine loaded ✓")
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _arr(x) -> "jl.Vector":
    """Convert a Python list or ndarray to a Julia ``Vector{Float64}``.

    The input is first coerced to a float64 NumPy array, then handed to Julia
    as a plain Python list.
    """
    _init_julia()
    values = np.asarray(x, dtype=np.float64).tolist()
    vector_type = _jl.Vector[_jl.Float64]
    return _jl.convert(vector_type, values)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _signals(x) -> "jl.Vector":
    """Convert a signal sequence to a Julia ``Vector{Int64}``.

    Every element is passed through ``int()`` before conversion.
    """
    _init_julia()
    as_ints = list(map(int, x))
    vector_type = _jl.Vector[_jl.Int64]
    return _jl.convert(vector_type, as_ints)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def _pydict(jl_dict) -> dict:
    """Recursively convert a Julia Dict (or any mapping) to a Python dict.

    Keys are stringified. Values are converted as follows:

    * mapping-like (has ``keys``) -> dict, recursively
    * str -> unchanged
    * other iterable -> list, with every element converted the same way
    * bool -> bool
    * integer-like (has ``__index__``) -> int
    * float-like (has ``__float__``) -> float
    * anything else -> unchanged

    Args:
        jl_dict: Object supporting ``keys()`` and ``[key]``.

    Returns:
        A plain Python dict.
    """
    def convert(value):
        if hasattr(value, "keys"):
            return _pydict(value)
        if isinstance(value, str):
            return value
        if hasattr(value, "__iter__"):
            # Recurse so dicts nested inside vectors are converted too.
            return [convert(element) for element in value]
        if isinstance(value, bool):
            return bool(value)
        # Integers must be tested before floats: Python ints also define
        # __float__, so checking that first turns every count into a float
        # (e.g. 30 trades -> 30.0).
        if hasattr(value, "__index__"):
            return int(value)
        if hasattr(value, "__float__"):
            return float(value)
        if hasattr(value, "__int__"):
            return int(value)
        return value

    return {str(key): convert(jl_dict[key]) for key in jl_dict.keys()}
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
# ββ Public API ββββββββββββββββββββββββββββββββββββββββ
|
| 92 |
+
|
| 93 |
+
def full_backtest_pipeline(
    strategy_code: str,
    strategy_name: str,
    open_p: list | np.ndarray,
    high: list | np.ndarray,
    low: list | np.ndarray,
    close: list | np.ndarray,
    volume: list | np.ndarray,
    timeframe: str,
    symbol: str,
    n_windows: int = 5,
    is_ratio: float = 0.70,
    min_trades: int = 30,
    min_sharpe: float = 0.5,
    max_combos: int = 300,
    initial_equity: float = 10_000.0,
    commission_pct: float = 0.0002,
    risk_per_trade: float = 0.01,
) -> dict:
    """
    Run full Julia backtest pipeline.
    Compile strategy code → walk-forward optimize → return results dict.

    The price/volume series are converted to Julia ``Vector{Float64}`` and
    everything else is forwarded unchanged to
    ``QuantEngine.full_backtest_pipeline``.

    Returns:
        The Julia result converted to a Python dict. If the Julia call raises,
        an error dict is returned instead with ``is_valid=False``,
        ``is_viable=False``, ``error``, ``reasons``, ``strategy``, ``symbol``
        and ``timeframe``.

    Raises:
        RuntimeError: If the Julia runtime itself cannot be initialised.
    """
    _init_julia()
    try:
        result = _QE.full_backtest_pipeline(
            strategy_code, strategy_name,
            _arr(open_p), _arr(high), _arr(low),
            _arr(close), _arr(volume),
            timeframe, symbol,
            n_windows=n_windows,
            is_ratio=is_ratio,
            min_trades=min_trades,
            min_sharpe=min_sharpe,
            max_combos=max_combos,
            initial_equity=initial_equity,
            commission_pct=commission_pct,
            risk_per_trade=risk_per_trade,
        )
        return _pydict(result)
    except Exception as e:
        logger.error(f"Julia pipeline error [{strategy_name}]: {e}")
        return {
            "is_valid": False,
            # Explicit viability fields, so anything reading `is_viable` /
            # `reasons` from this dict reports the failure rather than a
            # blank reason.
            "is_viable": False,
            "reasons": [f"Julia pipeline error: {e}"],
            "error": str(e),
            "strategy": strategy_name,
            "symbol": symbol,
            "timeframe": timeframe,
        }
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def run_backtest_only(
    open_p: np.ndarray, high: np.ndarray, low: np.ndarray,
    close: np.ndarray, volume: np.ndarray,
    signals: list | np.ndarray,
    timeframe: str = "1h",
    initial_equity: float = 10_000.0,
    commission_pct: float = 0.0002,
    risk_per_trade: float = 0.01,
) -> dict:
    """Run a single backtest with pre-computed signals.

    Builds a ``QuantEngine.BacktestConfig`` from the three money-management
    arguments and calls ``QuantEngine.run_backtest``.

    Returns:
        A dict of plain Python values copied from the Julia result
        (``total_return``, ``cagr``, ``sharpe``, ``sortino``, ``calmar``,
        ``max_dd``, ``n_trades``, ``win_rate``, ``profit_factor``,
        ``final_equity``, ``equity_curve``, ``is_valid``, ``error``). If the
        call raises, ``{"is_valid": False, "error": <message>}``.

    Raises:
        RuntimeError: If the Julia runtime itself cannot be initialised.
    """
    _init_julia()
    try:
        cfg = _QE.BacktestConfig(
            initial_equity=initial_equity,
            commission_pct=commission_pct,
            risk_per_trade=risk_per_trade,
        )
        r = _QE.run_backtest(
            _arr(open_p), _arr(high), _arr(low),
            _arr(close), _arr(volume),
            _signals(signals), timeframe, cfg,
        )
        return {
            "total_return": float(r.total_return),
            "cagr": float(r.cagr),
            "sharpe": float(r.sharpe),
            "sortino": float(r.sortino),
            "calmar": float(r.calmar),
            "max_dd": float(r.max_dd),
            "n_trades": int(r.n_trades),
            "win_rate": float(r.win_rate),
            "profit_factor": float(r.profit_factor),
            "final_equity": float(r.final_equity),
            "equity_curve": list(r.equity_curve),
            "is_valid": bool(r.is_valid),
            "error": str(r.error_msg),
        }
    except Exception as e:
        # Log the failure (as full_backtest_pipeline does) so it is not only
        # visible to callers that inspect the returned dict.
        logger.error(f"Julia backtest error: {e}")
        return {"is_valid": False, "error": str(e)}
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def indicator(name: str, *args, **kwargs):
    """
    Look up ``name`` on the QuantEngine module and call it.

    Positional arguments that are lists or ndarrays are converted to Julia
    vectors; all other arguments and all keyword arguments pass through as-is.

    Returns a numpy array, or a tuple of numpy arrays when the Julia function
    returns a tuple.

    Raises ValueError when QuantEngine has no attribute called ``name``.

    Example:
        rsi_values = indicator("rsi", close_array, 14)
        upper, mid, lower = indicator("bbands", close_array, 20, 2.0)
    """
    _init_julia()
    fn = getattr(_QE, name, None)
    if fn is None:
        raise ValueError(f"Unknown indicator: {name}")

    jl_args = []
    for arg in args:
        if isinstance(arg, (list, np.ndarray)):
            jl_args.append(_arr(arg))
        else:
            jl_args.append(arg)

    result = fn(*jl_args, **kwargs)
    if not isinstance(result, tuple):
        return np.array(list(result))
    return tuple(np.array(list(part)) for part in result)
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def julia_available() -> bool:
    """Report whether the Julia runtime and QuantEngine can be used.

    Triggers the lazy initialisation; any failure there is reported as False
    rather than raised.
    """
    try:
        _init_julia()
    except Exception:
        return False
    return _QE is not None
|
pipeline/pdf_processor.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""pipeline/pdf_processor.py β PDF text extraction + OCR fallback."""
|
| 2 |
+
import re
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Generator
|
| 5 |
+
from dataclasses import dataclass
|
| 6 |
+
import fitz, pdfplumber, pytesseract
|
| 7 |
+
from pdf2image import convert_from_path
|
| 8 |
+
from loguru import logger
|
| 9 |
+
import utils.config as cfg
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@dataclass
class Chunk:
    """A run of consecutive page text taken from one PDF."""

    # Name of the PDF the text came from.
    source_file: str
    # First and last page numbers covered by this chunk.
    page_start: int
    page_end: int
    # The chunk's text.
    text: str
    # True when any of the text was produced by OCR.
    was_ocr: bool = False

    @property
    def word_count(self):
        """Number of whitespace-separated tokens in ``text``."""
        tokens = self.text.split()
        return len(tokens)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class PDFProcessor:
    """Extract text from a PDF page by page and group it into chunks.

    Each page is read with both PyMuPDF and pdfplumber and the longer result
    is kept. Pages with fewer than ``MIN_CHARS`` characters are additionally
    sent through OCR.
    """

    MIN_CHARS = 80
    CHUNK_WORDS = cfg.MAX_TOKENS_PER_CHUNK // 2
    # Upper bound on pages rasterised per pdf2image call, to keep the number of
    # page images held in memory at once small.
    OCR_BATCH_PAGES = 10

    def process(self, pdf_path: Path) -> Generator[Chunk, None, None]:
        """Yield the chunks of ``pdf_path`` in page order."""
        pages = self._extract_pages(pdf_path)
        yield from self._chunk(pages, pdf_path.name)

    def _extract_pages(self, path):
        """Return ``[(page_number, text, was_ocr), ...]`` for every page.

        Page numbers are 1-based. An extractor failure is logged and whatever
        text was collected up to that point is still used.
        """
        mu = {}
        try:
            doc = fitz.open(str(path))
            try:
                for i, pg in enumerate(doc):
                    mu[i + 1] = self._clean(pg.get_text("text"))
            finally:
                # Close the document even if a page raises mid-way.
                doc.close()
        except Exception as e:
            logger.warning(f"PyMuPDF: {e}")

        pl = {}
        try:
            with pdfplumber.open(str(path)) as pdf:
                for i, pg in enumerate(pdf.pages):
                    try:
                        pl[i + 1] = self._clean(pg.extract_text() or "")
                    except Exception:
                        pl[i + 1] = ""
        except Exception as e:
            logger.warning(f"pdfplumber: {e}")

        total = max(len(mu), len(pl), 1)
        results = []
        ocr_needed = []
        for pnum in range(1, total + 1):
            from_mu, from_pl = mu.get(pnum, ""), pl.get(pnum, "")
            best = from_mu if len(from_mu) > len(from_pl) else from_pl
            results.append((pnum, best, False))
            if len(best) < self.MIN_CHARS:
                ocr_needed.append(pnum)

        if ocr_needed:
            ocr = self._ocr(path, ocr_needed)
            for i, (pnum, text, _) in enumerate(results):
                # Take the OCR text only when it yields more than the
                # extractors did. A genuinely short page must not be replaced
                # by an empty or worse OCR result.
                if pnum in ocr and len(ocr[pnum]) > len(text):
                    results[i] = (pnum, ocr[pnum], True)
        return results

    @classmethod
    def _page_batches(cls, pages):
        """Group page numbers into contiguous (first, last) runs of bounded size."""
        batches = []
        for pnum in sorted(set(pages)):
            if (batches and pnum == batches[-1][1] + 1
                    and pnum - batches[-1][0] < cls.OCR_BATCH_PAGES):
                batches[-1][1] = pnum
            else:
                batches.append([pnum, pnum])
        return batches

    def _ocr(self, path, pages):
        """OCR the given 1-based pages and return ``{page_number: text}``.

        Only the requested pages are rasterised, in small contiguous batches,
        rather than the whole span between the lowest and highest page. A
        failing batch is logged and skipped; the other batches are still
        processed.
        """
        out = {}
        for first, last in self._page_batches(pages):
            try:
                imgs = convert_from_path(str(path), dpi=cfg.OCR_DPI,
                                         first_page=first, last_page=last)
                for pnum, img in zip(range(first, last + 1), imgs):
                    out[pnum] = self._clean(
                        pytesseract.image_to_string(img, lang="eng", config="--psm 6"))
            except Exception as e:
                logger.warning(f"OCR: {e}")
        return out

    def _chunk(self, pages, source):
        """Group page texts into chunks of at least ``CHUNK_WORDS`` words.

        The page that completes a chunk is carried over as the start of the
        next one, so consecutive chunks overlap by one page. Empty pages are
        skipped. The final partial chunk is only emitted if it contains a page
        that has not been emitted yet.
        """
        buf, words, any_ocr = [], 0, False
        p_start = p_end = None
        has_new_page = False
        for pnum, text, ocr in pages:
            if not text:
                continue
            if not buf:
                p_start = pnum
            buf.append(text)
            words += len(text.split())
            p_end = pnum
            any_ocr = any_ocr or ocr
            has_new_page = True
            if words >= self.CHUNK_WORDS:
                yield Chunk(source, p_start, pnum, "\n\n".join(buf), any_ocr)
                # Carry the last page over as overlap. It has already been
                # emitted, so it does not count as new content on its own.
                buf, words, p_start, any_ocr = [text], len(text.split()), pnum, ocr
                has_new_page = False
        if buf and has_new_page:
            yield Chunk(source, p_start, p_end, "\n\n".join(buf), any_ocr)

    @staticmethod
    def _clean(text):
        """Normalise extracted text.

        Re-joins words hyphenated across a line break, collapses three or more
        newlines into two and runs of spaces/tabs into one space, then strips
        the ends.
        """
        if not text:
            return ""
        text = re.sub(r"(\w)-\n(\w)", r"\1\2", text)
        text = re.sub(r"\n{3,}", "\n\n", text)
        text = re.sub(r"[ \t]+", " ", text)
        return text.strip()
|
requirements.txt
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Julia↔Python bridge
|
| 2 |
+
juliacall>=0.9.23
|
| 3 |
+
|
| 4 |
+
# Gradio UI
|
| 5 |
+
gradio>=5.25.0
|
| 6 |
+
|
| 7 |
+
# Claude API
|
| 8 |
+
anthropic>=0.30.0
|
| 9 |
+
|
| 10 |
+
# HuggingFace — no version pin, Spaces base image controls this
|
| 11 |
+
huggingface_hub
|
| 12 |
+
|
| 13 |
+
# PDF processing
|
| 14 |
+
pypdf>=4.2.0
|
| 15 |
+
pdfplumber>=0.11.0
|
| 16 |
+
pymupdf>=1.24.0
|
| 17 |
+
pdf2image>=1.17.0
|
| 18 |
+
pytesseract>=0.3.10
|
| 19 |
+
Pillow>=10.3.0
|
| 20 |
+
|
| 21 |
+
# Data
|
| 22 |
+
numpy>=1.26.0
|
| 23 |
+
pandas>=2.2.0
|
| 24 |
+
pyarrow>=16.0.0
|
| 25 |
+
|
| 26 |
+
# Deduplication (used in extractor.py)
|
| 27 |
+
scikit-learn>=1.5.0
|
| 28 |
+
|
| 29 |
+
# Utilities
|
| 30 |
+
loguru>=0.7.2
|
| 31 |
+
pydantic>=2.7.0
|
| 32 |
+
tqdm>=4.66.0
|
src/BacktestEngine.jl
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
BacktestEngine.jl — Vectorized backtest engine.
|
| 3 |
+
No includes. Receives Indicators module via QuantEngine parent scope.
|
| 4 |
+
"""
|
| 5 |
+
module BacktestEngine

using Statistics

export run_backtest, BacktestResult, BacktestConfig

# The ATR implementation is not defined in this module: callers hand it to
# `run_backtest` through the `atr_fn` argument.

# Number of bars per year for each supported timeframe label; used by
# `_metrics` to annualise CAGR, Sharpe and Sortino.  Unknown labels fall
# back to 252 there.
const BARS_PER_YEAR = Dict(
    "1m"=>525_600, "3m"=>175_200, "5m"=>105_120, "15m"=>35_040, "30m"=>17_520,
    "1h"=>8_760, "2h"=>4_380, "4h"=>2_190, "6h"=>1_460, "12h"=>730,
    "1d"=>252, "1w"=>52,
)

# Simulation parameters (keyword-constructible, all fields have defaults).
Base.@kwdef struct BacktestConfig
    initial_equity :: Float64 = 10_000.0   # starting account equity
    commission_pct :: Float64 = 0.0002     # fraction of notional, charged on entry and exit price
    slippage_pct   :: Float64 = 0.0001     # fraction of price lost on every fill
    risk_per_trade :: Float64 = 0.01       # fraction of equity risked between entry and stop
    atr_mult       :: Float64 = 2.0        # stop distance = atr_mult * ATR
    max_pos_pct    :: Float64 = 0.20       # cap on position notional as a fraction of equity
    atr_period     :: Int     = 14         # period forwarded to `atr_fn`
end

# Result record.  Field order is part of the interface: the keyword
# constructor below fills the fields positionally.
mutable struct BacktestResult
    total_return    :: Float64   # percent
    cagr            :: Float64   # percent
    sharpe          :: Float64
    sortino         :: Float64
    calmar          :: Float64
    max_dd          :: Float64   # percent
    max_dd_bars     :: Int       # longest run of bars below the running peak
    n_trades        :: Int
    n_wins          :: Int
    win_rate        :: Float64   # percent
    profit_factor   :: Float64
    avg_win_pct     :: Float64
    avg_loss_pct    :: Float64
    expectancy      :: Float64
    avg_bars_held   :: Float64
    max_consec_wins :: Int
    max_consec_loss :: Int
    final_equity    :: Float64
    total_comm      :: Float64
    equity_curve    :: Vector{Float64}
    n_bars          :: Int
    is_valid        :: Bool
    error_msg       :: String
end

# Empty/invalid result: all metrics zero, equity 10_000, empty equity curve.
BacktestResult(; n_bars=0, is_valid=false, error_msg="") = BacktestResult(
    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0,
    0, 0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0,
    10_000.0, 0.0, Float64[], n_bars, is_valid, error_msg)

# Default `atr_fn`: an all-NaN series, which makes `run_backtest` use its
# built-in 1%-of-price volatility fallback on every bar.  (The previous
# default, `identity`, could not be called with four arguments.)
_nan_atr(high, low, close, period) = fill(NaN, length(close))

# Gross PnL and commission for closing `psz` units of a position with
# direction `pos`, entered at `epx` and exited at `ep`.
_settle(pos, epx, ep, psz, commission_pct) =
    (pos * (ep - epx) * psz, (epx + ep) * psz * commission_pct)

"""
    run_backtest(open_p, high, low, close, volume, signals,
                 timeframe="1h", cfg=BacktestConfig(), atr_fn=_nan_atr) -> BacktestResult

Bar-by-bar simulation of a single-position strategy.

`signals[i]` is the desired direction on bar `i` (positive = long,
negative = short, `0` = flat).  For every bar from the second onward the
engine, in this order:

1. closes an open position whose stop was touched (bar low for longs, bar
   high for shorts), filling at the stop price;
2. closes an open position when the signal is `0` or the opposite of the
   position, filling at the bar close;
3. opens a position in the signal direction at the bar close when flat and
   no trade was closed on this same bar.  Size is the smaller of the
   risk-based size (`equity * risk_per_trade / stop distance`) and the
   notional cap (`equity * max_pos_pct / price`).

Every fill is moved against the trader by `cfg.slippage_pct`.  A position
still open after the last bar is closed at the final close.

`atr_fn(high, low, close, period)` must return one value per bar; NaN
values fall back to 1% of the close.  `open_p` and `volume` are accepted
for interface symmetry and are not read.

Returns a `BacktestResult`; `is_valid == false` with `error_msg` set when
fewer than 50 bars are supplied or the series lengths disagree.
"""
function run_backtest(
    open_p::Vector{Float64}, high::Vector{Float64}, low::Vector{Float64},
    close::Vector{Float64}, volume::Vector{Float64}, signals::Vector{Int},
    timeframe::String="1h", cfg::BacktestConfig=BacktestConfig(),
    atr_fn::Function=_nan_atr,
)::BacktestResult
    n = length(close)
    n < 50 && return BacktestResult(; n_bars=n, error_msg="Need ≥50 bars, got $n")
    # The main loop runs under @inbounds, so mismatched inputs are rejected here.
    if length(high) != n || length(low) != n || length(signals) != n
        return BacktestResult(; n_bars=n,
            error_msg="high, low, close and signals must have the same length")
    end

    atr_v = atr_fn(high, low, close, cfg.atr_period)
    if length(atr_v) != n
        return BacktestResult(; n_bars=n,
            error_msg="atr_fn returned $(length(atr_v)) values for $n bars")
    end

    equity = cfg.initial_equity
    eq = fill(cfg.initial_equity, n)

    # A trade needs one bar to open and a later bar to close, and a new trade
    # cannot open on the bar the previous one closed, so n ÷ 2 + 1 slots suffice.
    cap = n ÷ 2 + 1
    tpnls = Vector{Float64}(undef, cap)   # net PnL per trade
    twins = Vector{Bool}(undef, cap)      # trade was profitable after commission
    tbars = Vector{Int}(undef, cap)       # bars held
    tents = Vector{Float64}(undef, cap)   # entry price
    tszs  = Vector{Float64}(undef, cap)   # position size
    nt = 0; tcomm = 0.0

    # pos: current direction, epx: entry price, psz: size, spx: stop price,
    # ebar: entry bar index, ltrade: bar index of the most recent exit.
    pos = 0; epx = 0.0; psz = 0.0; spx = 0.0; ebar = 1; ltrade = 0

    @inbounds for i in 2:n
        px = close[i]; sig = signals[i]

        # 1) Stop-loss exit.
        if pos != 0 && ((pos == 1 && low[i] <= spx) || (pos == -1 && high[i] >= spx))
            # Exit slippage is adverse: a long sells lower, a short buys back higher.
            ep = spx * (1.0 - cfg.slippage_pct * pos)
            pnl, comm = _settle(pos, epx, ep, psz, cfg.commission_pct)
            nt += 1
            tpnls[nt] = pnl - comm; twins[nt] = pnl > comm
            tbars[nt] = i - ebar;   tents[nt] = epx; tszs[nt] = psz
            tcomm += comm; equity += pnl - comm
            pos = 0; ltrade = i
        end

        # 2) Signal exit (flat or reversal signal).
        if pos != 0 && (sig == 0 || sig == -pos)
            ep = px * (1.0 - cfg.slippage_pct * pos)
            pnl, comm = _settle(pos, epx, ep, psz, cfg.commission_pct)
            nt += 1
            tpnls[nt] = pnl - comm; twins[nt] = pnl > comm
            tbars[nt] = i - ebar;   tents[nt] = epx; tszs[nt] = psz
            tcomm += comm; equity += pnl - comm
            pos = 0; ltrade = i
        end

        # 3) Entry; `(i - ltrade) >= 1` blocks re-entry on the exit bar.
        if pos == 0 && sig != 0 && (i - ltrade) >= 1
            ep   = px * (1.0 + cfg.slippage_pct * sig)
            av   = isnan(atr_v[i]) ? px * 0.01 : atr_v[i]
            dist = cfg.atr_mult * av
            sz   = min(equity * cfg.risk_per_trade / max(dist, 1e-8),
                       equity * cfg.max_pos_pct / ep)
            sz   = max(sz, 1e-8)
            pos = sig; epx = ep; psz = sz; spx = ep - sig * dist; ebar = i
        end

        # Mark-to-market equity for this bar.
        eq[i] = equity + (pos != 0 ? pos * (close[i] - epx) * psz : 0.0)
    end

    # Close whatever is still open at the final close (no slippage applied).
    if pos != 0
        ep = close[n]
        pnl, comm = _settle(pos, epx, ep, psz, cfg.commission_pct)
        nt += 1
        tpnls[nt] = pnl - comm; twins[nt] = pnl > comm
        tbars[nt] = n - ebar;   tents[nt] = epx; tszs[nt] = psz
        tcomm += comm; equity += pnl - comm; eq[n] = equity
    end

    return _metrics(eq, tpnls[1:nt], twins[1:nt], tbars[1:nt],
                    tents[1:nt], tszs[1:nt], tcomm, n, timeframe, cfg)
end

# Build a valid BacktestResult from the equity curve `eq` and the per-trade
# arrays (net PnL, win flag, bars held, entry price, size).
function _metrics(eq, pnls, wins, bars, ents, szs, tcomm, n_bars, tf, cfg)
    init = cfg.initial_equity; final = eq[end]; bpy = get(BARS_PER_YEAR, tf, 252)
    r = BacktestResult(; n_bars, is_valid=true)
    r.equity_curve = eq; r.final_equity = final; r.total_comm = tcomm
    r.total_return = (final - init) / init * 100.0
    yrs = n_bars / bpy
    r.cagr = (yrs > 0 && final > 0) ? ((final / init)^(1.0 / yrs) - 1.0) * 100.0 : 0.0

    # Maximum drawdown (fraction of running peak) and longest stretch below the peak.
    peak = eq[1]; mxdd = 0.0; ddr = 0; mxddb = 0
    for v in eq
        peak = max(peak, v)
        mxdd = max(mxdd, (peak - v) / peak)
        if v < peak
            ddr += 1; mxddb = max(mxddb, ddr)
        else
            ddr = 0
        end
    end
    r.max_dd = mxdd * 100.0; r.max_dd_bars = mxddb

    # Per-bar returns; NaN and ±Inf (division by a zero equity) are dropped.
    rets = diff(eq) ./ eq[1:end-1]
    filter!(isfinite, rets)
    if length(rets) > 1
        mu = mean(rets); sg = std(rets)
        ds_v = filter(x -> x < 0, rets)
        ds = length(ds_v) > 1 ? std(ds_v) : sg
        af = sqrt(Float64(bpy))   # annualisation factor
        r.sharpe  = sg > 0 ? mu / sg * af : 0.0
        r.sortino = ds > 0 ? mu / ds * af : 0.0
        r.calmar  = r.max_dd > 0 ? r.cagr / r.max_dd : 0.0
    end

    r.n_trades = length(pnls)
    r.n_trades == 0 && return r

    nw = count(wins); r.n_wins = nw; r.win_rate = nw / r.n_trades * 100.0
    gw = sum(pnls[wins]); gl = abs(sum(pnls[.!wins]))
    r.profit_factor = gl > 0 ? gw / gl : (gw > 0 ? Inf : 0.0)
    # Net PnL as a percentage of entry notional (epsilon avoids division by zero).
    pct = pnls ./ (ents .* szs .+ 1e-10) .* 100.0
    r.avg_win_pct  = nw > 0 ? mean(pct[wins]) : 0.0
    r.avg_loss_pct = (r.n_trades - nw) > 0 ? mean(pct[.!wins]) : 0.0
    r.expectancy = r.win_rate / 100.0 * r.avg_win_pct +
                   (1 - r.win_rate / 100.0) * r.avg_loss_pct
    r.avg_bars_held = mean(Float64.(bars))
    r.max_consec_wins = _maxrun(wins); r.max_consec_loss = _maxrun(.!wins)
    return r
end

# Length of the longest run of `true` values.  Accepts any Bool vector:
# `.!wins` is a BitVector, which the former `Vector{Bool}` signature rejected.
function _maxrun(b::AbstractVector{Bool})::Int
    mx = 0; run = 0
    for v in b
        if v
            run += 1; mx = max(mx, run)
        else
            run = 0
        end
    end
    return mx
end

end # module BacktestEngine
|
src/Indicators.jl
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Indicators.jl — Vectorized technical indicator library.
|
| 3 |
+
Standalone module. No includes. No external deps beyond Statistics.
|
| 4 |
+
"""
|
| 5 |
+
module Indicators

using Statistics

export sma, ema, wma, tema, dema,
       rsi, macd, stoch, cci, williams_r,
       atr, bbands, keltner, donchian, adx,
       vwap, obv, cmf, zscore, std_dev,
       momentum, roc, highest, lowest,
       crossover, crossunder

# All indicators return vectors of the same length as their input; positions
# for which a full look-back window is not yet available hold NaN.

# ── Trend ─────────────────────────────────────────────

"""
    sma(x, n)

Simple moving average over a trailing window of `n` samples.

A window that contains a NaN yields NaN, but the NaN no longer poisons the
running sum: once it has left the window, averages are produced again.
This matters for inputs with leading NaNs, such as the %K series that
`stoch` passes in.  `n < 1` yields an all-NaN vector.
"""
function sma(x::Vector{Float64}, n::Int)::Vector{Float64}
    len = length(x); out = fill(NaN, len)
    n < 1 && return out
    s = 0.0     # running sum of the non-NaN samples inside the window
    nans = 0    # number of NaN samples currently inside the window
    for i in 1:len
        isnan(x[i]) ? (nans += 1) : (s += x[i])
        if i > n
            isnan(x[i-n]) ? (nans -= 1) : (s -= x[i-n])
        end
        if i >= n && nans == 0
            out[i] = s / n
        end
    end
    return out
end

"""
    ema(x, n)

Exponential moving average with smoothing factor `2 / (n + 1)`.

The series is seeded with the mean of the first `n` non-NaN samples, at
the position of the `n`-th such sample.  NaN inputs after the seed are
skipped (their output stays NaN and the running value is carried over).
"""
function ema(x::Vector{Float64}, n::Int)::Vector{Float64}
    len = length(x); out = fill(NaN, len)
    k = 2.0 / (n + 1.0)
    s = 0.0; cnt = 0
    for i in 1:len
        isnan(x[i]) && continue
        s += x[i]; cnt += 1
        if cnt == n
            val = s / n
            out[i] = val
            for j in (i+1):len
                isnan(x[j]) && continue
                val = x[j] * k + val * (1.0 - k)
                out[j] = val
            end
            break
        end
    end
    return out
end

"""
    wma(x, n)

Linearly weighted moving average: the newest sample in the window has
weight `n`, the oldest weight `1`.
"""
function wma(x::Vector{Float64}, n::Int)::Vector{Float64}
    len = length(x); out = fill(NaN, len)
    ws = n * (n + 1) / 2.0   # sum of the weights 1..n
    for i in n:len
        s = 0.0
        for j in 1:n
            s += x[i-n+j] * j
        end
        out[i] = s / ws
    end
    return out
end

"Triple exponential moving average: `3*e1 - 3*e2 + e3` of successively smoothed EMAs."
function tema(x::Vector{Float64}, n::Int)
    e1 = ema(x, n); e2 = ema(e1, n); e3 = ema(e2, n)
    return 3.0 .* e1 .- 3.0 .* e2 .+ e3
end

"Double exponential moving average: `2*e1 - e2`."
function dema(x::Vector{Float64}, n::Int)
    e1 = ema(x, n); e2 = ema(e1, n)
    return 2.0 .* e1 .- e2
end

# ── Oscillators ───────────────────────────────────────

"""
    rsi(close, n=14)

Relative Strength Index.  Average gain/loss are seeded with the simple
mean of the first `n` changes and then updated as
`(previous * (n - 1) + current) / n`.  The first value is produced at
index `n + 1`.  When the average loss is zero the gain/loss ratio is
taken as `1e10`.
"""
function rsi(close::Vector{Float64}, n::Int=14)::Vector{Float64}
    len = length(close); out = fill(NaN, len)
    ag = 0.0; al = 0.0
    for i in 2:(n+1)
        i > len && break
        d = close[i] - close[i-1]
        d > 0 ? (ag += d) : (al += abs(d))
    end
    ag /= n; al /= n
    n + 1 <= len && (out[n+1] = 100.0 - 100.0 / (1.0 + (al == 0 ? 1e10 : ag / al)))
    for i in (n+2):len
        d = close[i] - close[i-1]
        g = d > 0 ? d : 0.0
        l = d < 0 ? abs(d) : 0.0
        ag = (ag * (n - 1) + g) / n
        al = (al * (n - 1) + l) / n
        out[i] = 100.0 - 100.0 / (1.0 + (al == 0 ? 1e10 : ag / al))
    end
    return out
end

"""
    macd(close; fast=12, slow=26, sig=9) -> (macd_line, signal_line, histogram)
"""
function macd(close::Vector{Float64}; fast::Int=12, slow::Int=26, sig::Int=9)
    ml = ema(close, fast) .- ema(close, slow)
    sl = ema(ml, sig)
    return ml, sl, ml .- sl
end

"""
    stoch(high, low, close; k=14, d=3) -> (%K, %D)

Stochastic oscillator.  %K is 50 when the window's high equals its low;
%D is the `d`-period `sma` of %K.
"""
function stoch(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64};
               k::Int=14, d::Int=3)
    len = length(close); K = fill(NaN, len)
    for i in k:len
        hh = maximum(high[i-k+1:i]); ll = minimum(low[i-k+1:i])
        K[i] = hh == ll ? 50.0 : 100.0 * (close[i] - ll) / (hh - ll)
    end
    return K, sma(K, d)
end

"""
    cci(high, low, close, n=20)

Commodity Channel Index of the typical price `(high + low + close) / 3`,
scaled by `0.015 *` mean absolute deviation; 0 when the deviation is zero.
"""
function cci(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64},
             n::Int=20)::Vector{Float64}
    len = length(close); out = fill(NaN, len)
    tp = (high .+ low .+ close) ./ 3.0
    for i in n:len
        w = tp[i-n+1:i]; m = mean(w); md = mean(abs.(w .- m))
        out[i] = md == 0 ? 0.0 : (tp[i] - m) / (0.015 * md)
    end
    return out
end

"""
    williams_r(high, low, close, n=14)

Williams %R in the range -100..0; -50 when the window's high equals its low.
"""
function williams_r(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64},
                    n::Int=14)::Vector{Float64}
    len = length(close); out = fill(NaN, len)
    for i in n:len
        hh = maximum(high[i-n+1:i]); ll = minimum(low[i-n+1:i])
        out[i] = hh == ll ? -50.0 : -100.0 * (hh - close[i]) / (hh - ll)
    end
    return out
end

"Difference between each sample and the sample `n` bars earlier."
function momentum(x::Vector{Float64}, n::Int=10)
    len = length(x); out = fill(NaN, len)
    for i in (n+1):len
        out[i] = x[i] - x[i-n]
    end
    return out
end

"Percentage change versus the sample `n` bars earlier; 0 when that sample is zero."
function roc(x::Vector{Float64}, n::Int=10)
    len = length(x); out = fill(NaN, len)
    for i in (n+1):len
        out[i] = x[i-n] == 0 ? 0.0 : 100.0 * (x[i] - x[i-n]) / x[i-n]
    end
    return out
end

# ── Volatility ────────────────────────────────────────

# True range; the first bar has no previous close and uses high - low.
function _tr(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64})::Vector{Float64}
    len = length(close); tr = fill(NaN, len)
    len == 0 && return tr
    tr[1] = high[1] - low[1]
    for i in 2:len
        tr[i] = max(high[i] - low[i], abs(high[i] - close[i-1]), abs(low[i] - close[i-1]))
    end
    return tr
end

"Average true range: the `ema` of the true range over `n` bars."
atr(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64}, n::Int=14) =
    ema(_tr(high, low, close), n)

"""
    bbands(close, n=20, k=2.0) -> (upper, middle, lower)

Bollinger bands: `sma` ± `k` population standard deviations.
"""
function bbands(close::Vector{Float64}, n::Int=20, k::Float64=2.0)
    len = length(close); mid = sma(close, n); sd = fill(NaN, len)
    for i in n:len
        sd[i] = std(close[i-n+1:i]; corrected=false)
    end
    return mid .+ k .* sd, mid, mid .- k .* sd
end

"""
    keltner(high, low, close, n=20, k=2.0) -> (upper, middle, lower)

Keltner channel: `ema(close, n)` ± `k * atr(high, low, close, n)`.
"""
function keltner(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64},
                 n::Int=20, k::Float64=2.0)
    mid = ema(close, n); a = atr(high, low, close, n)
    return mid .+ k .* a, mid, mid .- k .* a
end

"""
    donchian(high, low, n=20) -> (upper, middle, lower)

Highest high, lowest low and their midpoint over the trailing `n` bars.
"""
function donchian(high::Vector{Float64}, low::Vector{Float64}, n::Int=20)
    len = length(high); u = fill(NaN, len); l = fill(NaN, len)
    for i in n:len
        u[i] = maximum(high[i-n+1:i]); l[i] = minimum(low[i-n+1:i])
    end
    return u, (u .+ l) ./ 2.0, l
end

"Rolling population standard deviation over `n` samples."
function std_dev(x::Vector{Float64}, n::Int=20)::Vector{Float64}
    len = length(x); out = fill(NaN, len)
    for i in n:len
        out[i] = std(x[i-n+1:i]; corrected=false)
    end
    return out
end

"Rolling z-score `(x - sma) / std_dev`; NaN where either is NaN or the deviation is zero."
function zscore(x::Vector{Float64}, n::Int=20)::Vector{Float64}
    mu = sma(x, n); sd = std_dev(x, n); out = fill(NaN, length(x))
    for i in eachindex(x)
        if !isnan(mu[i]) && !isnan(sd[i]) && sd[i] > 0
            out[i] = (x[i] - mu[i]) / sd[i]
        end
    end
    return out
end

# ── Trend strength ────────────────────────────────────

"""
    adx(high, low, close, n=14) -> (adx, plus_di, minus_di)

Average Directional Index with its two directional indicators, all
smoothed with `ema`.

The up-move is `high[i] - high[i-1]`; the down-move is
`low[i-1] - low[i]`, i.e. positive when the low falls.  (It was previously
computed with the opposite sign, which credited *rising* lows to -DM.)
Empty input returns three empty vectors.
"""
function adx(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64}, n::Int=14)
    isempty(close) && return Float64[], Float64[], Float64[]
    tr = _tr(high, low, close)
    up = diff(vcat(high[1], high))      # high[i] - high[i-1], 0 for the first bar
    dn = -diff(vcat(low[1], low))       # low[i-1] - low[i],  0 for the first bar
    pdm = map((u, d) -> (u > d && u > 0) ? u : 0.0, up, dn)
    ndm = map((u, d) -> (d > u && d > 0) ? d : 0.0, up, dn)
    sm  = ema(tr, n)
    pdi = 100.0 .* ema(pdm, n) ./ (sm .+ 1e-10)
    ndi = 100.0 .* ema(ndm, n) ./ (sm .+ 1e-10)
    dx  = 100.0 .* abs.(pdi .- ndi) ./ (pdi .+ ndi .+ 1e-10)
    return ema(dx, n), pdi, ndi
end

# ── Volume ────────────────────────────────────────────

"Cumulative volume-weighted average of the typical price over the whole series."
function vwap(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64},
              volume::Vector{Float64})::Vector{Float64}
    tp = (high .+ low .+ close) ./ 3.0
    return cumsum(tp .* volume) ./ (cumsum(volume) .+ 1e-10)
end

"On-balance volume, starting at `volume[1]`."
function obv(close::Vector{Float64}, volume::Vector{Float64})::Vector{Float64}
    len = length(close); out = zeros(Float64, len)
    len == 0 && return out
    out[1] = volume[1]
    for i in 2:len
        d = close[i] - close[i-1]
        out[i] = out[i-1] + (d > 0 ? volume[i] : d < 0 ? -volume[i] : 0.0)
    end
    return out
end

"""
    cmf(high, low, close, volume, n=20)

Chaikin Money Flow: sum of money-flow volume over `n` bars divided by the
volume sum (0 when that sum is zero).  Bars with `high == low` contribute
zero money flow.
"""
function cmf(high::Vector{Float64}, low::Vector{Float64}, close::Vector{Float64},
             volume::Vector{Float64}, n::Int=20)::Vector{Float64}
    len = length(close); out = fill(NaN, len)
    hl = high .- low
    mfv = map((c, l, h, r) -> r == 0 ? 0.0 : (2c - l - h) / r, close, low, high, hl) .* volume
    for i in n:len
        sv = sum(volume[i-n+1:i])
        out[i] = sv == 0 ? 0.0 : sum(mfv[i-n+1:i]) / sv
    end
    return out
end

# ── Utilities ─────────────────────────────────────────

"Rolling maximum over `n` samples."
function highest(x::Vector{Float64}, n::Int)
    len = length(x); out = fill(NaN, len)
    for i in n:len
        out[i] = maximum(x[i-n+1:i])
    end
    return out
end

"Rolling minimum over `n` samples."
function lowest(x::Vector{Float64}, n::Int)
    len = length(x); out = fill(NaN, len)
    for i in n:len
        out[i] = minimum(x[i-n+1:i])
    end
    return out
end

"`true` at `i` when `a` is above `b` at `i` and was at or below it at `i-1`."
function crossover(a::Vector{Float64}, b::Vector{Float64})::Vector{Bool}
    len = length(a); out = fill(false, len)
    for i in 2:len
        out[i] = a[i] > b[i] && a[i-1] <= b[i-1]
    end
    return out
end

"`true` at `i` when `a` is below `b` at `i` and was at or above it at `i-1`."
function crossunder(a::Vector{Float64}, b::Vector{Float64})::Vector{Bool}
    len = length(a); out = fill(false, len)
    for i in 2:len
        out[i] = a[i] < b[i] && a[i-1] >= b[i-1]
    end
    return out
end

end # module Indicators
|
src/Manifest.toml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file is machine-generated - editing it directly is not recommended
|
| 2 |
+
julia_version = "1.10.7"
|
| 3 |
+
manifest_format = "2.0"
|
| 4 |
+
project_hash = "abc123"
|
| 5 |
+
|
| 6 |
+
[[deps.JSON3]]
|
| 7 |
+
deps = ["Dates", "Mmap", "Parsers", "PrecompileTools", "StructTypes", "UUIDs"]
|
| 8 |
+
git-tree-sha1 = "eb3edce0ed4fa32f75a0a11217433c31d56bd48b"
|
| 9 |
+
uuid = "0f8b85d8-7e73-4b43-9b43-f8e4f07d6bcd"
|
| 10 |
+
version = "1.14.0"
|
| 11 |
+
|
| 12 |
+
[[deps.Random]]
|
| 13 |
+
deps = ["SHA"]
|
| 14 |
+
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
|
| 15 |
+
|
| 16 |
+
[[deps.Statistics]]
|
| 17 |
+
deps = ["LinearAlgebra", "SparseArrays"]
|
| 18 |
+
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
|
| 19 |
+
version = "1.10.0"
|
src/Optimizer.jl
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Optimizer.jl — Walk-forward optimization engine.
|
| 3 |
+
No includes. BacktestConfig/run_backtest/BacktestResult received via QuantEngine.
|
| 4 |
+
"""
|
| 5 |
+
module Optimizer

using Statistics, Random

export walk_forward_optimize, OptimResult

# Outcome of a walk-forward optimisation.  Field order is part of the
# interface: the three-argument constructor below fills fields positionally.
mutable struct OptimResult
    strategy_name   :: String
    symbol          :: String
    timeframe       :: String
    optimal_params  :: Dict{String,Float64}   # weighted-median parameters across windows
    oos_sharpe_mean :: Float64
    oos_sharpe_std  :: Float64
    oos_win_rate    :: Float64
    oos_max_dd      :: Float64                # mean of per-window max drawdowns
    oos_pf_mean     :: Float64                # mean profit factor (values >= 100 excluded)
    oos_trades      :: Int
    wf_efficiency   :: Float64                # mean OOS Sharpe / mean IS Sharpe
    robustness      :: Float64                # 0..100 score from `_robustness`
    is_viable       :: Bool
    reasons         :: Vector{String}
    oos_sharpes     :: Vector{Float64}
end

# Empty (non-viable) result for the given strategy, symbol and timeframe.
OptimResult(n, s, t) = OptimResult(n, s, t, Dict{String,Float64}(),
    0.0, 0.0, 0.0, 0.0, 0.0, 0, 0.0, 0.0, false, String[], Float64[])

"""
    walk_forward_optimize(signal_fn, param_grid, open_p, high, low, close, volume,
                          timeframe, strategy_name, symbol;
                          run_bt_fn, bt_cfg_fn, n_windows=5, is_ratio=0.70,
                          min_trades=30, min_sharpe=0.5, max_combos=300) -> OptimResult

Anchored walk-forward optimisation.

For each window produced by `_windows`, every parameter combination is
back-tested on the in-sample slice; the combination with the highest
Sharpe among valid runs with at least `min_trades` trades is then
evaluated on the out-of-sample slice.  Windows in which no combination
qualifies are skipped.

# Arguments
- `signal_fn(o, h, l, c, v, params)`: returns the signal vector for a slice.
- `param_grid`: candidate values per parameter name; an empty grid means a
  single run with empty parameters.
- `run_bt_fn(o, h, l, c, v, signals, timeframe, cfg)`: back-test function;
  its result must expose `is_valid`, `n_trades`, `sharpe`, `win_rate`,
  `max_dd`, `profit_factor` and a settable `error_msg`.
- `bt_cfg_fn()`: returns the configuration object passed to `run_bt_fn`.
- `is_ratio`: accepted for interface compatibility; not used by the
  current window layout.
- `max_combos`: grids larger than this are randomly subsampled.

On failure (`n < 200`, no windows, or no window with a qualifying
combination) the returned result has `is_viable == false` and the cause in
`reasons`.
"""
function walk_forward_optimize(
    signal_fn::Function,
    param_grid::Dict{String,Vector{Float64}},
    open_p::Vector{Float64}, high::Vector{Float64},
    low::Vector{Float64}, close::Vector{Float64},
    volume::Vector{Float64}, timeframe::String,
    strategy_name::String, symbol::String;
    run_bt_fn::Function,
    bt_cfg_fn::Function,
    n_windows::Int=5, is_ratio::Float64=0.70,
    min_trades::Int=30, min_sharpe::Float64=0.5,
    max_combos::Int=300,
)::OptimResult
    result = OptimResult(strategy_name, symbol, timeframe)
    n = length(close)
    if n < 200
        push!(result.reasons, "Need ≥200 bars, got $n")
        return result
    end

    cfg     = bt_cfg_fn()
    combos  = _build_combos(param_grid, max_combos)
    windows = _windows(n, n_windows)
    if isempty(windows)
        push!(result.reasons, "No WF windows")
        return result
    end

    win_params  = Vector{Dict{String,Float64}}()
    is_sharpes  = Float64[]
    oos_sharpes = Float64[]
    oos_results = []

    for (is_s, is_e, oos_s, oos_e) in windows
        # In-sample search for the best-Sharpe combination.
        best_p = nothing; best_sh = -Inf
        for p in combos
            r = _run(signal_fn, run_bt_fn, cfg,
                     open_p[is_s:is_e], high[is_s:is_e],
                     low[is_s:is_e], close[is_s:is_e],
                     volume[is_s:is_e], p, timeframe)
            if r.is_valid && r.n_trades >= min_trades && r.sharpe > best_sh
                best_sh = r.sharpe; best_p = p
            end
        end
        best_p === nothing && continue
        push!(win_params, best_p); push!(is_sharpes, best_sh)

        # Out-of-sample evaluation of the winner.
        oos_r = _run(signal_fn, run_bt_fn, cfg,
                     open_p[oos_s:oos_e], high[oos_s:oos_e],
                     low[oos_s:oos_e], close[oos_s:oos_e],
                     volume[oos_s:oos_e], best_p, timeframe)
        push!(oos_results, oos_r); push!(oos_sharpes, oos_r.sharpe)
    end

    if isempty(oos_results)
        push!(result.reasons, "No valid WF windows")
        return result
    end
    result.oos_sharpes = oos_sharpes

    valid = filter(r -> r.is_valid && r.n_trades >= min_trades, oos_results)
    if !isempty(valid)
        sh = [r.sharpe for r in valid]
        result.oos_sharpe_mean = mean(sh)
        # std of a single value is NaN, which would turn the robustness
        # score into NaN; a lone window has zero dispersion.
        result.oos_sharpe_std  = length(sh) > 1 ? std(sh) : 0.0
        result.oos_win_rate    = mean([r.win_rate for r in valid])
        result.oos_max_dd      = mean([r.max_dd for r in valid])
        pfs = filter(x -> x < 100, [r.profit_factor for r in valid])
        result.oos_pf_mean     = isempty(pfs) ? 0.0 : mean(pfs)
        result.oos_trades      = sum(r.n_trades for r in valid)
    end
    if !isempty(is_sharpes) && !isempty(oos_sharpes)
        mis = mean(is_sharpes); mos = mean(oos_sharpes)
        result.wf_efficiency = mis > 0 ? mos / mis : 0.0
    end
    result.optimal_params = _vote(win_params, oos_sharpes)
    result.robustness = _robustness(result, min_trades)
    result.is_viable, result.reasons = _viability(result, min_trades, min_sharpe)
    return result
end

# Generate signals and back-test one slice.  If that throws, a back-test on
# all-zero signals supplies a result object, which is then flagged invalid
# with the error text (an exception from that second call propagates).
function _run(sig_fn, run_bt, cfg, o, h, l, c, v, params, tf)
    try
        sigs = sig_fn(o, h, l, c, v, params)
        return run_bt(o, h, l, c, v, sigs, tf, cfg)
    catch e
        r = run_bt(o, h, l, c, v, zeros(Int, length(c)), tf, cfg)
        r.is_valid = false; r.error_msg = string(e)
        return r
    end
end

# Cartesian product of the grid values as one Dict per combination,
# randomly subsampled to `max_c` entries when larger.  An empty grid yields
# a single empty Dict.
function _build_combos(grid::Dict{String,Vector{Float64}}, max_c::Int)::Vector{Dict{String,Float64}}
    isempty(grid) && return [Dict{String,Float64}()]
    ks = collect(keys(grid)); vs = [grid[k] for k in ks]
    all_c = Dict{String,Float64}[]
    function recurse(i, current)
        if i > length(ks)
            push!(all_c, copy(current))
            return
        end
        for v in vs[i]
            current[ks[i]] = v
            recurse(i + 1, current)
        end
    end
    recurse(1, Dict{String,Float64}())
    length(all_c) > max_c || return all_c
    return all_c[randperm(length(all_c))[1:max_c]]
end

# Anchored walk-forward windows `(is_start, is_end, oos_start, oos_end)` in
# chronological order.  OOS blocks of `max(50, n ÷ (2nw))` bars are laid out
# backwards from the end of the data; in-sample always starts at bar 1 and
# ends just before the OOS block.  A window is kept only if its in-sample
# part has at least 100 bars (the OOS block always has >= 50 by construction).
#
# The former guard `ie-1<100||oe-os<50 && continue` parsed as
# `a || (b && continue)`, so windows with a too-short (even negative)
# in-sample range were kept, while every minimum-size 50-bar OOS block was
# rejected because `oe - os` is the block length minus one.
function _windows(n::Int, nw::Int)::Vector{Tuple{Int,Int,Int,Int}}
    wins = Tuple{Int,Int,Int,Int}[]
    nw < 1 && return wins
    osz = max(50, n ÷ (nw * 2))
    for i in 0:(nw-1)
        oe = n - i * osz; os = oe - osz + 1; ie = os - 1
        ie < 100 && break          # earlier windows would be shorter still
        push!(wins, (1, ie, os, oe))
    end
    return reverse(wins)
end

# Per-parameter weighted median of the window winners, weighted by each
# window's OOS Sharpe clipped at zero (equal weights when all are <= 0).
function _vote(pl::Vector{Dict{String,Float64}}, oos::Vector{Float64})::Dict{String,Float64}
    isempty(pl) && return Dict{String,Float64}()
    length(pl) == 1 && return pl[1]
    w = max.(0.0, oos[1:length(pl)]); tw = sum(w)
    w = tw > 0 ? w ./ tw : fill(1.0 / length(pl), length(pl))
    ks = collect(keys(pl[1])); result = Dict{String,Float64}()
    for k in ks
        vals = [p[k] for p in pl if haskey(p, k)]
        wi = w[1:length(vals)]
        si = sortperm(vals); cv = cumsum(wi[si])
        mi = findfirst(x -> x >= 0.5, cv)
        result[k] = vals[si[mi !== nothing ? mi : end]]
    end
    return result
end

# Robustness score, rounded to one decimal: up to 40 points for walk-forward
# efficiency, 30 for OOS Sharpe stability, 20 for OOS trade count relative to
# `10 * mt`, and 10 for profit factor above 1.
function _robustness(r::OptimResult, mt::Int)::Float64
    s = clamp(r.wf_efficiency, 0.0, 1.0) * 40.0
    if r.oos_sharpe_mean > 0
        s += clamp(1.0 - r.oos_sharpe_std / (r.oos_sharpe_mean + 1e-9), 0.0, 1.0) * 30.0
    end
    s += clamp(r.oos_trades / max(1, mt * 10), 0.0, 1.0) * 20.0
    if r.oos_pf_mean > 1
        s += clamp((r.oos_pf_mean - 1) / 2, 0.0, 1.0) * 10.0
    end
    return round(s; digits=1)
end

# Viability verdict: viable only when no threshold is violated.  Returns the
# verdict and either the list of violations or a one-line summary.
function _viability(r::OptimResult, mt::Int, ms::Float64)::Tuple{Bool,Vector{String}}
    reasons = String[]
    r.oos_sharpe_mean < ms && push!(reasons, "OOS Sharpe $(round(r.oos_sharpe_mean;digits=2)) < $ms")
    r.oos_trades < mt      && push!(reasons, "Too few OOS trades: $(r.oos_trades) < $mt")
    r.oos_max_dd > 30.0    && push!(reasons, "High avg DD: $(round(r.oos_max_dd;digits=1))%")
    r.wf_efficiency < 0.3  && push!(reasons, "Low WFE: $(round(r.wf_efficiency;digits=2))")
    r.oos_pf_mean < 1.1    && push!(reasons, "PF $(round(r.oos_pf_mean;digits=2)) < 1.1")
    viable = isempty(reasons)
    viable && push!(reasons, "✅ Sharpe=$(round(r.oos_sharpe_mean;digits=2)) DD=$(round(r.oos_max_dd;digits=1))% WFE=$(round(r.wf_efficiency;digits=2)) Score=$(r.robustness)/100")
    return viable, reasons
end

end # module Optimizer
|
src/Project.toml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[deps]
|
| 2 |
+
JSON3 = "0f8b85d8-7e73-4b43-9b43-f8e4f07d6bcd"
|
| 3 |
+
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
|
| 4 |
+
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
|
| 5 |
+
|
| 6 |
+
[compat]
|
| 7 |
+
julia = "1.10"
|
src/QuantEngine.jl
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
QuantEngine.jl — Top-level module. Only file that uses include().
|
| 3 |
+
Wires all submodules together by injecting dependencies explicitly.
|
| 4 |
+
Python imports this via juliacall.
|
| 5 |
+
"""
|
| 6 |
+
module QuantEngine
|
| 7 |
+
|
| 8 |
+
using Statistics, Random
|
| 9 |
+
|
| 10 |
+
# ── Include all submodules (ONLY here) ───────────────
|
| 11 |
+
include("Indicators.jl")
|
| 12 |
+
include("BacktestEngine.jl")
|
| 13 |
+
include("Optimizer.jl")
|
| 14 |
+
include("SignalCompiler.jl")
|
| 15 |
+
|
| 16 |
+
using .Indicators
|
| 17 |
+
using .BacktestEngine
|
| 18 |
+
using .Optimizer
|
| 19 |
+
using .SignalCompiler
|
| 20 |
+
|
| 21 |
+
export
|
| 22 |
+
# Indicators
|
| 23 |
+
sma, ema, wma, tema, dema,
|
| 24 |
+
rsi, macd, stoch, cci, williams_r,
|
| 25 |
+
atr, bbands, keltner, donchian, adx,
|
| 26 |
+
vwap, obv, cmf, zscore, std_dev,
|
| 27 |
+
momentum, roc, highest, lowest,
|
| 28 |
+
crossover, crossunder,
|
| 29 |
+
# Engine
|
| 30 |
+
BacktestConfig,
|
| 31 |
+
# High-level
|
| 32 |
+
full_backtest_pipeline
|
| 33 |
+
|
| 34 |
+
"""
|
| 35 |
+
full_backtest_pipeline(...) -> Dict{String,Any}
|
| 36 |
+
|
| 37 |
+
End-to-end: compile Julia strategy code → walk-forward optimize
|
| 38 |
+
→ return plain Dict that crosses to Python cleanly.
|
| 39 |
+
"""
|
| 40 |
+
function full_backtest_pipeline(
|
| 41 |
+
strategy_code::String, strategy_name::String,
|
| 42 |
+
open_p::Vector{Float64}, high::Vector{Float64},
|
| 43 |
+
low::Vector{Float64}, close::Vector{Float64},
|
| 44 |
+
volume::Vector{Float64}, timeframe::String, symbol::String;
|
| 45 |
+
n_windows::Int=5, is_ratio::Float64=0.70,
|
| 46 |
+
min_trades::Int=30, min_sharpe::Float64=0.5,
|
| 47 |
+
max_combos::Int=300,
|
| 48 |
+
initial_equity::Float64=10_000.0,
|
| 49 |
+
commission_pct::Float64=0.0002,
|
| 50 |
+
risk_per_trade::Float64=0.01,
|
| 51 |
+
)::Dict{String,Any}
|
| 52 |
+
|
| 53 |
+
# 1. Compile strategy β pass Indicators module explicitly
|
| 54 |
+
compiled = SignalCompiler.compile_strategy(strategy_name, strategy_code, Indicators)
|
| 55 |
+
if !compiled.is_valid
|
| 56 |
+
return Dict{String,Any}("is_valid"=>false,"error"=>compiled.error,
|
| 57 |
+
"strategy"=>strategy_name,"symbol"=>symbol,"timeframe"=>timeframe)
|
| 58 |
+
end
|
| 59 |
+
|
| 60 |
+
# 2. Walk-forward optimize β inject BacktestEngine functions to avoid circular deps
|
| 61 |
+
cfg_fn = () -> BacktestConfig(
|
| 62 |
+
initial_equity=initial_equity,
|
| 63 |
+
commission_pct=commission_pct,
|
| 64 |
+
risk_per_trade=risk_per_trade,
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
# Wrap run_backtest to inject atr function from Indicators
|
| 68 |
+
run_bt_fn = (o,h,l,c,v,sigs,tf,cfg) ->
|
| 69 |
+
BacktestEngine.run_backtest(o,h,l,c,v,sigs,tf,cfg, Indicators.atr)
|
| 70 |
+
|
| 71 |
+
opt = Optimizer.walk_forward_optimize(
|
| 72 |
+
compiled.generate_fn,
|
| 73 |
+
compiled.param_grid_fn(),
|
| 74 |
+
open_p, high, low, close, volume,
|
| 75 |
+
timeframe, strategy_name, symbol;
|
| 76 |
+
run_bt_fn=run_bt_fn,
|
| 77 |
+
bt_cfg_fn=cfg_fn,
|
| 78 |
+
n_windows=n_windows, is_ratio=is_ratio,
|
| 79 |
+
min_trades=min_trades, min_sharpe=min_sharpe,
|
| 80 |
+
max_combos=max_combos,
|
| 81 |
+
)
|
| 82 |
+
|
| 83 |
+
return Dict{String,Any}(
|
| 84 |
+
"is_valid" => true,
|
| 85 |
+
"strategy" => opt.strategy_name,
|
| 86 |
+
"symbol" => opt.symbol,
|
| 87 |
+
"timeframe" => opt.timeframe,
|
| 88 |
+
"optimal_params" => opt.optimal_params,
|
| 89 |
+
"oos_sharpe_mean" => opt.oos_sharpe_mean,
|
| 90 |
+
"oos_sharpe_std" => opt.oos_sharpe_std,
|
| 91 |
+
"oos_win_rate" => opt.oos_win_rate,
|
| 92 |
+
"oos_max_dd" => opt.oos_max_dd,
|
| 93 |
+
"oos_pf_mean" => opt.oos_pf_mean,
|
| 94 |
+
"oos_trades" => opt.oos_trades,
|
| 95 |
+
"wf_efficiency" => opt.wf_efficiency,
|
| 96 |
+
"robustness" => opt.robustness,
|
| 97 |
+
"is_viable" => opt.is_viable,
|
| 98 |
+
"reasons" => opt.reasons,
|
| 99 |
+
"oos_sharpes" => opt.oos_sharpes,
|
| 100 |
+
)
|
| 101 |
+
end
|
| 102 |
+
|
| 103 |
+
end # module QuantEngine
|
src/SignalCompiler.jl
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
SignalCompiler.jl β Compile AI-generated Julia strategy code.
|
| 3 |
+
No includes. Indicators functions injected explicitly into sandbox.
|
| 4 |
+
"""
|
| 5 |
+
module SignalCompiler

using Statistics, Random

export compile_strategy, CompiledStrategy

# Result of compiling one strategy source string.
struct CompiledStrategy
    name          :: String
    generate_fn   :: Function   # (o,h,l,c,v,params) -> signals
    param_grid_fn :: Function   # () -> parameter grid
    is_valid      :: Bool
    error         :: String     # empty when is_valid is true
end

# Convenience constructor for a failed compile: placeholder functions
# (all-zero signals, empty grid), is_valid=false and the given message.
CompiledStrategy(name::String; error::String="") =
    CompiledStrategy(name,
        (o,h,l,c,v,p)->zeros(Int,length(c)),
        ()->Dict{String,Vector{Float64}}(),
        false, error)

"""
    compile_strategy(name, code, indicator_module) -> CompiledStrategy

Evaluate `code` inside a freshly created module and pull out the two
functions it must define, `get_param_grid` and `generate_signals`.

`indicator_module` is the Indicators module, passed from QuantEngine; its
exported names are bound as constants in the new module before `code` runs.
Parse errors, evaluation errors, missing functions and smoke-test failures
are all reported through the `error` field of an invalid `CompiledStrategy`;
this function does not throw for them.

NOTE(security): `code` is evaluated with `Core.eval`. The module is created
with `Module(name)`, i.e. with the constructor's default standard imports,
so it isolates *names* only and must not be treated as a security boundary
for untrusted input.
"""
function compile_strategy(name::String, code::String, ind_mod::Module)::CompiledStrategy
    safe    = replace(replace(name," "=>"_"), r"[^\w]"=>"x")
    sandbox = Module(Symbol("S_"*safe*"_"*string(rand(UInt16),base=16)))

    # Inject all exported Indicators functions.
    for fn_name in names(ind_mod; all=false)
        fn_name === :Indicators && continue
        try
            Core.eval(sandbox,
                Expr(:const, Expr(:(=), fn_name, getfield(ind_mod, fn_name))))
        catch
            # Best effort: a name that cannot be bound is simply left out.
        end
    end

    # Inject Statistics.
    for sym in (:mean,:std,:var,:median,:cor,:cov)
        try
            Core.eval(sandbox, Expr(:const, Expr(:(=),sym,getfield(Statistics,sym))))
        catch
        end
    end

    # Inject a selection of Base names; when the Base lookup or binding
    # fails, retry with the value `eval(sym)` yields in this module.
    for sym in (:length,:size,:zeros,:ones,:fill,:similar,
                :sum,:prod,:diff,:cumsum,:cumprod,
                :max,:min,:abs,:sqrt,:log,:exp,:floor,:ceil,:round,:clamp,
                :isnan,:isinf,:isfinite,:sign,
                :sort,:sortperm,:reverse,:unique,:findall,:findfirst,
                :push!,:append!,:pop!,:first,:last,:eachindex,
                :map,:filter,:any,:all,:count,
                :Int,:Int64,:Float64,:Bool,
                :Dict,:Vector,:Tuple,:Set,
                :NaN,:Inf,:pi,:true,:false,
                :println,:string,:get)
        try
            Core.eval(sandbox, Expr(:const, Expr(:(=),sym,getfield(Base,sym))))
        catch
            try
                Core.eval(sandbox, Expr(:const, Expr(:(=),sym,eval(sym))))
            catch
            end
        end
    end

    parsed = try
        Meta.parseall(code)
    catch e
        return CompiledStrategy(name; error="Parse: $(sprint(showerror,e))")
    end

    try
        Core.eval(sandbox, parsed)
    catch e
        return CompiledStrategy(name; error="Eval: $(sprint(showerror,e))")
    end

    isdefined(sandbox,:get_param_grid) ||
        return CompiledStrategy(name; error="Missing: get_param_grid()")
    isdefined(sandbox,:generate_signals) ||
        return CompiledStrategy(name; error="Missing: generate_signals(o,h,l,c,v,params)")

    gen_fn  = getfield(sandbox, :generate_signals)
    grid_fn = getfield(sandbox, :get_param_grid)

    err = _smoke(gen_fn, grid_fn)
    err != "" && return CompiledStrategy(name; error=err)

    return CompiledStrategy(name, gen_fn, grid_fn, true, "")
end

# Run the strategy once on 200 synthetic bars and check the output contract.
# Returns "" on success, otherwise a human-readable failure description.
function _smoke(gen_fn, grid_fn)::String
    try
        grid = grid_fn()
        grid isa Dict || return "get_param_grid() must return Dict"
        # First value of every grid entry (0 when the entry is not a
        # non-empty Vector).
        params = Dict{String,Float64}(
            k => Float64(v isa Vector && !isempty(v) ? v[1] : 0) for (k,v) in grid)

        # Fixed-seed local RNG: the same strategy source always gets the same
        # verdict, and the global random stream is left untouched.
        rng = MersenneTwister(20240531)
        n = 200
        c = 100.0 .* exp.(cumsum(randn(rng, n) .* 0.005))
        h = c .* (1.0 .+ abs.(randn(rng, n)) .* 0.003)
        l = c .* (1.0 .- abs.(randn(rng, n)) .* 0.003)
        o = c .* (1.0 .+ randn(rng, n) .* 0.001)
        v = abs.(randn(rng, n)) .* 1000.0 .+ 500.0

        sigs = gen_fn(o,h,l,c,v,params)
        sigs isa Vector || return "generate_signals must return Vector, got $(typeof(sigs))"
        length(sigs) != n && return "Signal length $(length(sigs)) ≠ $n"
        any(s->!(s in (-1,0,1)), sigs) && return "Values must be in {-1,0,1}"
    catch e
        return "Smoke: $(sprint(showerror,e))"
    end
    return ""
end

end # module SignalCompiler
|
src/strategy_template.jl
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2 |
+
# JULIA STRATEGY TEMPLATE
|
| 3 |
+
# This is the exact format Claude generates for each strategy.
|
| 4 |
+
# Two functions required. No module/using declarations needed —
|
| 5 |
+
# all Indicators functions are pre-injected by SignalCompiler.jl.
|
| 6 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 7 |
+
|
| 8 |
+
# ββ Example: EMA Crossover Strategy βββββββββββββββββββββββββββββββββ
|
| 9 |
+
|
| 10 |
+
"""
|
| 11 |
+
Return parameter ranges for walk-forward grid search.
|
| 12 |
+
Keys must be valid Julia identifiers. Values are Float64 ranges.
|
| 13 |
+
"""
|
| 14 |
+
function get_param_grid() :: Dict{String, Vector{Float64}}
    # Each key maps to the candidate values tried for that parameter.
    grid = Dict{String, Vector{Float64}}()
    grid["fast_period"] = [10.0, 15.0, 20.0, 25.0]
    grid["slow_period"] = [40.0, 50.0, 60.0, 80.0]
    grid["atr_filter"]  = [14.0]   # single value = no optimization
    return grid
end
|
| 21 |
+
|
| 22 |
+
"""
|
| 23 |
+
Generate trading signals from OHLCV arrays.
|
| 24 |
+
|
| 25 |
+
Arguments (all same length n):
|
| 26 |
+
open_p, high, low, close, volume :: Vector{Float64}
|
| 27 |
+
params :: Dict{String,Float64} β one value per key from get_param_grid()
|
| 28 |
+
|
| 29 |
+
Returns Vector{Int} of length n:
|
| 30 |
+
1 = enter/hold long
|
| 31 |
+
-1 = enter/hold short
|
| 32 |
+
0 = flat / no position
|
| 33 |
+
|
| 34 |
+
Rules:
|
| 35 |
+
- Return 0 for the first ~slow_period bars (warmup / NaN period)
|
| 36 |
+
- Always use isnan() checks before comparisons
|
| 37 |
+
- Signals are position signals, not entry triggers
|
| 38 |
+
(engine manages entries/exits from signal transitions)
|
| 39 |
+
"""
|
| 40 |
+
function generate_signals(
    open_p :: Vector{Float64},
    high   :: Vector{Float64},
    low    :: Vector{Float64},
    close  :: Vector{Float64},
    volume :: Vector{Float64},
    params :: Dict{String, Float64},
) :: Vector{Int}

    # Read a period parameter, falling back to `default`, rounded to Int.
    period(key, default) = Int(round(get(params, key, default)))

    fast_p = period("fast_period", 20.0)
    slow_p = period("slow_period", 50.0)
    atr_p  = period("atr_filter",  14.0)

    fast_ema = ema(close, fast_p)
    slow_ema = ema(close, slow_p)
    atr_vals = atr(high, low, close, atr_p)

    n       = length(close)
    signals = zeros(Int, n)   # bars that are skipped below stay flat (0)

    for i in (slow_p + 1):n
        f, s, a = fast_ema[i], slow_ema[i], atr_vals[i]

        # Skip if any indicator is NaN (still in warmup).
        (isnan(f) || isnan(s) || isnan(a)) && continue

        # Optional ATR volatility filter: only trade when ATR is at least
        # 0.1% of the current price.
        a < close[i] * 0.001 && continue

        # 1 = bullish (long), -1 = bearish (short), 0 = neutral.
        signals[i] = f > s ? 1 : (f < s ? -1 : 0)
    end

    return signals
end
|
| 81 |
+
|
| 82 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 83 |
+
# Example 2: RSI Mean Reversion
|
| 84 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 85 |
+
|
| 86 |
+
# function get_param_grid()
|
| 87 |
+
# return Dict(
|
| 88 |
+
# "rsi_period" => [7.0, 10.0, 14.0, 21.0],
|
| 89 |
+
# "oversold" => [25.0, 30.0, 35.0],
|
| 90 |
+
# "overbought" => [65.0, 70.0, 75.0],
|
| 91 |
+
# "ma_period" => [20.0, 50.0],
|
| 92 |
+
# )
|
| 93 |
+
# end
|
| 94 |
+
#
|
| 95 |
+
# function generate_signals(open_p, high, low, close, volume, params)
|
| 96 |
+
# n = length(close)
|
| 97 |
+
# rsi_p = Int(round(get(params, "rsi_period", 14.0)))
|
| 98 |
+
# oversold = get(params, "oversold", 30.0)
|
| 99 |
+
# overbought = get(params, "overbought", 70.0)
|
| 100 |
+
# ma_p = Int(round(get(params, "ma_period", 50.0)))
|
| 101 |
+
#
|
| 102 |
+
# rsi_vals = rsi(close, rsi_p)
|
| 103 |
+
# trend_ma = sma(close, ma_p)
|
| 104 |
+
# signals = zeros(Int, n)
|
| 105 |
+
#
|
| 106 |
+
# for i in (ma_p + rsi_p + 1):n
|
| 107 |
+
# isnan(rsi_vals[i]) && continue
|
| 108 |
+
# isnan(trend_ma[i]) && continue
|
| 109 |
+
#
|
| 110 |
+
# # Mean reversion: buy oversold in uptrend, sell overbought in downtrend
|
| 111 |
+
# if rsi_vals[i] < oversold && close[i] > trend_ma[i]
|
| 112 |
+
# signals[i] = 1
|
| 113 |
+
# elseif rsi_vals[i] > overbought && close[i] < trend_ma[i]
|
| 114 |
+
# signals[i] = -1
|
| 115 |
+
# end
|
| 116 |
+
# end
|
| 117 |
+
# return signals
|
| 118 |
+
# end
|
src/warmup.jl
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Build-time warmup script: loads QuantEngine and calls each hot path once
# on synthetic data so the compiled code is ready before the first request.
push!(LOAD_PATH, @__DIR__)
include(joinpath(@__DIR__, "QuantEngine.jl"))
using .QuantEngine
using Statistics, Random

println("Warming up all Julia hot paths...")

# Synthetic OHLCV series (500 bars, random walk around 100).
n = 500
c = 100.0 .* exp.(cumsum(randn(n) .* 0.005))
h = c .* (1.0 .+ abs.(randn(n)) .* 0.005)
l = c .* (1.0 .- abs.(randn(n)) .* 0.005)
o = c .* (1.0 .+ randn(n) .* 0.002)
v = abs.(randn(n)) .* 2000.0 .+ 1000.0

_ = sma(c, 20);         println(" sma ✓")
_ = ema(c, 20);         println(" ema ✓")
_ = rsi(c, 14);         println(" rsi ✓")
_ = macd(c);            println(" macd ✓")
_ = atr(h, l, c, 14);   println(" atr ✓")
_ = bbands(c, 20, 2.0); println(" bbands ✓")
_ = donchian(h, l, 20); println(" donchian ✓")
_ = adx(h, l, c, 14);   println(" adx ✓")
_ = stoch(h, l, c);     println(" stoch ✓")
_ = zscore(c, 20);      println(" zscore ✓")
println("All indicators warmed ✓")

# Minimal strategy in the format SignalCompiler expects.
code = """
function get_param_grid() :: Dict{String, Vector{Float64}}
    return Dict("period" => [10.0, 20.0, 30.0])
end
function generate_signals(open_p, high, low, close, volume, params)
    n = length(close)
    p = Int(round(get(params, "period", 20.0)))
    ma = sma(close, p)
    signals = zeros(Int, n)
    for i in (p+1):n
        isnan(ma[i]) && continue
        signals[i] = close[i] > ma[i] ? 1 : -1
    end
    return signals
end
"""

result = full_backtest_pipeline(
    code, "WarmupTest",
    o, h, l, c, v, "1h", "TEST";
    n_windows=2, max_combos=3, min_trades=1,
)

# Read the flags with `get`: the result of a failed compile need not carry
# "is_viable", and plain indexing would abort the warmup with a KeyError.
# Using locals also keeps quoted keys out of the string interpolation.
is_valid  = get(result, "is_valid", false)
is_viable = get(result, "is_viable", false)
println("full_backtest_pipeline: is_valid=$(is_valid) viable=$(is_viable)")
is_valid || println(" compile error: ", get(result, "error", ""))
println("\n✅ Julia warmup complete — all hot paths compiled.")
|
src/warmup_bridge.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
warmup_bridge.py
|
| 3 |
+
Pre-warms the juliacall PythonβJulia bridge at build time.
|
| 4 |
+
Called from Dockerfile Step 3 β runs once, caches the Julia
|
| 5 |
+
session location so runtime startup is instant.
|
| 6 |
+
"""
|
| 7 |
+
import os
import sys

# Must be set before juliacall is imported so the Julia session picks them up.
os.environ["JULIA_PROJECT"] = "/app/src"
os.environ["JULIA_DEPOT_PATH"] = "/app/.julia"


def _warm_bridge():
    """Load QuantEngine through juliacall and call one indicator as a sanity check."""
    from juliacall import Main as jl

    for statement in (
        'push!(LOAD_PATH, "/app/src")',
        'include("/app/src/QuantEngine.jl")',
        "using .QuantEngine",
    ):
        jl.seval(statement)

    # Quick sanity check: call one indicator through the bridge.
    import numpy as np

    closes = (100.0 * np.exp(np.cumsum(np.random.randn(100) * 0.005))).tolist()
    smoothed = jl.QuantEngine.sma(jl.convert(jl.Vector[jl.Float64], closes), 20)
    assert len(smoothed) == 100
    print("juliacall bridge warmed up ✓")


print("Pre-warming juliacall bridge...")
try:
    _warm_bridge()
except Exception as e:
    print(f"WARNING: juliacall warmup failed: {e}")
    print("App will still work — Julia initialises on first request instead.")

# Always exit 0: a failed warmup is non-fatal and must not break the build.
sys.exit(0)
|
utils/__init__.py
ADDED
|
File without changes
|
utils/config.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""utils/config.py β reads from HF Spaces Secrets (env vars)."""
|
| 2 |
+
import os
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
# Root of the scratch area; the sub-directories below are created eagerly
# when this module is imported.
TMP = Path("/tmp/quant")
for d in ("pdfs", "tick_cache", "compiled", "exports"):
    TMP.joinpath(d).mkdir(parents=True, exist_ok=True)
|
| 8 |
+
|
| 9 |
+
def get(k: str, default: str = "") -> str:
    """Return environment variable *k*, or *default* when it is not set."""
    return os.environ.get(k, default)


def get_list(k: str, default: str = "") -> list:
    """Return environment variable *k* as a list of comma-separated items.

    Each item is stripped of surrounding whitespace and empty items are
    dropped, so ``"1h, 4h,"`` yields ``["1h", "4h"]``.
    """
    return [item.strip() for item in get(k, default).split(",") if item.strip()]


# Credentials and repository ids (empty string when unset).
ANTHROPIC_API_KEY = get("ANTHROPIC_API_KEY")
HF_TOKEN          = get("HF_TOKEN")
HF_DATASET_REPO   = get("HF_DATASET_REPO")
HF_TICK_REPO      = get("HF_TICK_REPO")

# Extraction settings.
SIMILARITY_THRESHOLD = float(get("SIMILARITY_THRESHOLD", "0.85"))
MAX_TOKENS_PER_CHUNK = int(get("MAX_TOKENS_PER_CHUNK", "3000"))
OCR_DPI              = int(get("OCR_DPI", "300"))

# Backtest / walk-forward settings.
INITIAL_EQUITY   = float(get("INITIAL_EQUITY", "10000"))
COMMISSION_PCT   = float(get("COMMISSION_PCT", "0.0002"))
RISK_PER_TRADE   = float(get("RISK_PER_TRADE", "0.01"))
WF_WINDOWS       = int(get("WF_WINDOWS", "5"))
WF_IS_RATIO      = float(get("WF_IS_RATIO", "0.70"))
MAX_PARAM_COMBOS = int(get("MAX_PARAM_COMBOS", "300"))
MIN_TRADES       = int(get("MIN_TRADES", "30"))
MIN_SHARPE       = float(get("MIN_SHARPE", "0.5"))
# Whitespace around entries is ignored; a value with no usable entries
# falls back to the default timeframes instead of producing [""].
BACKTEST_TFS     = get_list("BACKTEST_TIMEFRAMES", "1h,4h,1d") or ["1h", "4h", "1d"]
|
| 29 |
+
|
| 30 |
+
# Strategy category labels, one per line.
CATEGORIES = [
    "Trend Following",
    "Mean Reversion",
    "Statistical Arbitrage",
    "Momentum",
    "Breakout",
    "Volatility Trading",
    "Market Making",
    "Pattern Recognition",
    "Machine Learning",
    "Options Strategy",
    "High Frequency",
    "Pairs Trading",
    "Carry Trade",
    "Seasonal / Calendar",
    "Risk Management",
    "Position Sizing",
    "Portfolio Construction",
    "Market Microstructure",
    "Other",
]
|
| 38 |
+
|
| 39 |
+
EXTRACTION_PROMPT = """
|
| 40 |
+
You are a quantitative finance knowledge extraction engine.
|
| 41 |
+
|
| 42 |
+
Extract ALL trading strategies, mathematical formulas, and complete trading systems
|
| 43 |
+
from the text below (taken from an algorithmic trading book).
|
| 44 |
+
|
| 45 |
+
Output ONLY valid JSON β no markdown fences, no preamble:
|
| 46 |
+
{
|
| 47 |
+
"strategies": [{
|
| 48 |
+
"name": "string", "category": "string", "description": "string",
|
| 49 |
+
"entry_rules": ["string"], "exit_rules": ["string"],
|
| 50 |
+
"filters": ["string"], "timeframes": ["string"], "instruments": ["string"],
|
| 51 |
+
"parameters": {"name": "description with typical value"},
|
| 52 |
+
"mathematical_basis": "string", "source_context": "string"
|
| 53 |
+
}],
|
| 54 |
+
"formulas": [{
|
| 55 |
+
"name": "string", "category": "string",
|
| 56 |
+
"latex": "LaTeX string", "plain_text": "string",
|
| 57 |
+
"variables": {"symbol": "description"},
|
| 58 |
+
"purpose": "string", "usage_context": "string", "source_context": "string"
|
| 59 |
+
}],
|
| 60 |
+
"systems": [{
|
| 61 |
+
"name": "string", "components": ["string"],
|
| 62 |
+
"entry_system": "string", "exit_system": "string",
|
| 63 |
+
"risk_management": "string", "position_sizing": "string",
|
| 64 |
+
"backtesting_notes": "string", "source_context": "string"
|
| 65 |
+
}]
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
Rules: empty arrays [] if nothing found. Preserve exact math. Include LaTeX.
|
| 69 |
+
Source: {source_file} | Pages: {page_start}β{page_end}
|
| 70 |
+
--- TEXT ---
|
| 71 |
+
{text}
|
| 72 |
+
--- END ---
|
| 73 |
+
""".strip()
|
| 74 |
+
|
| 75 |
+
COMPILER_PROMPT = """
|
| 76 |
+
You are a Julia algorithmic trading code generator.
|
| 77 |
+
|
| 78 |
+
Convert the strategy JSON below into executable Julia code.
|
| 79 |
+
Output ONLY the Julia code β no markdown fences, no explanation, no module/using declarations.
|
| 80 |
+
|
| 81 |
+
EXACT REQUIRED FORMAT (two functions, nothing else):
|
| 82 |
+
|
| 83 |
+
function get_param_grid() :: Dict{{String, Vector{{Float64}}}}
|
| 84 |
+
return Dict(
|
| 85 |
+
"param_name" => [val1, val2, val3],
|
| 86 |
+
)
|
| 87 |
+
end
|
| 88 |
+
|
| 89 |
+
function generate_signals(
|
| 90 |
+
open_p :: Vector{{Float64}},
|
| 91 |
+
high :: Vector{{Float64}},
|
| 92 |
+
low :: Vector{{Float64}},
|
| 93 |
+
close :: Vector{{Float64}},
|
| 94 |
+
volume :: Vector{{Float64}},
|
| 95 |
+
params :: Dict{{String, Float64}},
|
| 96 |
+
) :: Vector{{Int}}
|
| 97 |
+
n = length(close)
|
| 98 |
+
signals = zeros(Int, n)
|
| 99 |
+
# ... your logic here ...
|
| 100 |
+
return signals
|
| 101 |
+
end
|
| 102 |
+
|
| 103 |
+
RULES (CRITICAL β violations cause compile failure):
|
| 104 |
+
1. NO module, NO using, NO include statements
|
| 105 |
+
2. ALWAYS check isnan() before using indicator values
|
| 106 |
+
3. Return signals[i] = 0 during indicator warmup period
|
| 107 |
+
4. Values: 1=long, -1=short, 0=flat only
|
| 108 |
+
5. Get int params: Int(round(get(params, "key", default)))
|
| 109 |
+
6. Get float params: get(params, "key", default)
|
| 110 |
+
|
| 111 |
+
AVAILABLE FUNCTIONS (pre-injected, call directly without prefix):
|
| 112 |
+
Trend: sma(s,n) ema(s,n) wma(s,n) tema(s,n) dema(s,n)
|
| 113 |
+
Momentum: rsi(c,n) macd(c;fast,slow,sig)->(ml,sl,hist) momentum(s,n) roc(s,n)
|
| 114 |
+
Bands: bbands(c,n,k)->(up,mid,lo) keltner(h,l,c,n,k)->(up,mid,lo)
|
| 115 |
+
Channel: donchian(h,l,n)->(up,mid,lo) highest(s,n) lowest(s,n)
|
| 116 |
+
Volatility: atr(h,l,c,n) std_dev(s,n) zscore(s,n)
|
| 117 |
+
Oscillators: stoch(h,l,c;k,d)->(K,D) cci(h,l,c,n) williams_r(h,l,c,n)
|
| 118 |
+
Volume: vwap(h,l,c,v) obv(c,v) cmf(h,l,c,v,n)
|
| 119 |
+
Trend strength: adx(h,l,c,n)->(adx,pdi,ndi)
|
| 120 |
+
Crosses: crossover(a,b)->Bool[] crossunder(a,b)->Bool[]
|
| 121 |
+
Math: mean(v) std(v) diff(v) cumsum(v) abs(x) sqrt(x)
|
| 122 |
+
|
| 123 |
+
Strategy:
|
| 124 |
+
{strategy_json}
|
| 125 |
+
""".strip()
|
utils/hf_io.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""utils/hf_io.py β All HuggingFace Hub read/write."""
|
| 2 |
+
import io, json
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Optional
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from huggingface_hub import HfApi, hf_hub_download, list_repo_files, CommitOperationAdd
|
| 7 |
+
from loguru import logger
|
| 8 |
+
import utils.config as cfg
|
| 9 |
+
|
| 10 |
+
def _api():
    """Build a Hub client that uses the token from the config module."""
    client = HfApi(token=cfg.HF_TOKEN)
    return client
|
| 11 |
+
|
| 12 |
+
# ββ Knowledge base βββββββββββββββββββββββββββββββββββββ
|
| 13 |
+
|
| 14 |
+
def kb_load() -> dict:
    """Download ``knowledge_base.jsonl`` and index its records.

    Returns a dict with the keys ``"strategies"``, ``"formulas"`` and
    ``"systems"``; each maps ``canonical_id`` to the record whose ``_type``
    names that section. Records lacking either field, or with an unknown
    ``_type``, are ignored. Lines that are not valid JSON objects are
    skipped (and counted in a warning) rather than discarding the whole
    file. An empty structure is returned when no dataset repo is configured
    or when the download/read fails.
    """
    empty = {"strategies": {}, "formulas": {}, "systems": {}}
    if not cfg.HF_DATASET_REPO:
        return empty
    try:
        path = hf_hub_download(
            repo_id=cfg.HF_DATASET_REPO, filename="knowledge_base.jsonl",
            repo_type="dataset", token=cfg.HF_TOKEN,
            local_dir=str(cfg.TMP), force_download=True,
        )
        result = {"strategies": {}, "formulas": {}, "systems": {}}
        skipped = 0
        with open(path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except json.JSONDecodeError:
                    skipped += 1
                    continue
                if not isinstance(rec, dict):
                    skipped += 1
                    continue
                kind = rec.get("_type", "")
                cid = rec.get("canonical_id", "")
                if isinstance(kind, str) and kind in result and cid:
                    result[kind][cid] = rec
        if skipped:
            logger.warning(f"KB load: skipped {skipped} malformed line(s)")
        logger.info(f"KB: {len(result['strategies'])} strats, {len(result['formulas'])} formulas")
        return result
    except Exception as e:
        # Best effort: the file may simply not have been created yet.
        logger.warning(f"KB load (may not exist yet): {e}")
        return empty
|
| 36 |
+
|
| 37 |
+
def kb_save(kb: dict) -> bool:
    """Serialise *kb* to JSON Lines and upload it as ``knowledge_base.jsonl``.

    Every record is written with a ``_type`` field naming its section.
    Returns ``True`` after a successful upload and ``False`` when no dataset
    repo is configured or when anything raises (the error is logged).
    """
    if not cfg.HF_DATASET_REPO:
        return False
    try:
        payload = "\n".join(
            json.dumps({**rec, "_type": kind})
            for kind in ("strategies", "formulas", "systems")
            for rec in kb[kind].values()
        ).encode()
        _api().upload_file(
            path_or_fileobj=io.BytesIO(payload),
            path_in_repo="knowledge_base.jsonl",
            repo_id=cfg.HF_DATASET_REPO,
            repo_type="dataset",
            commit_message="Update knowledge base",
        )
    except Exception as e:
        logger.error(f"KB save: {e}")
        return False
    return True
|
| 53 |
+
|
| 54 |
+
# ββ Tick data ββββββββββββββββββββββββββββββββββββββββββ
|
| 55 |
+
|
| 56 |
+
def tick_list_symbols() -> list[str]:
    """Return the sorted, de-duplicated top-level folder names of the tick repo.

    Only paths that contain a ``/`` contribute a name. Returns ``[]`` when no
    tick repo is configured or when listing the repo fails (logged as a
    warning).
    """
    if not cfg.HF_TICK_REPO:
        return []
    try:
        repo_files = list_repo_files(
            repo_id=cfg.HF_TICK_REPO, repo_type="dataset", token=cfg.HF_TOKEN
        )
        return sorted({path.split("/")[0] for path in repo_files if "/" in path})
    except Exception as e:
        logger.warning(f"Tick symbols: {e}")
        return []
|
| 69 |
+
|
| 70 |
+
def tick_load(symbol: str, timeframe: str = "1h") -> Optional[pd.DataFrame]:
    """Return OHLCV bars for *symbol* at *timeframe*, or ``None`` if unavailable.

    A parquet cache under ``TMP/tick_cache`` is consulted first. Otherwise
    the candidate files ``<timeframe>.parquet``, ``<timeframe>.csv``,
    ``ticks.parquet`` and ``data.parquet`` are tried in that order; the first
    one that normalises to a non-empty frame is cached and returned.
    """
    cache = cfg.TMP / "tick_cache" / f"{symbol}_{timeframe}.parquet"
    if cache.exists():
        return pd.read_parquet(cache)
    if not cfg.HF_TICK_REPO:
        return None

    candidates = (
        f"{timeframe}.parquet",
        f"{timeframe}.csv",
        "ticks.parquet",
        "data.parquet",
    )
    for fname in candidates:
        raw = _try_dl(symbol, fname)
        if raw is None:
            continue
        # Files named tick* or data.parquet are resampled to the timeframe;
        # the others are only normalised.
        needs_resample = fname.startswith("tick") or fname == "data.parquet"
        bars = _norm_ohlcv(raw, timeframe if needs_resample else None)
        if bars is None or bars.empty:
            continue
        bars.to_parquet(cache)
        return bars
    return None
|
| 82 |
+
|
| 83 |
+
def _try_dl(sym, fname):
    """Download ``<sym>/<fname>`` from the tick repo and read it into a DataFrame.

    ``.parquet`` names are read with ``pd.read_parquet``, everything else with
    ``pd.read_csv``. Any failure (missing file, download or parse error)
    yields ``None`` so the caller can move on to the next candidate.
    """
    try:
        target_dir = cfg.TMP / "tick_cache" / sym
        target_dir.mkdir(parents=True, exist_ok=True)
        path = hf_hub_download(
            repo_id=cfg.HF_TICK_REPO,
            filename=f"{sym}/{fname}",
            repo_type="dataset",
            token=cfg.HF_TOKEN,
            local_dir=str(target_dir),
            force_download=False,
        )
        reader = pd.read_parquet if fname.endswith(".parquet") else pd.read_csv
        return reader(path)
    except Exception:
        return None
|
| 93 |
+
|
| 94 |
+
_TF_MAP = {"1m":"1min","5m":"5min","15m":"15min","30m":"30min",
|
| 95 |
+
"1h":"1h","4h":"4h","1d":"1D","1w":"1W"}
|
| 96 |
+
|
| 97 |
+
def _norm_ohlcv(df: pd.DataFrame, resample_to=None) -> Optional[pd.DataFrame]:
|
| 98 |
+
import numpy as np
|
| 99 |
+
df = df.copy()
|
| 100 |
+
ts = next((c for c in df.columns if "time" in c.lower() or "date" in c.lower()), None)
|
| 101 |
+
if ts: df.index = pd.to_datetime(df[ts], utc=True); df = df.drop(columns=[ts])
|
| 102 |
+
else:
|
| 103 |
+
try: df.index = pd.to_datetime(df.index, utc=True)
|
| 104 |
+
except: return None
|
| 105 |
+
df.index = df.index.tz_convert("UTC") if df.index.tz else df.index.tz_localize("UTC")
|
| 106 |
+
df = df.sort_index()
|
| 107 |
+
if resample_to:
|
| 108 |
+
price_col = next((c for c in df.columns if c.lower() in ("bid","mid","price","close")), None)
|
| 109 |
+
if price_col is None: return None
|
| 110 |
+
if "bid" in df.columns and "ask" in df.columns:
|
| 111 |
+
df["_price"] = (df["bid"] + df["ask"]) / 2
|
| 112 |
+
else: df["_price"] = df[price_col]
|
| 113 |
+
rule = _TF_MAP.get(resample_to, "1h")
|
| 114 |
+
ohlcv = df["_price"].resample(rule).ohlc()
|
| 115 |
+
ohlcv.columns = ["open","high","low","close"]
|
| 116 |
+
vcol = next((c for c in df.columns if "vol" in c.lower()), None)
|
| 117 |
+
ohlcv["volume"] = df[vcol].resample(rule).sum() if vcol else df["_price"].resample(rule).count()
|
| 118 |
+
return ohlcv.dropna()
|
| 119 |
+
renames = {}
|
| 120 |
+
for c in df.columns:
|
| 121 |
+
lc = c.lower()
|
| 122 |
+
if lc in ("o","open"): renames[c]="open"
|
| 123 |
+
elif lc in ("h","high"): renames[c]="high"
|
| 124 |
+
elif lc in ("l","low"): renames[c]="low"
|
| 125 |
+
elif lc in ("c","close"): renames[c]="close"
|
| 126 |
+
elif lc in ("v","vol","volume","tick_volume"): renames[c]="volume"
|
| 127 |
+
df = df.rename(columns=renames)
|
| 128 |
+
for col in ["open","high","low","close"]:
|
| 129 |
+
if col not in df.columns: return None
|
| 130 |
+
if "volume" not in df.columns: df["volume"] = 0.0
|
| 131 |
+
df = df[["open","high","low","close","volume"]].astype(float).dropna(subset=["open","high","low","close"])
|
| 132 |
+
bad = df["high"] < df["low"]
|
| 133 |
+
if bad.any(): df.loc[bad,["high","low"]] = df.loc[bad,["low","high"]].values
|
| 134 |
+
return df
|
| 135 |
+
|
| 136 |
+
# ββ Batch push βββββββββββββββββββββββββββββββββββββββββ
|
| 137 |
+
|
| 138 |
+
def push_batch(files: list[tuple[str, bytes]], msg="Update", batch_size: int = 100) -> int:
    """Commit *files* to the dataset repo in chunks and return how many were pushed.

    Args:
        files: ``(path_in_repo, content_bytes)`` pairs.
        msg: Commit message prefix; the 1-based range of the chunk is appended.
        batch_size: Maximum number of files per commit (values below 1 are
            treated as 1).

    Returns:
        The number of files in the chunks whose commit succeeded. A failed
        chunk is logged and skipped; later chunks are still attempted.
        Returns 0 when no dataset repo is configured or *files* is empty.
    """
    if not cfg.HF_DATASET_REPO or not files:
        return 0
    ops = [CommitOperationAdd(path_in_repo=p, path_or_fileobj=io.BytesIO(c)) for p, c in files]
    step = max(1, int(batch_size))
    pushed = 0
    for start in range(0, len(ops), step):
        chunk = ops[start:start + step]
        try:
            _api().create_commit(
                repo_id=cfg.HF_DATASET_REPO,
                repo_type="dataset",
                operations=chunk,
                commit_message=f"{msg} [{start + 1}–{start + len(chunk)}]",
            )
            pushed += len(chunk)
        except Exception as e:
            logger.error(f"Batch push: {e}")
    return pushed
|
| 149 |
+
|
| 150 |
+
def push_result(name, symbol, tf, report, opt_json, mt5_set, julia_cfg) -> bool:
    """Upload the artefacts of one backtest run as a single batch.

    The markdown report goes under ``backtests/<slug>/``; the optimal-parameter
    JSON, the ``.set`` file and the Julia config go under ``optimal_sets/``.
    All file names share the ``<slug>_<symbol>_<tf>`` prefix, where ``slug``
    is ``slugify(name)``.

    Returns:
        True only if ``push_batch`` reports every artefact as pushed.
    """
    # Imported inside the function, as in the original code.
    from pipeline.exporter import slugify

    slug = slugify(name)
    stem = f"{slug}_{symbol}_{tf}"

    artefacts = [
        (f"backtests/{slug}/{stem}_report.md", report.encode()),
        (f"optimal_sets/{stem}_optimal.json", json.dumps(opt_json, indent=2).encode()),
        (f"optimal_sets/{stem}.set", mt5_set.encode()),
        (f"optimal_sets/{stem}_config.jl", julia_cfg.encode()),
    ]
    commit_msg = f"Backtest: {name} {symbol} {tf}"
    uploaded = push_batch(artefacts, commit_msg)
    return uploaded == len(artefacts)
|
| 160 |
+
|
| 161 |
+
def push_index(md: str, data: dict) -> bool:
    """Upload the backtest index in both its markdown and JSON forms.

    Args:
        md: Markdown text written to ``optimal_sets/BACKTEST_INDEX.md``.
        data: Index data serialised to ``optimal_sets/backtest_index.json``.

    Returns:
        True only if ``push_batch`` reports both files as pushed.
    """
    markdown_bytes = md.encode()
    json_bytes = json.dumps(data, indent=2).encode()
    index_files = [
        ("optimal_sets/BACKTEST_INDEX.md", markdown_bytes),
        ("optimal_sets/backtest_index.json", json_bytes),
    ]
    uploaded = push_batch(index_files, "Update index")
    return uploaded == len(index_files)
|
| 166 |
+
|
| 167 |
+
def fetch_index() -> dict:
    """Download and parse the backtest index from the dataset repo.

    Returns:
        The parsed contents of ``optimal_sets/backtest_index.json``, or an
        empty dict when no dataset repo is configured or the file cannot be
        downloaded or parsed.
    """
    # Same guard as the upload helpers: nothing to fetch without a repo.
    if not cfg.HF_DATASET_REPO:
        return {}
    try:
        path = hf_hub_download(
            repo_id=cfg.HF_DATASET_REPO,
            filename="optimal_sets/backtest_index.json",
            repo_type="dataset",
            token=cfg.HF_TOKEN,
            local_dir=str(cfg.TMP),
            force_download=True,
        )
        # Read as UTF-8 explicitly so parsing does not depend on the locale.
        return json.loads(Path(path).read_text(encoding="utf-8"))
    except Exception as e:
        # Best effort: a missing or unreadable index is treated as empty, but
        # the reason is logged and KeyboardInterrupt/SystemExit still propagate.
        logger.warning("Fetch index: %s", e)
        return {}
|
| 175 |
+
|
| 176 |
+
def fetch_file(remote: str) -> Optional[bytes]:
    """Download a single file from the dataset repo and return its bytes.

    Args:
        remote: Path of the file inside the dataset repo.

    Returns:
        The raw file contents, or None when no dataset repo is configured or
        the download or read fails.
    """
    # Same guard as the upload helpers: nothing to fetch without a repo.
    if not cfg.HF_DATASET_REPO:
        return None
    try:
        path = hf_hub_download(
            repo_id=cfg.HF_DATASET_REPO,
            filename=remote,
            repo_type="dataset",
            token=cfg.HF_TOKEN,
            local_dir=str(cfg.TMP / "downloads"),
            force_download=True,
        )
        return Path(path).read_bytes()
    except Exception as e:
        # Best effort: report the failure as None, but log the reason and let
        # KeyboardInterrupt/SystemExit propagate.
        logger.warning("Fetch file %s: %s", remote, e)
        return None
|
| 183 |
+
|
| 184 |
+
def pdf_upload(pdf_path: Path) -> str:
    """Upload a PDF to the ``pdfs/`` folder of the dataset repo.

    Args:
        pdf_path: Local path of the PDF; its file name is reused in the repo.

    Returns:
        The string form of whatever ``upload_file`` returns, or an empty
        string when no dataset repo is configured or the upload fails.
    """
    if not cfg.HF_DATASET_REPO:
        return ""

    try:
        api = _api()
        outcome = api.upload_file(
            path_or_fileobj=str(pdf_path),
            path_in_repo=f"pdfs/{pdf_path.name}",
            repo_id=cfg.HF_DATASET_REPO,
            repo_type="dataset",
            commit_message=f"Add PDF: {pdf_path.name}",
        )
        return str(outcome)
    except Exception as e:
        # Best effort: a failed upload is logged and reported as "".
        logger.warning(f"PDF upload: {e}")
        return ""
|