cronos3k committed on
Commit
3f27128
·
verified ·
1 Parent(s): 45d8b0a

Fix: add libopenblas-dev, auto-download demo vindex from HF dataset on startup

Browse files
Files changed (3) hide show
  1. Dockerfile +10 -7
  2. app.py +46 -1
  3. requirements.txt +1 -0
Dockerfile CHANGED
@@ -3,33 +3,36 @@ FROM rust:1.82-slim-bookworm AS builder
3
 
4
  RUN apt-get update && apt-get install -y \
5
  git pkg-config libssl-dev ca-certificates \
 
6
  && rm -rf /var/lib/apt/lists/*
7
 
8
- # Clone the fork (CPU-only build; no CUDA toolkit needed in the Space)
9
  RUN git clone --depth 1 https://github.com/cronos3k/larql /build
10
  WORKDIR /build
11
-
12
- # Build only the CLI crate (avoids protobuf issues in larql-server)
13
  RUN cargo build --release -p larql-cli
14
 
15
- # Stage 2: Runtime image
16
  FROM python:3.11-slim-bookworm
17
 
 
 
 
18
  # Copy the compiled binary
19
  COPY --from=builder /build/target/release/larql /usr/local/bin/larql
20
 
21
- # Copy the Gradio demo
22
  COPY app.py utils.py requirements.txt /app/
23
 
24
  WORKDIR /app
25
  RUN pip install --no-cache-dir -r requirements.txt
26
 
27
- # HuggingFace Spaces expects a non-root user with UID 1000
28
- RUN useradd -m -u 1000 hfuser
29
  USER hfuser
30
 
31
  EXPOSE 7860
32
  ENV GRADIO_SERVER_NAME=0.0.0.0
33
  ENV GRADIO_SERVER_PORT=7860
34
 
 
35
  CMD ["python", "app.py"]
 
3
 
4
  RUN apt-get update && apt-get install -y \
5
  git pkg-config libssl-dev ca-certificates \
6
+ libopenblas-dev gfortran \
7
  && rm -rf /var/lib/apt/lists/*
8
 
9
+ # Clone the fork and build only the CLI crate
10
  RUN git clone --depth 1 https://github.com/cronos3k/larql /build
11
  WORKDIR /build
 
 
12
  RUN cargo build --release -p larql-cli
13
 
14
+ # Stage 2: Lean runtime image
15
  FROM python:3.11-slim-bookworm
16
 
17
+ # OpenBLAS runtime lib needed by the larql binary
18
+ RUN apt-get update && apt-get install -y libopenblas0 && rm -rf /var/lib/apt/lists/*
19
+
20
  # Copy the compiled binary
21
  COPY --from=builder /build/target/release/larql /usr/local/bin/larql
22
 
23
+ # Copy the Gradio demo app
24
  COPY app.py utils.py requirements.txt /app/
25
 
26
  WORKDIR /app
27
  RUN pip install --no-cache-dir -r requirements.txt
28
 
29
+ # HuggingFace Spaces runs as non-root UID 1000
30
+ RUN useradd -m -u 1000 hfuser && mkdir -p /app/models && chown -R hfuser /app
31
  USER hfuser
32
 
33
  EXPOSE 7860
34
  ENV GRADIO_SERVER_NAME=0.0.0.0
35
  ENV GRADIO_SERVER_PORT=7860
36
 
37
+ # app.py downloads the demo vindex from HF Hub on first startup
38
  CMD ["python", "app.py"]
app.py CHANGED
@@ -27,7 +27,52 @@ from utils import (
27
  # Paths & defaults
28
  # ---------------------------------------------------------------------------
29
  REPO_ROOT = Path(__file__).parent.parent
30
- MODELS_DIR = REPO_ROOT / "models"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  def get_vindex_choices():
33
  paths = list_local_vindexes(str(MODELS_DIR)) if MODELS_DIR.exists() else []
 
27
  # Paths & defaults
28
  # ---------------------------------------------------------------------------
29
  REPO_ROOT = Path(__file__).parent.parent
30
+ # On HF Spaces (Docker) __file__ is /app/app.py, so REPO_ROOT is /
31
+ # Store the demo vindex alongside the app instead
32
+ _RUNNING_IN_SPACE = os.environ.get("SPACE_ID") is not None or Path("/app").exists()
33
+ MODELS_DIR = Path("/app/models") if _RUNNING_IN_SPACE else REPO_ROOT / "models"
34
+
35
+ # ---------------------------------------------------------------------------
36
+ # Demo vindex: auto-download from HF if no local vindexes are found
37
+ # ---------------------------------------------------------------------------
38
+ DEMO_DATASET = "cronos3k/qwen2.5-0.5b-instruct-vindex"
39
+ DEMO_VINDEX_DIR = MODELS_DIR / "qwen2.5-0.5b-instruct.vindex"
40
+
41
+ def maybe_download_demo_vindex(progress_fn=None):
42
+ """
43
+ Download the demo vindex from HF Hub if no local vindexes are available.
44
+ Called once at startup. Safe to call multiple times (no-op if already present).
45
+ """
46
+ # Already have it?
47
+ if (DEMO_VINDEX_DIR / "index.json").exists():
48
+ return str(DEMO_VINDEX_DIR)
49
+ # Any other local vindex?
50
+ if list_local_vindexes(str(MODELS_DIR)):
51
+ return None
52
+
53
+ try:
54
+ import huggingface_hub as hfh
55
+ except ImportError:
56
+ print("[demo] huggingface_hub not installed — skipping demo vindex download.")
57
+ return None
58
+
59
+ print(f"[demo] No local vindex found. Downloading demo from {DEMO_DATASET}...")
60
+ DEMO_VINDEX_DIR.mkdir(parents=True, exist_ok=True)
61
+ try:
62
+ hfh.snapshot_download(
63
+ repo_id=DEMO_DATASET,
64
+ repo_type="dataset",
65
+ local_dir=str(DEMO_VINDEX_DIR),
66
+ ignore_patterns=["*.md"], # skip dataset card
67
+ )
68
+ print(f"[demo] Demo vindex ready at {DEMO_VINDEX_DIR}")
69
+ return str(DEMO_VINDEX_DIR)
70
+ except Exception as e:
71
+ print(f"[demo] Could not download demo vindex: {e}")
72
+ return None
73
+
74
+ # Download at startup (blocking — fast on HF Spaces internal network, ~5-10s)
75
+ maybe_download_demo_vindex()
76
 
77
  def get_vindex_choices():
78
  paths = list_local_vindexes(str(MODELS_DIR)) if MODELS_DIR.exists() else []
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  gradio>=6.0.0
2
  pandas>=2.0.0
 
 
1
  gradio>=6.0.0
2
  pandas>=2.0.0
3
+ huggingface_hub>=0.20.0