samwaugh committed on
Commit
80ac589
·
1 Parent(s): 944a1aa

Try to get runner to work with space

Browse files
backend/runner/app.py CHANGED
@@ -57,15 +57,13 @@ CORS(app, resources={r"/*": {"origins": "*"}}) # allow SPA on :5173
57
  executor = ThreadPoolExecutor(max_workers=4)
58
 
59
  # Use the Space data volume, not the repo folder
60
- DATA_ROOT = Path(os.getenv("DATA_ROOT", "/data")).resolve()
61
- ARTIFACTS_DIR = DATA_ROOT / "artifacts"
62
- OUTPUTS_DIR = DATA_ROOT / "outputs"
63
- JSON_INFO_DIR = DATA_ROOT / "json_info" # optional in Phase 1
64
- MARKER_DIR = DATA_ROOT / "marker_output" # optional in Phase 1
65
-
66
- # Create directories if they don't exist
67
- for p in (ARTIFACTS_DIR, OUTPUTS_DIR, JSON_INFO_DIR, MARKER_DIR):
68
- p.mkdir(parents=True, exist_ok=True)
69
 
70
  # --------------------------------------------------------------------------- #
71
  # Global Data (safe loading for Phase 1) #
 
57
  executor = ThreadPoolExecutor(max_workers=4)
58
 
59
  # Use the Space data volume, not the repo folder
60
+ from .config import (
61
+ DATA_ROOT,
62
+ ARTIFACTS_DIR,
63
+ OUTPUTS_DIR,
64
+ JSON_INFO_DIR,
65
+ MARKER_DIR
66
+ )
 
 
67
 
68
  # --------------------------------------------------------------------------- #
69
  # Global Data (safe loading for Phase 1) #
backend/runner/config.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unified configuration for data paths in Hugging Face Spaces.
3
+ All runner modules should import from this module instead of defining their own paths.
4
+ """
5
+
6
+ import os
7
+ from pathlib import Path
8
+
9
+ # Data root from environment variable (set by Hugging Face Space)
10
+ DATA_ROOT = Path(os.getenv("DATA_ROOT", "/data")).resolve()
11
+
12
+ # Core data directories
13
+ EMBEDDINGS_DIR = DATA_ROOT / "embeddings"
14
+ JSON_INFO_DIR = DATA_ROOT / "json_info"
15
+ MODELS_DIR = DATA_ROOT / "models"
16
+ OUTPUTS_DIR = DATA_ROOT / "outputs"
17
+ ARTIFACTS_DIR = DATA_ROOT / "artifacts"
18
+ MARKER_DIR = DATA_ROOT / "marker_output"
19
+
20
+ # Model-specific embedding directories
21
+ CLIP_EMBEDDINGS_DIR = EMBEDDINGS_DIR / "CLIP_Embeddings"
22
+ PAINTINGCLIP_EMBEDDINGS_DIR = EMBEDDINGS_DIR / "PaintingCLIP_Embeddings"
23
+
24
+ # Model directories
25
+ PAINTINGCLIP_MODEL_DIR = MODELS_DIR / "PaintingCLIP"
26
+
27
+ # Metadata files
28
+ SENTENCES_JSON = JSON_INFO_DIR / "sentences.json"
29
+ WORKS_JSON = JSON_INFO_DIR / "works.json"
30
+ TOPICS_JSON = JSON_INFO_DIR / "topics.json"
31
+ CREATORS_JSON = JSON_INFO_DIR / "creators.json"
32
+ TOPIC_NAMES_JSON = JSON_INFO_DIR / "topic_names.json"
33
+
34
+ # Ensure directories exist
35
+ for directory in [EMBEDDINGS_DIR, JSON_INFO_DIR, MODELS_DIR, OUTPUTS_DIR, ARTIFACTS_DIR, MARKER_DIR]:
36
+ directory.mkdir(parents=True, exist_ok=True)
backend/runner/filtering.py CHANGED
@@ -6,22 +6,25 @@ import json
6
  from pathlib import Path
7
  from typing import Any, Dict, List, Set
8
 
9
- # Load data files
10
- ROOT = Path(__file__).resolve().parents[2]
11
- DATA_DIR = ROOT / "data" / "json_info"
 
 
 
 
12
 
13
- # Load all necessary data
14
- with open(DATA_DIR / "sentences.json", "r", encoding="utf-8") as f:
15
  SENTENCES = json.load(f)
16
 
17
- with open(DATA_DIR / "works.json", "r", encoding="utf-8") as f:
18
  WORKS = json.load(f)
19
 
20
- with open(DATA_DIR / "topics.json", "r", encoding="utf-8") as f:
21
  TOPICS = json.load(f)
22
 
23
- # Load creators mapping
24
- with open(DATA_DIR / "creators.json", "r", encoding="utf-8") as f:
25
  CREATORS_MAP = json.load(f)
26
 
27
 
 
6
  from pathlib import Path
7
  from typing import Any, Dict, List, Set
8
 
9
+ # Import configuration from unified config module
10
+ from .config import (
11
+ SENTENCES_JSON,
12
+ WORKS_JSON,
13
+ TOPICS_JSON,
14
+ CREATORS_JSON
15
+ )
16
 
17
+ # Load data files
18
+ with open(SENTENCES_JSON, "r", encoding="utf-8") as f:
19
  SENTENCES = json.load(f)
20
 
21
+ with open(WORKS_JSON, "r", encoding="utf-8") as f:
22
  WORKS = json.load(f)
23
 
24
+ with open(TOPICS_JSON, "r", encoding="utf-8") as f:
25
  TOPICS = json.load(f)
26
 
27
+ with open(CREATORS_JSON, "r", encoding="utf-8") as f:
 
28
  CREATORS_MAP = json.load(f)
29
 
30
 
backend/runner/inference.py CHANGED
@@ -29,31 +29,34 @@ from transformers import CLIPModel, CLIPProcessor
29
  from .filtering import get_filtered_sentence_ids
30
  # on-demand Grad-ECLIP & region-aware ranking
31
  from .heatmap import generate_heatmap
 
 
 
 
 
 
32
 
33
  # ─── Configuration ───────────────────────────────────────────────────────────
34
- ROOT = Path(__file__).resolve().parents[2] # artefact-context/
35
-
36
- # Model selection - change this to switch between models
37
  MODEL_TYPE: Literal["clip", "paintingclip"] = "paintingclip"
38
 
39
- # Model paths and settings
40
  MODEL_CONFIG = {
41
  "clip": {
42
  "model_id": "openai/clip-vit-base-patch32",
43
- "embeddings_dir": ROOT / "data" / "embeddings" / "CLIP_Embeddings",
44
  "use_lora": False,
45
  "lora_dir": None,
46
  },
47
  "paintingclip": {
48
  "model_id": "openai/clip-vit-base-patch32",
49
- "embeddings_dir": ROOT / "data" / "embeddings" / "PaintingCLIP_Embeddings",
50
  "use_lora": True,
51
- "lora_dir": ROOT / "data" / "models" / "PaintingCLIP",
52
  },
53
  }
54
 
55
  # Data paths
56
- SENTENCES_JSON = ROOT / "data" / "json_info" / "sentences.json"
57
 
58
  # Inference settings
59
  TOP_K = 25 # Number of results to return
 
29
  from .filtering import get_filtered_sentence_ids
30
  # on-demand Grad-ECLIP & region-aware ranking
31
  from .heatmap import generate_heatmap
32
+ from .config import (
33
+ CLIP_EMBEDDINGS_DIR,
34
+ PAINTINGCLIP_EMBEDDINGS_DIR,
35
+ PAINTINGCLIP_MODEL_DIR,
36
+ SENTENCES_JSON
37
+ )
38
 
39
  # ─── Configuration ───────────────────────────────────────────────────────────
 
 
 
40
  MODEL_TYPE: Literal["clip", "paintingclip"] = "paintingclip"
41
 
42
+ # Model selection - change this to switch between models
43
  MODEL_CONFIG = {
44
  "clip": {
45
  "model_id": "openai/clip-vit-base-patch32",
46
+ "embeddings_dir": CLIP_EMBEDDINGS_DIR,
47
  "use_lora": False,
48
  "lora_dir": None,
49
  },
50
  "paintingclip": {
51
  "model_id": "openai/clip-vit-base-patch32",
52
+ "embeddings_dir": PAINTINGCLIP_EMBEDDINGS_DIR,
53
  "use_lora": True,
54
+ "lora_dir": PAINTINGCLIP_MODEL_DIR,
55
  },
56
  }
57
 
58
  # Data paths
59
+ # SENTENCES_JSON = ROOT / "data" / "json_info" / "sentences.json"
60
 
61
  # Inference settings
62
  TOP_K = 25 # Number of results to return
backend/runner/tasks.py CHANGED
@@ -9,6 +9,7 @@ import time
9
  from datetime import datetime, timezone
10
 
11
  from .inference import run_inference
 
12
 
13
  # In-memory runs store and lock for thread-safe updates
14
  runs: dict[str, dict] = {}
@@ -19,8 +20,10 @@ FORCE_ERROR = os.getenv("FORCE_ERROR") == "1"
19
  SLEEP_SECS = int(os.getenv("SLEEP_SECS", "0"))
20
 
21
  # Get the base directory for file storage (project root)
22
- BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
23
- OUTPUTS_DIR = os.path.join(BASE_DIR, "data", "outputs")
 
 
24
 
25
 
26
  def run_task(
 
9
  from datetime import datetime, timezone
10
 
11
  from .inference import run_inference
12
+ from .config import OUTPUTS_DIR
13
 
14
  # In-memory runs store and lock for thread-safe updates
15
  runs: dict[str, dict] = {}
 
20
  SLEEP_SECS = int(os.getenv("SLEEP_SECS", "0"))
21
 
22
  # Get the base directory for file storage (project root)
23
+ # BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
24
+ # OUTPUTS_DIR = os.path.join(BASE_DIR, "data", "outputs")
25
+
26
+ # OUTPUTS_DIR is now imported from config
27
 
28
 
29
  def run_task(