samwaugh committed on
Commit
80ac589
·
1 Parent(s): 944a1aa

Try to get runner to work with space

Browse files
backend/runner/app.py CHANGED
@@ -57,15 +57,13 @@ CORS(app, resources={r"/*": {"origins": "*"}}) # allow SPA on :5173
57
  executor = ThreadPoolExecutor(max_workers=4)
58
 
59
  # Use the Space data volume, not the repo folder
60
- DATA_ROOT = Path(os.getenv("DATA_ROOT", "/data")).resolve()
61
- ARTIFACTS_DIR = DATA_ROOT / "artifacts"
62
- OUTPUTS_DIR = DATA_ROOT / "outputs"
63
- JSON_INFO_DIR = DATA_ROOT / "json_info" # optional in Phase 1
64
- MARKER_DIR = DATA_ROOT / "marker_output" # optional in Phase 1
65
-
66
- # Create directories if they don't exist
67
- for p in (ARTIFACTS_DIR, OUTPUTS_DIR, JSON_INFO_DIR, MARKER_DIR):
68
- p.mkdir(parents=True, exist_ok=True)
69
 
70
  # --------------------------------------------------------------------------- #
71
  # Global Data (safe loading for Phase 1) #
 
57
  executor = ThreadPoolExecutor(max_workers=4)
58
 
59
  # Use the Space data volume, not the repo folder
60
+ from .config import (
61
+ DATA_ROOT,
62
+ ARTIFACTS_DIR,
63
+ OUTPUTS_DIR,
64
+ JSON_INFO_DIR,
65
+ MARKER_DIR
66
+ )
 
 
67
 
68
  # --------------------------------------------------------------------------- #
69
  # Global Data (safe loading for Phase 1) #
backend/runner/config.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unified configuration for data paths in Hugging Face Spaces.
3
+ All runner modules should import from this module instead of defining their own paths.
4
+ """
5
+
6
+ import os
7
+ from pathlib import Path
8
+
9
+ # Data root from environment variable (set by Hugging Face Space)
10
+ DATA_ROOT = Path(os.getenv("DATA_ROOT", "/data")).resolve()
11
+
12
+ # Core data directories
13
+ EMBEDDINGS_DIR = DATA_ROOT / "embeddings"
14
+ JSON_INFO_DIR = DATA_ROOT / "json_info"
15
+ MODELS_DIR = DATA_ROOT / "models"
16
+ OUTPUTS_DIR = DATA_ROOT / "outputs"
17
+ ARTIFACTS_DIR = DATA_ROOT / "artifacts"
18
+ MARKER_DIR = DATA_ROOT / "marker_output"
19
+
20
+ # Model-specific embedding directories
21
+ CLIP_EMBEDDINGS_DIR = EMBEDDINGS_DIR / "CLIP_Embeddings"
22
+ PAINTINGCLIP_EMBEDDINGS_DIR = EMBEDDINGS_DIR / "PaintingCLIP_Embeddings"
23
+
24
+ # Model directories
25
+ PAINTINGCLIP_MODEL_DIR = MODELS_DIR / "PaintingCLIP"
26
+
27
+ # Metadata files
28
+ SENTENCES_JSON = JSON_INFO_DIR / "sentences.json"
29
+ WORKS_JSON = JSON_INFO_DIR / "works.json"
30
+ TOPICS_JSON = JSON_INFO_DIR / "topics.json"
31
+ CREATORS_JSON = JSON_INFO_DIR / "creators.json"
32
+ TOPIC_NAMES_JSON = JSON_INFO_DIR / "topic_names.json"
33
+
34
+ # Ensure directories exist
35
+ for directory in [EMBEDDINGS_DIR, JSON_INFO_DIR, MODELS_DIR, OUTPUTS_DIR, ARTIFACTS_DIR, MARKER_DIR]:
36
+ directory.mkdir(parents=True, exist_ok=True)
backend/runner/filtering.py CHANGED
@@ -6,22 +6,25 @@ import json
6
  from pathlib import Path
7
  from typing import Any, Dict, List, Set
8
 
9
- # Load data files
10
- ROOT = Path(__file__).resolve().parents[2]
11
- DATA_DIR = ROOT / "data" / "json_info"
 
 
 
 
12
 
13
- # Load all necessary data
14
- with open(DATA_DIR / "sentences.json", "r", encoding="utf-8") as f:
15
  SENTENCES = json.load(f)
16
 
17
- with open(DATA_DIR / "works.json", "r", encoding="utf-8") as f:
18
  WORKS = json.load(f)
19
 
20
- with open(DATA_DIR / "topics.json", "r", encoding="utf-8") as f:
21
  TOPICS = json.load(f)
22
 
23
- # Load creators mapping
24
- with open(DATA_DIR / "creators.json", "r", encoding="utf-8") as f:
25
  CREATORS_MAP = json.load(f)
26
 
27
 
 
6
  from pathlib import Path
7
  from typing import Any, Dict, List, Set
8
 
9
+ # Import configuration from unified config module
10
+ from .config import (
11
+ SENTENCES_JSON,
12
+ WORKS_JSON,
13
+ TOPICS_JSON,
14
+ CREATORS_JSON
15
+ )
16
 
17
+ # Load data files
18
+ with open(SENTENCES_JSON, "r", encoding="utf-8") as f:
19
  SENTENCES = json.load(f)
20
 
21
+ with open(WORKS_JSON, "r", encoding="utf-8") as f:
22
  WORKS = json.load(f)
23
 
24
+ with open(TOPICS_JSON, "r", encoding="utf-8") as f:
25
  TOPICS = json.load(f)
26
 
27
+ with open(CREATORS_JSON, "r", encoding="utf-8") as f:
 
28
  CREATORS_MAP = json.load(f)
29
 
30
 
backend/runner/inference.py CHANGED
@@ -29,31 +29,34 @@ from transformers import CLIPModel, CLIPProcessor
29
  from .filtering import get_filtered_sentence_ids
30
  # on-demand Grad-ECLIP & region-aware ranking
31
  from .heatmap import generate_heatmap
 
 
 
 
 
 
32
 
33
  # ─── Configuration ───────────────────────────────────────────────────────────
34
- ROOT = Path(__file__).resolve().parents[2] # artefact-context/
35
-
36
- # Model selection - change this to switch between models
37
  MODEL_TYPE: Literal["clip", "paintingclip"] = "paintingclip"
38
 
39
- # Model paths and settings
40
  MODEL_CONFIG = {
41
  "clip": {
42
  "model_id": "openai/clip-vit-base-patch32",
43
- "embeddings_dir": ROOT / "data" / "embeddings" / "CLIP_Embeddings",
44
  "use_lora": False,
45
  "lora_dir": None,
46
  },
47
  "paintingclip": {
48
  "model_id": "openai/clip-vit-base-patch32",
49
- "embeddings_dir": ROOT / "data" / "embeddings" / "PaintingCLIP_Embeddings",
50
  "use_lora": True,
51
- "lora_dir": ROOT / "data" / "models" / "PaintingCLIP",
52
  },
53
  }
54
 
55
  # Data paths
56
- SENTENCES_JSON = ROOT / "data" / "json_info" / "sentences.json"
57
 
58
  # Inference settings
59
  TOP_K = 25 # Number of results to return
 
29
  from .filtering import get_filtered_sentence_ids
30
  # on-demand Grad-ECLIP & region-aware ranking
31
  from .heatmap import generate_heatmap
32
+ from .config import (
33
+ CLIP_EMBEDDINGS_DIR,
34
+ PAINTINGCLIP_EMBEDDINGS_DIR,
35
+ PAINTINGCLIP_MODEL_DIR,
36
+ SENTENCES_JSON
37
+ )
38
 
39
  # ─── Configuration ───────────────────────────────────────────────────────────
 
 
 
40
  MODEL_TYPE: Literal["clip", "paintingclip"] = "paintingclip"
41
 
42
+ # Model selection - change this to switch between models
43
  MODEL_CONFIG = {
44
  "clip": {
45
  "model_id": "openai/clip-vit-base-patch32",
46
+ "embeddings_dir": CLIP_EMBEDDINGS_DIR,
47
  "use_lora": False,
48
  "lora_dir": None,
49
  },
50
  "paintingclip": {
51
  "model_id": "openai/clip-vit-base-patch32",
52
+ "embeddings_dir": PAINTINGCLIP_EMBEDDINGS_DIR,
53
  "use_lora": True,
54
+ "lora_dir": PAINTINGCLIP_MODEL_DIR,
55
  },
56
  }
57
 
58
  # Data paths
59
+ # SENTENCES_JSON = ROOT / "data" / "json_info" / "sentences.json"
60
 
61
  # Inference settings
62
  TOP_K = 25 # Number of results to return
backend/runner/tasks.py CHANGED
@@ -9,6 +9,7 @@ import time
9
  from datetime import datetime, timezone
10
 
11
  from .inference import run_inference
 
12
 
13
  # In-memory runs store and lock for thread-safe updates
14
  runs: dict[str, dict] = {}
@@ -19,8 +20,10 @@ FORCE_ERROR = os.getenv("FORCE_ERROR") == "1"
19
  SLEEP_SECS = int(os.getenv("SLEEP_SECS", "0"))
20
 
21
  # Get the base directory for file storage (project root)
22
- BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
23
- OUTPUTS_DIR = os.path.join(BASE_DIR, "data", "outputs")
 
 
24
 
25
 
26
  def run_task(
 
9
  from datetime import datetime, timezone
10
 
11
  from .inference import run_inference
12
+ from .config import OUTPUTS_DIR
13
 
14
  # In-memory runs store and lock for thread-safe updates
15
  runs: dict[str, dict] = {}
 
20
  SLEEP_SECS = int(os.getenv("SLEEP_SECS", "0"))
21
 
22
  # Get the base directory for file storage (project root)
23
+ # BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
24
+ # OUTPUTS_DIR = os.path.join(BASE_DIR, "data", "outputs")
25
+
26
+ # OUTPUTS_DIR is now imported from config
27
 
28
 
29
  def run_task(