Try to get runner to work with space
Browse files
- backend/runner/app.py +7 -9
- backend/runner/config.py +36 -0
- backend/runner/filtering.py +12 -9
- backend/runner/inference.py +11 -8
- backend/runner/tasks.py +5 -2
backend/runner/app.py
CHANGED
|
@@ -57,15 +57,13 @@ CORS(app, resources={r"/*": {"origins": "*"}}) # allow SPA on :5173
|
|
| 57 |
executor = ThreadPoolExecutor(max_workers=4)
|
| 58 |
|
| 59 |
# Use the Space data volume, not the repo folder
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
for p in (ARTIFACTS_DIR, OUTPUTS_DIR, JSON_INFO_DIR, MARKER_DIR):
|
| 68 |
-
p.mkdir(parents=True, exist_ok=True)
|
| 69 |
|
| 70 |
# --------------------------------------------------------------------------- #
|
| 71 |
# Global Data (safe loading for Phase 1) #
|
|
|
|
| 57 |
executor = ThreadPoolExecutor(max_workers=4)
|
| 58 |
|
| 59 |
# Use the Space data volume, not the repo folder
|
| 60 |
+
from .config import (
|
| 61 |
+
DATA_ROOT,
|
| 62 |
+
ARTIFACTS_DIR,
|
| 63 |
+
OUTPUTS_DIR,
|
| 64 |
+
JSON_INFO_DIR,
|
| 65 |
+
MARKER_DIR
|
| 66 |
+
)
|
|
|
|
|
|
|
| 67 |
|
| 68 |
# --------------------------------------------------------------------------- #
|
| 69 |
# Global Data (safe loading for Phase 1) #
|
backend/runner/config.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Unified configuration for data paths in Hugging Face Spaces.
|
| 3 |
+
All runner modules should import from this module instead of defining their own paths.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
# Data root from environment variable (set by Hugging Face Space)
|
| 10 |
+
DATA_ROOT = Path(os.getenv("DATA_ROOT", "/data")).resolve()
|
| 11 |
+
|
| 12 |
+
# Core data directories
|
| 13 |
+
EMBEDDINGS_DIR = DATA_ROOT / "embeddings"
|
| 14 |
+
JSON_INFO_DIR = DATA_ROOT / "json_info"
|
| 15 |
+
MODELS_DIR = DATA_ROOT / "models"
|
| 16 |
+
OUTPUTS_DIR = DATA_ROOT / "outputs"
|
| 17 |
+
ARTIFACTS_DIR = DATA_ROOT / "artifacts"
|
| 18 |
+
MARKER_DIR = DATA_ROOT / "marker_output"
|
| 19 |
+
|
| 20 |
+
# Model-specific embedding directories
|
| 21 |
+
CLIP_EMBEDDINGS_DIR = EMBEDDINGS_DIR / "CLIP_Embeddings"
|
| 22 |
+
PAINTINGCLIP_EMBEDDINGS_DIR = EMBEDDINGS_DIR / "PaintingCLIP_Embeddings"
|
| 23 |
+
|
| 24 |
+
# Model directories
|
| 25 |
+
PAINTINGCLIP_MODEL_DIR = MODELS_DIR / "PaintingCLIP"
|
| 26 |
+
|
| 27 |
+
# Metadata files
|
| 28 |
+
SENTENCES_JSON = JSON_INFO_DIR / "sentences.json"
|
| 29 |
+
WORKS_JSON = JSON_INFO_DIR / "works.json"
|
| 30 |
+
TOPICS_JSON = JSON_INFO_DIR / "topics.json"
|
| 31 |
+
CREATORS_JSON = JSON_INFO_DIR / "creators.json"
|
| 32 |
+
TOPIC_NAMES_JSON = JSON_INFO_DIR / "topic_names.json"
|
| 33 |
+
|
| 34 |
+
# Ensure directories exist
|
| 35 |
+
for directory in [EMBEDDINGS_DIR, JSON_INFO_DIR, MODELS_DIR, OUTPUTS_DIR, ARTIFACTS_DIR, MARKER_DIR]:
|
| 36 |
+
directory.mkdir(parents=True, exist_ok=True)
|
backend/runner/filtering.py
CHANGED
|
@@ -6,22 +6,25 @@ import json
|
|
| 6 |
from pathlib import Path
|
| 7 |
from typing import Any, Dict, List, Set
|
| 8 |
|
| 9 |
-
#
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
# Load
|
| 14 |
-
with open(
|
| 15 |
SENTENCES = json.load(f)
|
| 16 |
|
| 17 |
-
with open(
|
| 18 |
WORKS = json.load(f)
|
| 19 |
|
| 20 |
-
with open(
|
| 21 |
TOPICS = json.load(f)
|
| 22 |
|
| 23 |
-
|
| 24 |
-
with open(DATA_DIR / "creators.json", "r", encoding="utf-8") as f:
|
| 25 |
CREATORS_MAP = json.load(f)
|
| 26 |
|
| 27 |
|
|
|
|
| 6 |
from pathlib import Path
|
| 7 |
from typing import Any, Dict, List, Set
|
| 8 |
|
| 9 |
+
# Import configuration from unified config module
|
| 10 |
+
from .config import (
|
| 11 |
+
SENTENCES_JSON,
|
| 12 |
+
WORKS_JSON,
|
| 13 |
+
TOPICS_JSON,
|
| 14 |
+
CREATORS_JSON
|
| 15 |
+
)
|
| 16 |
|
| 17 |
+
# Load data files
|
| 18 |
+
with open(SENTENCES_JSON, "r", encoding="utf-8") as f:
|
| 19 |
SENTENCES = json.load(f)
|
| 20 |
|
| 21 |
+
with open(WORKS_JSON, "r", encoding="utf-8") as f:
|
| 22 |
WORKS = json.load(f)
|
| 23 |
|
| 24 |
+
with open(TOPICS_JSON, "r", encoding="utf-8") as f:
|
| 25 |
TOPICS = json.load(f)
|
| 26 |
|
| 27 |
+
with open(CREATORS_JSON, "r", encoding="utf-8") as f:
|
|
|
|
| 28 |
CREATORS_MAP = json.load(f)
|
| 29 |
|
| 30 |
|
backend/runner/inference.py
CHANGED
|
@@ -29,31 +29,34 @@ from transformers import CLIPModel, CLIPProcessor
|
|
| 29 |
from .filtering import get_filtered_sentence_ids
|
| 30 |
# on-demand Grad-ECLIP & region-aware ranking
|
| 31 |
from .heatmap import generate_heatmap
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
# ─── Configuration ───────────────────────────────────────────────────────────
|
| 34 |
-
ROOT = Path(__file__).resolve().parents[2] # artefact-context/
|
| 35 |
-
|
| 36 |
-
# Model selection - change this to switch between models
|
| 37 |
MODEL_TYPE: Literal["clip", "paintingclip"] = "paintingclip"
|
| 38 |
|
| 39 |
-
# Model
|
| 40 |
MODEL_CONFIG = {
|
| 41 |
"clip": {
|
| 42 |
"model_id": "openai/clip-vit-base-patch32",
|
| 43 |
-
"embeddings_dir":
|
| 44 |
"use_lora": False,
|
| 45 |
"lora_dir": None,
|
| 46 |
},
|
| 47 |
"paintingclip": {
|
| 48 |
"model_id": "openai/clip-vit-base-patch32",
|
| 49 |
-
"embeddings_dir":
|
| 50 |
"use_lora": True,
|
| 51 |
-
"lora_dir":
|
| 52 |
},
|
| 53 |
}
|
| 54 |
|
| 55 |
# Data paths
|
| 56 |
-
SENTENCES_JSON = ROOT / "data" / "json_info" / "sentences.json"
|
| 57 |
|
| 58 |
# Inference settings
|
| 59 |
TOP_K = 25 # Number of results to return
|
|
|
|
| 29 |
from .filtering import get_filtered_sentence_ids
|
| 30 |
# on-demand Grad-ECLIP & region-aware ranking
|
| 31 |
from .heatmap import generate_heatmap
|
| 32 |
+
from .config import (
|
| 33 |
+
CLIP_EMBEDDINGS_DIR,
|
| 34 |
+
PAINTINGCLIP_EMBEDDINGS_DIR,
|
| 35 |
+
PAINTINGCLIP_MODEL_DIR,
|
| 36 |
+
SENTENCES_JSON
|
| 37 |
+
)
|
| 38 |
|
| 39 |
# ─── Configuration ───────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
| 40 |
MODEL_TYPE: Literal["clip", "paintingclip"] = "paintingclip"
|
| 41 |
|
| 42 |
+
# Model selection - change this to switch between models
|
| 43 |
MODEL_CONFIG = {
|
| 44 |
"clip": {
|
| 45 |
"model_id": "openai/clip-vit-base-patch32",
|
| 46 |
+
"embeddings_dir": CLIP_EMBEDDINGS_DIR,
|
| 47 |
"use_lora": False,
|
| 48 |
"lora_dir": None,
|
| 49 |
},
|
| 50 |
"paintingclip": {
|
| 51 |
"model_id": "openai/clip-vit-base-patch32",
|
| 52 |
+
"embeddings_dir": PAINTINGCLIP_EMBEDDINGS_DIR,
|
| 53 |
"use_lora": True,
|
| 54 |
+
"lora_dir": PAINTINGCLIP_MODEL_DIR,
|
| 55 |
},
|
| 56 |
}
|
| 57 |
|
| 58 |
# Data paths
|
| 59 |
+
# SENTENCES_JSON = ROOT / "data" / "json_info" / "sentences.json"
|
| 60 |
|
| 61 |
# Inference settings
|
| 62 |
TOP_K = 25 # Number of results to return
|
backend/runner/tasks.py
CHANGED
|
@@ -9,6 +9,7 @@ import time
|
|
| 9 |
from datetime import datetime, timezone
|
| 10 |
|
| 11 |
from .inference import run_inference
|
|
|
|
| 12 |
|
| 13 |
# In-memory runs store and lock for thread-safe updates
|
| 14 |
runs: dict[str, dict] = {}
|
|
@@ -19,8 +20,10 @@ FORCE_ERROR = os.getenv("FORCE_ERROR") == "1"
|
|
| 19 |
SLEEP_SECS = int(os.getenv("SLEEP_SECS", "0"))
|
| 20 |
|
| 21 |
# Get the base directory for file storage (project root)
|
| 22 |
-
BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
| 23 |
-
OUTPUTS_DIR = os.path.join(BASE_DIR, "data", "outputs")
|
|
|
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
def run_task(
|
|
|
|
| 9 |
from datetime import datetime, timezone
|
| 10 |
|
| 11 |
from .inference import run_inference
|
| 12 |
+
from .config import OUTPUTS_DIR
|
| 13 |
|
| 14 |
# In-memory runs store and lock for thread-safe updates
|
| 15 |
runs: dict[str, dict] = {}
|
|
|
|
| 20 |
SLEEP_SECS = int(os.getenv("SLEEP_SECS", "0"))
|
| 21 |
|
| 22 |
# Get the base directory for file storage (project root)
|
| 23 |
+
# BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
| 24 |
+
# OUTPUTS_DIR = os.path.join(BASE_DIR, "data", "outputs")
|
| 25 |
+
|
| 26 |
+
# OUTPUTS_DIR is now imported from config
|
| 27 |
|
| 28 |
|
| 29 |
def run_task(
|