Commit
·
c5c47e3
1
Parent(s):
75d63f1
Revert to Gradio 5.49.1 and ASCII logs
Browse files
- README.md +1 -2
- app.py +24 -24
- requirements.txt +3 -4
README.md
CHANGED
|
@@ -4,8 +4,7 @@ emoji: "⚡"
|
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
-
python_version: "3.10"
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
| 11 |
license: mit
|
|
|
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 5.49.1
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
app.py
CHANGED
|
@@ -18,10 +18,10 @@ startup_log = []
|
|
| 18 |
def check_import(name, import_fn):
|
| 19 |
try:
|
| 20 |
result = import_fn()
|
| 21 |
-
startup_log.append(f"
|
| 22 |
return True
|
| 23 |
except Exception as e:
|
| 24 |
-
startup_log.append(f"
|
| 25 |
return False
|
| 26 |
|
| 27 |
check_import("torch", lambda: __import__("torch").__version__)
|
|
@@ -33,18 +33,18 @@ check_import("huggingface_hub", lambda: __import__("huggingface_hub").__version_
|
|
| 33 |
|
| 34 |
try:
|
| 35 |
from trl import GRPOConfig, GRPOTrainer
|
| 36 |
-
startup_log.append("
|
| 37 |
except Exception as e:
|
| 38 |
-
startup_log.append(f"
|
| 39 |
|
| 40 |
try:
|
| 41 |
import torch
|
| 42 |
if torch.cuda.is_available():
|
| 43 |
-
startup_log.append(f"
|
| 44 |
else:
|
| 45 |
-
startup_log.append("
|
| 46 |
except Exception as e:
|
| 47 |
-
startup_log.append(f"
|
| 48 |
|
| 49 |
# Prefer simulator + KernelBuilder from bundled original_performance_takehome.
|
| 50 |
# In Spaces, this keeps evaluation consistent and enables correctness checks.
|
|
@@ -61,19 +61,19 @@ try:
|
|
| 61 |
SLOT_LIMITS, VLEN, N_CORES, SCRATCH_SIZE, CoreState
|
| 62 |
)
|
| 63 |
from perf_takehome import KernelBuilder, HASH_STAGES
|
| 64 |
-
startup_log.append("
|
| 65 |
SIMULATOR_AVAILABLE = True
|
| 66 |
except Exception as e:
|
| 67 |
-
startup_log.append(f"
|
| 68 |
SIMULATOR_AVAILABLE = False
|
| 69 |
|
| 70 |
# Hugging Face Hub adapter persistence via dataset repo
|
| 71 |
try:
|
| 72 |
from huggingface_hub import HfApi, snapshot_download
|
| 73 |
-
startup_log.append("
|
| 74 |
HF_HUB_AVAILABLE = True
|
| 75 |
except Exception as e:
|
| 76 |
-
startup_log.append(f"
|
| 77 |
HF_HUB_AVAILABLE = False
|
| 78 |
|
| 79 |
# Constants
|
|
@@ -142,7 +142,7 @@ def _adapter_exists(path: str) -> bool:
|
|
| 142 |
|
| 143 |
def _try_download_adapter(add_log) -> None:
|
| 144 |
if not HF_HUB_AVAILABLE:
|
| 145 |
-
add_log("
|
| 146 |
return
|
| 147 |
_ensure_dir(os.path.dirname(ADAPTER_DIR))
|
| 148 |
allow = [f"{ADAPTER_DATASET_SUBDIR}/**"]
|
|
@@ -170,7 +170,7 @@ def _try_download_adapter(add_log) -> None:
|
|
| 170 |
dst = os.path.join(dst_root, name)
|
| 171 |
with open(src, "rb") as fsrc, open(dst, "wb") as fdst:
|
| 172 |
fdst.write(fsrc.read())
|
| 173 |
-
add_log(f"
|
| 174 |
else:
|
| 175 |
add_log("ℹ No adapter found in dataset yet")
|
| 176 |
except Exception as e:
|
|
@@ -179,7 +179,7 @@ def _try_download_adapter(add_log) -> None:
|
|
| 179 |
|
| 180 |
def _try_upload_adapter(add_log) -> None:
|
| 181 |
if not HF_HUB_AVAILABLE:
|
| 182 |
-
add_log("
|
| 183 |
return
|
| 184 |
if not _adapter_exists(ADAPTER_DIR):
|
| 185 |
add_log("ℹ No adapter to upload yet")
|
|
@@ -198,7 +198,7 @@ def _try_upload_adapter(add_log) -> None:
|
|
| 198 |
path_in_repo=ADAPTER_DATASET_SUBDIR,
|
| 199 |
commit_message="Update perf_takehome adapter",
|
| 200 |
)
|
| 201 |
-
add_log(f"
|
| 202 |
except Exception as e:
|
| 203 |
add_log(f"ℹ Adapter upload skipped: {str(e)[:160]}")
|
| 204 |
|
|
@@ -496,7 +496,7 @@ def run_training(model_name, chunk_steps, max_total_steps, max_minutes, auto_con
|
|
| 496 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
| 497 |
if tokenizer.pad_token is None:
|
| 498 |
tokenizer.pad_token = tokenizer.eos_token
|
| 499 |
-
add_log("
|
| 500 |
|
| 501 |
# Load model with 4-bit quantization
|
| 502 |
add_log("Loading model (4-bit quantization)...")
|
|
@@ -511,7 +511,7 @@ def run_training(model_name, chunk_steps, max_total_steps, max_minutes, auto_con
|
|
| 511 |
device_map="auto",
|
| 512 |
trust_remote_code=True,
|
| 513 |
)
|
| 514 |
-
add_log(f"
|
| 515 |
|
| 516 |
# Try to restore adapter from dataset before loading it
|
| 517 |
_try_download_adapter(add_log)
|
|
@@ -521,7 +521,7 @@ def run_training(model_name, chunk_steps, max_total_steps, max_minutes, auto_con
|
|
| 521 |
if os.path.isdir(ADAPTER_DIR) and os.path.exists(os.path.join(ADAPTER_DIR, "adapter_config.json")):
|
| 522 |
add_log("Loading existing LoRA adapter (resume)...")
|
| 523 |
model = PeftModel.from_pretrained(base_model, ADAPTER_DIR, is_trainable=True)
|
| 524 |
-
add_log("
|
| 525 |
resume_adapter = True
|
| 526 |
else:
|
| 527 |
model = base_model
|
|
@@ -530,7 +530,7 @@ def run_training(model_name, chunk_steps, max_total_steps, max_minutes, auto_con
|
|
| 530 |
add_log("Creating VLIW optimization dataset...")
|
| 531 |
prompts = [PERF_TAKEHOME_PROMPT] * 16
|
| 532 |
dataset = Dataset.from_dict({"prompt": prompts})
|
| 533 |
-
add_log(f"
|
| 534 |
|
| 535 |
# LoRA config
|
| 536 |
add_log("Setting up LoRA...")
|
|
@@ -571,7 +571,7 @@ def run_training(model_name, chunk_steps, max_total_steps, max_minutes, auto_con
|
|
| 571 |
output_dir = os.path.join(PERSIST_DIR, "grpo_perf_takehome_output")
|
| 572 |
os.makedirs(output_dir, exist_ok=True)
|
| 573 |
|
| 574 |
-
add_log("
|
| 575 |
add_log("Starting training loop...")
|
| 576 |
add_log("(Stops early if target reached; can auto-continue in chunks)")
|
| 577 |
|
|
@@ -632,10 +632,10 @@ def run_training(model_name, chunk_steps, max_total_steps, max_minutes, auto_con
|
|
| 632 |
try:
|
| 633 |
os.makedirs(os.path.dirname(ADAPTER_DIR), exist_ok=True)
|
| 634 |
trainer.save_model(ADAPTER_DIR)
|
| 635 |
-
add_log(f"
|
| 636 |
_try_upload_adapter(add_log)
|
| 637 |
except Exception as e:
|
| 638 |
-
add_log(f"
|
| 639 |
|
| 640 |
if not auto_continue:
|
| 641 |
break
|
|
@@ -663,11 +663,11 @@ def run_training(model_name, chunk_steps, max_total_steps, max_minutes, auto_con
|
|
| 663 |
else:
|
| 664 |
add_log(f"Generated kernel invalid: {verify_out.get('msg', '')[:160]}")
|
| 665 |
|
| 666 |
-
add_log("\n
|
| 667 |
|
| 668 |
except Exception as e:
|
| 669 |
import traceback
|
| 670 |
-
add_log(f"
|
| 671 |
add_log(traceback.format_exc()[:800])
|
| 672 |
finally:
|
| 673 |
with state_lock:
|
|
|
|
| 18 |
def check_import(name, import_fn):
|
| 19 |
try:
|
| 20 |
result = import_fn()
|
| 21 |
+
startup_log.append(f"[OK] {name}: {result}")
|
| 22 |
return True
|
| 23 |
except Exception as e:
|
| 24 |
+
startup_log.append(f"[ERR] {name}: {str(e)[:80]}")
|
| 25 |
return False
|
| 26 |
|
| 27 |
check_import("torch", lambda: __import__("torch").__version__)
|
|
|
|
| 33 |
|
| 34 |
try:
|
| 35 |
from trl import GRPOConfig, GRPOTrainer
|
| 36 |
+
startup_log.append("[OK] GRPOTrainer: OK")
|
| 37 |
except Exception as e:
|
| 38 |
+
startup_log.append(f"[ERR] GRPOTrainer: {e}")
|
| 39 |
|
| 40 |
try:
|
| 41 |
import torch
|
| 42 |
if torch.cuda.is_available():
|
| 43 |
+
startup_log.append(f"[OK] CUDA: {torch.cuda.get_device_name(0)}")
|
| 44 |
else:
|
| 45 |
+
startup_log.append("[ERR] CUDA: Not available")
|
| 46 |
except Exception as e:
|
| 47 |
+
startup_log.append(f"[ERR] CUDA check: {e}")
|
| 48 |
|
| 49 |
# Prefer simulator + KernelBuilder from bundled original_performance_takehome.
|
| 50 |
# In Spaces, this keeps evaluation consistent and enables correctness checks.
|
|
|
|
| 61 |
SLOT_LIMITS, VLEN, N_CORES, SCRATCH_SIZE, CoreState
|
| 62 |
)
|
| 63 |
from perf_takehome import KernelBuilder, HASH_STAGES
|
| 64 |
+
startup_log.append("[OK] VLIW Simulator: OK")
|
| 65 |
SIMULATOR_AVAILABLE = True
|
| 66 |
except Exception as e:
|
| 67 |
+
startup_log.append(f"[ERR] VLIW Simulator: {e}")
|
| 68 |
SIMULATOR_AVAILABLE = False
|
| 69 |
|
| 70 |
# Hugging Face Hub adapter persistence via dataset repo
|
| 71 |
try:
|
| 72 |
from huggingface_hub import HfApi, snapshot_download
|
| 73 |
+
startup_log.append("[OK] huggingface_hub: OK")
|
| 74 |
HF_HUB_AVAILABLE = True
|
| 75 |
except Exception as e:
|
| 76 |
+
startup_log.append(f"[ERR] huggingface_hub: {str(e)[:80]}")
|
| 77 |
HF_HUB_AVAILABLE = False
|
| 78 |
|
| 79 |
# Constants
|
|
|
|
| 142 |
|
| 143 |
def _try_download_adapter(add_log) -> None:
|
| 144 |
if not HF_HUB_AVAILABLE:
|
| 145 |
+
add_log("[ERR] Hub sync disabled: huggingface_hub not available")
|
| 146 |
return
|
| 147 |
_ensure_dir(os.path.dirname(ADAPTER_DIR))
|
| 148 |
allow = [f"{ADAPTER_DATASET_SUBDIR}/**"]
|
|
|
|
| 170 |
dst = os.path.join(dst_root, name)
|
| 171 |
with open(src, "rb") as fsrc, open(dst, "wb") as fdst:
|
| 172 |
fdst.write(fsrc.read())
|
| 173 |
+
add_log(f"[OK] Downloaded adapter from dataset: {ADAPTER_DATASET_REPO}/{ADAPTER_DATASET_SUBDIR}")
|
| 174 |
else:
|
| 175 |
add_log("ℹ No adapter found in dataset yet")
|
| 176 |
except Exception as e:
|
|
|
|
| 179 |
|
| 180 |
def _try_upload_adapter(add_log) -> None:
|
| 181 |
if not HF_HUB_AVAILABLE:
|
| 182 |
+
add_log("[ERR] Hub sync disabled: huggingface_hub not available")
|
| 183 |
return
|
| 184 |
if not _adapter_exists(ADAPTER_DIR):
|
| 185 |
add_log("ℹ No adapter to upload yet")
|
|
|
|
| 198 |
path_in_repo=ADAPTER_DATASET_SUBDIR,
|
| 199 |
commit_message="Update perf_takehome adapter",
|
| 200 |
)
|
| 201 |
+
add_log(f"[OK] Uploaded adapter to dataset: {ADAPTER_DATASET_REPO}/{ADAPTER_DATASET_SUBDIR}")
|
| 202 |
except Exception as e:
|
| 203 |
add_log(f"ℹ Adapter upload skipped: {str(e)[:160]}")
|
| 204 |
|
|
|
|
| 496 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
| 497 |
if tokenizer.pad_token is None:
|
| 498 |
tokenizer.pad_token = tokenizer.eos_token
|
| 499 |
+
add_log("[OK] Tokenizer ready")
|
| 500 |
|
| 501 |
# Load model with 4-bit quantization
|
| 502 |
add_log("Loading model (4-bit quantization)...")
|
|
|
|
| 511 |
device_map="auto",
|
| 512 |
trust_remote_code=True,
|
| 513 |
)
|
| 514 |
+
add_log(f"[OK] Base model loaded on {next(base_model.parameters()).device}")
|
| 515 |
|
| 516 |
# Try to restore adapter from dataset before loading it
|
| 517 |
_try_download_adapter(add_log)
|
|
|
|
| 521 |
if os.path.isdir(ADAPTER_DIR) and os.path.exists(os.path.join(ADAPTER_DIR, "adapter_config.json")):
|
| 522 |
add_log("Loading existing LoRA adapter (resume)...")
|
| 523 |
model = PeftModel.from_pretrained(base_model, ADAPTER_DIR, is_trainable=True)
|
| 524 |
+
add_log("[OK] Adapter loaded")
|
| 525 |
resume_adapter = True
|
| 526 |
else:
|
| 527 |
model = base_model
|
|
|
|
| 530 |
add_log("Creating VLIW optimization dataset...")
|
| 531 |
prompts = [PERF_TAKEHOME_PROMPT] * 16
|
| 532 |
dataset = Dataset.from_dict({"prompt": prompts})
|
| 533 |
+
add_log(f"[OK] Dataset ready: {len(prompts)} prompts")
|
| 534 |
|
| 535 |
# LoRA config
|
| 536 |
add_log("Setting up LoRA...")
|
|
|
|
| 571 |
output_dir = os.path.join(PERSIST_DIR, "grpo_perf_takehome_output")
|
| 572 |
os.makedirs(output_dir, exist_ok=True)
|
| 573 |
|
| 574 |
+
add_log("[OK] Trainer config ready")
|
| 575 |
add_log("Starting training loop...")
|
| 576 |
add_log("(Stops early if target reached; can auto-continue in chunks)")
|
| 577 |
|
|
|
|
| 632 |
try:
|
| 633 |
os.makedirs(os.path.dirname(ADAPTER_DIR), exist_ok=True)
|
| 634 |
trainer.save_model(ADAPTER_DIR)
|
| 635 |
+
add_log(f"[OK] Saved adapter to {ADAPTER_DIR}")
|
| 636 |
_try_upload_adapter(add_log)
|
| 637 |
except Exception as e:
|
| 638 |
+
add_log(f"[ERR] Failed to save adapter: {str(e)[:120]}")
|
| 639 |
|
| 640 |
if not auto_continue:
|
| 641 |
break
|
|
|
|
| 663 |
else:
|
| 664 |
add_log(f"Generated kernel invalid: {verify_out.get('msg', '')[:160]}")
|
| 665 |
|
| 666 |
+
add_log("\n[OK] All done!")
|
| 667 |
|
| 668 |
except Exception as e:
|
| 669 |
import traceback
|
| 670 |
+
add_log(f"[ERR] Error: {e}")
|
| 671 |
add_log(traceback.format_exc()[:800])
|
| 672 |
finally:
|
| 673 |
with state_lock:
|
requirements.txt
CHANGED
|
@@ -1,10 +1,9 @@
|
|
| 1 |
torch>=2.1.0
|
| 2 |
transformers>=4.45.0
|
| 3 |
-
huggingface_hub>=0.
|
| 4 |
-
datasets
|
| 5 |
peft>=0.13.0
|
| 6 |
trl>=0.12.0
|
| 7 |
accelerate>=0.34.0
|
| 8 |
bitsandbytes>=0.44.0
|
| 9 |
-
gradio>=
|
| 10 |
-
audioop-lts>=0.2.2
|
|
|
|
| 1 |
torch>=2.1.0
|
| 2 |
transformers>=4.45.0
|
| 3 |
+
huggingface_hub>=0.30.0
|
| 4 |
+
datasets>=2.18.0
|
| 5 |
peft>=0.13.0
|
| 6 |
trl>=0.12.0
|
| 7 |
accelerate>=0.34.0
|
| 8 |
bitsandbytes>=0.44.0
|
| 9 |
+
gradio>=5.49.1,<6.0.0
|
|
|