Spaces:
Sleeping
Sleeping
FA3 + bf16 (clean version)
Browse files
- Use bf16 dtype (FA3 requires bf16/fp16, not fp32)
- Optimized compiled graph check (use metadata instead of list)
- No FP8/Int8 quantization, just FA3 + AOT compilation
app.py
CHANGED
|
@@ -161,14 +161,13 @@ HF_CACHE_FILENAME = "compiled_graph.pt2"
|
|
| 161 |
|
| 162 |
|
| 163 |
def _check_compiled_graph_exists():
|
| 164 |
-
"""Check if compiled graph exists on HF Hub"""
|
| 165 |
-
from huggingface_hub import
|
| 166 |
try:
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
return
|
| 170 |
-
except Exception
|
| 171 |
-
logger.info(f"Could not check Hub for compiled graph: {e}")
|
| 172 |
return False
|
| 173 |
|
| 174 |
|
|
@@ -263,7 +262,7 @@ def init_generator():
|
|
| 263 |
use_4bit_quantization=False,
|
| 264 |
use_float8_quantization=False,
|
| 265 |
use_torch_compile=False,
|
| 266 |
-
dtype="
|
| 267 |
)
|
| 268 |
|
| 269 |
return generator
|
|
|
|
| 161 |
|
| 162 |
|
| 163 |
def _check_compiled_graph_exists():
|
| 164 |
+
"""Check if compiled graph exists on HF Hub (fast check)"""
|
| 165 |
+
from huggingface_hub import hf_hub_url, get_hf_file_metadata
|
| 166 |
try:
|
| 167 |
+
url = hf_hub_url(HF_CACHE_REPO, HF_CACHE_FILENAME)
|
| 168 |
+
get_hf_file_metadata(url) # Raises if file doesn't exist
|
| 169 |
+
return True
|
| 170 |
+
except Exception:
|
|
|
|
| 171 |
return False
|
| 172 |
|
| 173 |
|
|
|
|
| 262 |
use_4bit_quantization=False,
|
| 263 |
use_float8_quantization=False,
|
| 264 |
use_torch_compile=False,
|
| 265 |
+
dtype="bf16", # FA3 requires bf16/fp16
|
| 266 |
)
|
| 267 |
|
| 268 |
return generator
|