TSXu committed on
Commit
0048632
·
1 Parent(s): 1b5453a

FA3 + bf16 (clean version)

Browse files

- Use bf16 dtype (FA3 requires bf16/fp16, not fp32)
- Optimized compiled graph check (use metadata instead of list)
- No FP8/Int8 quantization, just FA3 + AOT compilation

Files changed (1) hide show
  1. app.py +7 -8
app.py CHANGED
@@ -161,14 +161,13 @@ HF_CACHE_FILENAME = "compiled_graph.pt2"
161
 
162
 
163
  def _check_compiled_graph_exists():
164
- """Check if compiled graph exists on HF Hub"""
165
- from huggingface_hub import hf_hub_download, HfApi
166
  try:
167
- api = HfApi()
168
- files = api.list_repo_files(HF_CACHE_REPO)
169
- return HF_CACHE_FILENAME in files
170
- except Exception as e:
171
- logger.info(f"Could not check Hub for compiled graph: {e}")
172
  return False
173
 
174
 
@@ -263,7 +262,7 @@ def init_generator():
263
  use_4bit_quantization=False,
264
  use_float8_quantization=False,
265
  use_torch_compile=False,
266
- dtype="fp32",
267
  )
268
 
269
  return generator
 
161
 
162
 
163
  def _check_compiled_graph_exists():
164
+ """Check if compiled graph exists on HF Hub (fast check)"""
165
+ from huggingface_hub import hf_hub_url, get_hf_file_metadata
166
  try:
167
+ url = hf_hub_url(HF_CACHE_REPO, HF_CACHE_FILENAME)
168
+ get_hf_file_metadata(url) # Raises if file doesn't exist
169
+ return True
170
+ except Exception:
 
171
  return False
172
 
173
 
 
262
  use_4bit_quantization=False,
263
  use_float8_quantization=False,
264
  use_torch_compile=False,
265
+ dtype="bf16", # FA3 requires bf16/fp16
266
  )
267
 
268
  return generator