Spaces:
Running on Zero
Running on Zero
Upload config.py with huggingface_hub
Browse files
config.py
CHANGED
|
@@ -16,6 +16,13 @@ USE_27B = os.environ.get("USE_27B", "false").lower() == "true"
|
|
| 16 |
QUANTIZE_4B = os.environ.get("QUANTIZE_4B", "true").lower() == "true"
|
| 17 |
ENABLE_MEDASR = os.environ.get("ENABLE_MEDASR", "true").lower() == "true"
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
# --- Prompt Repetition (arXiv:2512.14982) ---
|
| 20 |
# Repeating the user prompt improves non-reasoning LLM performance (47 wins, 0 losses
|
| 21 |
# across 70 benchmark-model combos). Only increases prefill tokens, no extra generation.
|
|
|
|
| 16 |
QUANTIZE_4B = os.environ.get("QUANTIZE_4B", "true").lower() == "true"
|
| 17 |
ENABLE_MEDASR = os.environ.get("ENABLE_MEDASR", "true").lower() == "true"
|
| 18 |
|
| 19 |
+
# --- Performance Optimization ---
|
| 20 |
+
# torch.compile: JIT compilation speedup; the first inference is slow (compilation), subsequent ones are 30-80% faster
|
| 21 |
+
# Disabled by default: every ZeroGPU cold start has to recompile, so it is not worth the cost
|
| 22 |
+
ENABLE_TORCH_COMPILE = os.environ.get("ENABLE_TORCH_COMPILE", "false").lower() == "true"
|
| 23 |
+
# SDPA: optimized attention computation; saves GPU memory and is faster (no compilation overhead)
|
| 24 |
+
ENABLE_SDPA = os.environ.get("ENABLE_SDPA", "true").lower() == "true"
|
| 25 |
+
|
| 26 |
# --- Prompt Repetition (arXiv:2512.14982) ---
|
| 27 |
# Repeating the user prompt improves non-reasoning LLM performance (47 wins, 0 losses
|
| 28 |
# across 70 benchmark-model combos). Only increases prefill tokens, no extra generation.
|