yipengsun committed on
Commit
67651ce
·
verified ·
1 Parent(s): be83bd3

Upload config.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.py +7 -0
config.py CHANGED
@@ -16,6 +16,13 @@ USE_27B = os.environ.get("USE_27B", "false").lower() == "true"
16
  QUANTIZE_4B = os.environ.get("QUANTIZE_4B", "true").lower() == "true"
17
  ENABLE_MEDASR = os.environ.get("ENABLE_MEDASR", "true").lower() == "true"
18
 
 
 
 
 
 
 
 
19
  # --- Prompt Repetition (arXiv:2512.14982) ---
20
  # Repeating the user prompt improves non-reasoning LLM performance (47 wins, 0 losses
21
  # across 70 benchmark-model combos). Only increases prefill tokens, no extra generation.
 
16
  QUANTIZE_4B = os.environ.get("QUANTIZE_4B", "true").lower() == "true"
17
  ENABLE_MEDASR = os.environ.get("ENABLE_MEDASR", "true").lower() == "true"
18
 
19
+ # --- Performance Optimization ---
20
+ # torch.compile: JIT compilation speedup — first inference is slow (compiling), subsequent runs are 30-80% faster
21
+ # Disabled by default: ZeroGPU cold starts would recompile on every launch, which is not worth the cost
22
+ ENABLE_TORCH_COMPILE = os.environ.get("ENABLE_TORCH_COMPILE", "false").lower() == "true"
23
+ # SDPA: optimized attention computation — saves VRAM and speeds up inference (no compilation overhead)
24
+ ENABLE_SDPA = os.environ.get("ENABLE_SDPA", "true").lower() == "true"
25
+
26
  # --- Prompt Repetition (arXiv:2512.14982) ---
27
  # Repeating the user prompt improves non-reasoning LLM performance (47 wins, 0 losses
28
  # across 70 benchmark-model combos). Only increases prefill tokens, no extra generation.