Spaces:
Sleeping
Sleeping
ming committed on
Commit ·
fe47248
1
Parent(s): 1b76b21
feat: Switch V4 to Qwen2.5-1.5B for HF memory compatibility
Browse files
- Replace Phi-3-mini (3.8B) with Qwen2.5-1.5B-Instruct
- Phi-3-mini still exceeded 16GB limit even with V2 disabled
- Qwen2.5-1.5B: ~3GB RAM (comfortable within 16GB)
- 3x larger than 0.5B version (much better quality)
- Good structured output and NDJSON generation capability
- Memory: V4: 3GB + system: 2GB = ~5GB (plenty of headroom)
- app/core/config.py +2 -2
app/core/config.py
CHANGED
|
@@ -107,9 +107,9 @@ class Settings(BaseSettings):
|
|
| 107 |
description="Enable V4 model warmup on startup (uses 1-2GB RAM with quantization)",
|
| 108 |
)
|
| 109 |
v4_model_id: str = Field(
|
| 110 |
-
default="microsoft/Phi-3-mini-4k-instruct",
|
| 111 |
env="V4_MODEL_ID",
|
| 112 |
-
description="Model ID for V4 structured output (Phi-3-mini, 3.8B params)",
|
| 113 |
)
|
| 114 |
v4_max_tokens: int = Field(
|
| 115 |
default=1024, env="V4_MAX_TOKENS", ge=128, le=2048, description="Max tokens for V4 generation"
|
|
|
|
| 107 |
description="Enable V4 model warmup on startup (uses 1-2GB RAM with quantization)",
|
| 108 |
)
|
| 109 |
v4_model_id: str = Field(
|
| 110 |
+
default="Qwen/Qwen2.5-1.5B-Instruct",
|
| 111 |
env="V4_MODEL_ID",
|
| 112 |
+
description="Model ID for V4 structured output (1.5B params, fits HF 16GB limit)",
|
| 113 |
)
|
| 114 |
v4_max_tokens: int = Field(
|
| 115 |
default=1024, env="V4_MAX_TOKENS", ge=128, le=2048, description="Max tokens for V4 generation"
|