Spaces:
Running
Running
Update config.py
Browse files
config.py
CHANGED
|
@@ -1,8 +1,4 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Configuration for LFM2.5 FastAPI Backend.
|
| 3 |
-
Optimized for HuggingFace Spaces deployment (2 vCPU, 16GB RAM).
|
| 4 |
-
Uses ONNX Runtime for fast CPU inference.
|
| 5 |
-
"""
|
| 6 |
|
| 7 |
from functools import lru_cache
|
| 8 |
from typing import List
|
|
@@ -14,12 +10,12 @@ class Settings(BaseSettings):
|
|
| 14 |
"""Application settings optimized for HuggingFace Spaces."""
|
| 15 |
|
| 16 |
# Application metadata
|
| 17 |
-
app_name: str = "
|
| 18 |
app_version: str = "1.0.0"
|
| 19 |
|
| 20 |
-
|
| 21 |
model_id: str = "LiquidAI/LFM2.5-1.2B-Instruct-ONNX"
|
| 22 |
-
model_variant: str = "q8"
|
| 23 |
|
| 24 |
# Server settings (HuggingFace Spaces uses port 7860)
|
| 25 |
host: str = "0.0.0.0"
|
|
@@ -28,15 +24,15 @@ class Settings(BaseSettings):
|
|
| 28 |
# CORS settings
|
| 29 |
cors_origins: List[str] = ["*"]
|
| 30 |
|
| 31 |
-
|
| 32 |
temperature: float = 0.1
|
| 33 |
top_k: int = 50
|
| 34 |
top_p: float = 0.1
|
| 35 |
max_tokens: int = 2000 # Max output tokens (model supports 32K context)
|
| 36 |
repetition_penalty: float = 1.05
|
| 37 |
|
| 38 |
-
|
| 39 |
-
num_threads: int = 2
|
| 40 |
|
| 41 |
# Logging
|
| 42 |
log_level: str = "info"
|
|
|
|
| 1 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
from functools import lru_cache
|
| 4 |
from typing import List
|
|
|
|
| 10 |
"""Application settings optimized for HuggingFace Spaces."""
|
| 11 |
|
| 12 |
# Application metadata
|
| 13 |
+
app_name: str = "API"
|
| 14 |
app_version: str = "1.0.0"
|
| 15 |
|
| 16 |
+
|
| 17 |
model_id: str = "LiquidAI/LFM2.5-1.2B-Instruct-ONNX"
|
| 18 |
+
model_variant: str = "q8"
|
| 19 |
|
| 20 |
# Server settings (HuggingFace Spaces uses port 7860)
|
| 21 |
host: str = "0.0.0.0"
|
|
|
|
| 24 |
# CORS settings
|
| 25 |
cors_origins: List[str] = ["*"]
|
| 26 |
|
| 27 |
+
|
| 28 |
temperature: float = 0.1
|
| 29 |
top_k: int = 50
|
| 30 |
top_p: float = 0.1
|
| 31 |
max_tokens: int = 2000 # Max output tokens (model supports 32K context)
|
| 32 |
repetition_penalty: float = 1.05
|
| 33 |
|
| 34 |
+
|
| 35 |
+
num_threads: int = 2
|
| 36 |
|
| 37 |
# Logging
|
| 38 |
log_level: str = "info"
|