Spaces:

Rajhuggingface4253
/

test-l

Running

Rajhuggingface4253 committed on Jan 29

Commit

c998b30

verified ·

1 Parent(s): 3f22d9e

Update config.py

Files changed (1) hide show

config.py CHANGED Viewed

@@ -1,8 +1,4 @@
-"""
-Configuration for LFM2.5 FastAPI Backend.
-Optimized for HuggingFace Spaces deployment (2 vCPU, 16GB RAM).
-Uses ONNX Runtime for fast CPU inference.
-"""
 from functools import lru_cache
 from typing import List
@@ -14,12 +10,12 @@ class Settings(BaseSettings):
     """Application settings optimized for HuggingFace Spaces."""
     # Application metadata
-    app_name: str = "LFM2.5 API"
     app_version: str = "1.0.0"
-    # Model settings - Using official ONNX model with Q8 for ~95% accuracy
     model_id: str = "LiquidAI/LFM2.5-1.2B-Instruct-ONNX"
-    model_variant: str = "q8"  # Options: q4 (fastest), q8 (balanced), fp16 (best quality)
     # Server settings (HuggingFace Spaces uses port 7860)
     host: str = "0.0.0.0"
@@ -28,15 +24,15 @@ class Settings(BaseSettings):
     # CORS settings
     cors_origins: List[str] = ["*"]
-    # Generation defaults (from LiquidAI recommendations)
     temperature: float = 0.1
     top_k: int = 50
     top_p: float = 0.1
     max_tokens: int = 2000  # Max output tokens (model supports 32K context)
     repetition_penalty: float = 1.05
-    # CPU optimization - increase threads for better performance
-    num_threads: int = 2  # Set higher if you have more cores (check with: python -c "import os; print(os.cpu_count())")
     # Logging
     log_level: str = "info"

 from functools import lru_cache
 from typing import List
     """Application settings optimized for HuggingFace Spaces."""
     # Application metadata
+    app_name: str = "API"
     app_version: str = "1.0.0"
     model_id: str = "LiquidAI/LFM2.5-1.2B-Instruct-ONNX"
+    model_variant: str = "q8"
     # Server settings (HuggingFace Spaces uses port 7860)
     host: str = "0.0.0.0"
     # CORS settings
     cors_origins: List[str] = ["*"]
     temperature: float = 0.1
     top_k: int = 50
     top_p: float = 0.1
     max_tokens: int = 2000  # Max output tokens (model supports 32K context)
     repetition_penalty: float = 1.05
+    num_threads: int = 2
     # Logging
     log_level: str = "info"