Rajhuggingface4253 committed on
Commit
c998b30
·
verified ·
1 Parent(s): 3f22d9e

Update config.py

Browse files
Files changed (1) hide show
  1. config.py +7 -11
config.py CHANGED
@@ -1,8 +1,4 @@
1
- """
2
- Configuration for LFM2.5 FastAPI Backend.
3
- Optimized for HuggingFace Spaces deployment (2 vCPU, 16GB RAM).
4
- Uses ONNX Runtime for fast CPU inference.
5
- """
6
 
7
  from functools import lru_cache
8
  from typing import List
@@ -14,12 +10,12 @@ class Settings(BaseSettings):
14
  """Application settings optimized for HuggingFace Spaces."""
15
 
16
  # Application metadata
17
- app_name: str = "LFM2.5 API"
18
  app_version: str = "1.0.0"
19
 
20
- # Model settings - Using official ONNX model with Q8 for ~95% accuracy
21
  model_id: str = "LiquidAI/LFM2.5-1.2B-Instruct-ONNX"
22
- model_variant: str = "q8" # Options: q4 (fastest), q8 (balanced), fp16 (best quality)
23
 
24
  # Server settings (HuggingFace Spaces uses port 7860)
25
  host: str = "0.0.0.0"
@@ -28,15 +24,15 @@ class Settings(BaseSettings):
28
  # CORS settings
29
  cors_origins: List[str] = ["*"]
30
 
31
- # Generation defaults (from LiquidAI recommendations)
32
  temperature: float = 0.1
33
  top_k: int = 50
34
  top_p: float = 0.1
35
  max_tokens: int = 2000 # Max output tokens (model supports 32K context)
36
  repetition_penalty: float = 1.05
37
 
38
- # CPU optimization - increase threads for better performance
39
- num_threads: int = 2 # Set higher if you have more cores (check with: python -c "import os; print(os.cpu_count())")
40
 
41
  # Logging
42
  log_level: str = "info"
 
1
+
 
 
 
 
2
 
3
  from functools import lru_cache
4
  from typing import List
 
10
  """Application settings optimized for HuggingFace Spaces."""
11
 
12
  # Application metadata
13
+ app_name: str = "API"
14
  app_version: str = "1.0.0"
15
 
16
+
17
  model_id: str = "LiquidAI/LFM2.5-1.2B-Instruct-ONNX"
18
+ model_variant: str = "q8"
19
 
20
  # Server settings (HuggingFace Spaces uses port 7860)
21
  host: str = "0.0.0.0"
 
24
  # CORS settings
25
  cors_origins: List[str] = ["*"]
26
 
27
+
28
  temperature: float = 0.1
29
  top_k: int = 50
30
  top_p: float = 0.1
31
  max_tokens: int = 2000 # Max output tokens (model supports 32K context)
32
  repetition_penalty: float = 1.05
33
 
34
+
35
+ num_threads: int = 2
36
 
37
  # Logging
38
  log_level: str = "info"