"""
Configuration management for the text summarizer backend.
"""
from pydantic import AliasChoices, Field, field_validator, validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Each field is populated from the environment variable that has the same
    name as the field; matching is case-insensitive (``case_sensitive=False``),
    so ``OLLAMA_MODEL`` fills ``ollama_model``. Values may also come from a
    ``.env`` file, and unknown variables are ignored.

    pydantic-settings v2 does not understand the v1-style ``Field(env=...)``
    keyword (it is only recorded as schema metadata and emits a deprecation
    warning), so fields whose environment variable differs from the field
    name declare it explicitly through ``validation_alias``. The field name is
    listed first in those aliases so that keyword arguments passed to
    ``Settings(...)`` keep priority over the environment.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        case_sensitive=False,
        # Ignore extra fields from environment (e.g., old v4_phi_* variables)
        extra="ignore",
    )

    # Ollama Configuration
    ollama_model: str = Field(default="llama3.2:1b")
    ollama_host: str = Field(default="http://0.0.0.0:11434")
    ollama_timeout: int = Field(default=60, ge=1)

    # Server Configuration
    server_host: str = Field(default="127.0.0.1")
    server_port: int = Field(default=8000, ge=1, le=65535)
    log_level: str = Field(default="INFO")
    log_format: str = Field(
        default="auto",
        description="Log format: 'json' for structured logs, 'text' for colored output, 'auto' for environment-based selection",
    )

    # Optional: API Security
    api_key_enabled: bool = Field(default=False)
    api_key: str | None = Field(default=None)

    # Optional: Rate Limiting
    rate_limit_enabled: bool = Field(default=False)
    rate_limit_requests: int = Field(default=60, ge=1)
    rate_limit_window: int = Field(default=60, ge=1)

    # Input validation
    max_text_length: int = Field(default=32000, ge=1)  # ~32KB
    max_tokens_default: int = Field(default=256, ge=1)

    # V2 HuggingFace Configuration
    hf_model_id: str = Field(default="sshleifer/distilbart-cnn-6-6")
    hf_device_map: str = Field(default="auto")  # "auto" for GPU fallback to CPU
    hf_torch_dtype: str = Field(default="auto")  # "auto" for automatic dtype selection
    # HuggingFace cache directory. Read from HF_HOME; HF_CACHE_DIR (the field
    # name) is also accepted and wins if both are set.
    hf_cache_dir: str = Field(
        default="/tmp/huggingface",
        validation_alias=AliasChoices("hf_cache_dir", "HF_HOME"),
    )
    hf_max_new_tokens: int = Field(default=128, ge=1, le=2048)
    hf_temperature: float = Field(default=0.7, ge=0.0, le=2.0)
    hf_top_p: float = Field(default=0.95, ge=0.0, le=1.0)

    # V1/V2 Warmup Control
    enable_v1_warmup: bool = Field(default=False)  # Disable V1 warmup by default
    enable_v2_warmup: bool = Field(
        default=False
    )  # Disable V2 warmup to save memory for V4

    # V3 Web Scraping Configuration
    enable_v3_scraping: bool = Field(
        default=True, description="Enable V3 web scraping API"
    )
    scraping_timeout: int = Field(
        default=10,
        ge=1,
        le=60,
        description="HTTP timeout for scraping requests (seconds)",
    )
    scraping_max_text_length: int = Field(
        default=50000,
        description="Maximum text length to extract (chars)",
    )
    scraping_cache_enabled: bool = Field(
        default=True,
        description="Enable in-memory caching of scraped content",
    )
    scraping_cache_ttl: int = Field(
        default=3600,
        description="Cache TTL in seconds (default: 1 hour)",
    )
    # Read from SCRAPING_UA_ROTATION; SCRAPING_USER_AGENT_ROTATION (the field
    # name) is also accepted and wins if both are set.
    scraping_user_agent_rotation: bool = Field(
        default=True,
        validation_alias=AliasChoices(
            "scraping_user_agent_rotation", "SCRAPING_UA_ROTATION"
        ),
        description="Enable user-agent rotation",
    )
    scraping_rate_limit_per_minute: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Max scraping requests per minute per IP",
    )

    # V4 Structured Output Configuration
    enable_v4_structured: bool = Field(
        default=True,
        description="Enable V4 structured summarization API",
    )
    enable_v4_warmup: bool = Field(
        default=False,
        description="Enable V4 model warmup on startup (uses 1-2GB RAM with quantization)",
    )
    v4_model_id: str = Field(
        default="Qwen/Qwen2.5-1.5B-Instruct",
        description="Model ID for V4 structured output (1.5B params, fits HF 16GB limit)",
    )
    v4_max_tokens: int = Field(
        default=256,
        ge=128,
        le=2048,
        description="Max tokens for V4 generation",
    )
    v4_temperature: float = Field(
        default=0.2,
        ge=0.0,
        le=2.0,
        description="Temperature for V4 (low for stable JSON)",
    )
    v4_enable_quantization: bool = Field(
        default=True,
        description="Enable INT8 quantization for V4 model (reduces memory from ~2GB to ~1GB). Quantization takes ~1-2 minutes on startup.",
    )
    v4_use_fp16_for_speed: bool = Field(
        default=False,
        description="Use FP16 instead of 4-bit quantization for 2-3x faster inference (uses ~2-3GB GPU memory instead of ~1GB)",
    )

    @field_validator("log_level")
    @classmethod
    def validate_log_level(cls, v: str) -> str:
        """Return the log level upper-cased, or "INFO" if it is not a standard level."""
        valid_levels = ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
        level = v.upper()
        # Deliberately lenient: an unknown level falls back instead of raising.
        return level if level in valid_levels else "INFO"

    @field_validator("log_format")
    @classmethod
    def validate_log_format(cls, v: str) -> str:
        """Return the log format lower-cased, or "auto" if it is not supported."""
        valid_formats = ("auto", "json", "text")
        log_format = v.lower()
        # Deliberately lenient: an unknown format falls back instead of raising.
        return log_format if log_format in valid_formats else "auto"
# Global settings instance, built once when this module is first imported.
# NOTE(review): presumably other modules import this shared object rather than
# constructing their own Settings() — confirm against callers.
settings = Settings()
|