# NOTE(review): stray deployment metadata accidentally committed above the
# module docstring — commented out so the module parses; safe to delete.
# GitHub Action
# deploy: worker release from GitHub
# 8ff1b66
"""
Konfiguracja aplikacji.
Wykorzystuje Pydantic Settings do zarządzania zmiennymi środowiskowymi.
"""
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """
    Application settings loaded from environment variables.

    Each field falls back to the default below when the corresponding
    environment variable (or ``.env`` entry) is absent. Lookup is
    case-insensitive and unknown variables are ignored.
    """
    model_config = SettingsConfigDict(
        env_file=(".env", "backend/.env"),  # repo root first, then backend/
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore"
    )
    # MongoDB
    mongodb_url: str = ""
    mongodb_db_name: str = "sentimentSummary"
    # App Mode
    app_mode: str = "full"  # "full" = monolith, "api" = API-only (no frontend)
    # App Settings
    debug: bool = False
    cors_origins: str = "http://localhost:5173,http://localhost:3000"
    # Cache Settings
    cache_ttl_hours: int = 24
    cache_ttl_short_hours: int = 12  # frequently updated games
    cache_ttl_long_hours: int = 168  # stable games (7 days)
    cache_ttl_worker_managed_hours: int = 1440  # 60 days to preserve stale fallback results
    cache_ttl_on_demand_hours: int = 1440  # 60 days to preserve stale fallback results
    # Incremental Analysis
    incremental_enabled: bool = True
    incremental_max_stored_ids: int = 5000
    incremental_max_gap_days: int = 90  # fall back to full analysis after this many days without reviews
    recent_sample_limit: int = 1000
    niche_cache_max_age_days: int = 60
    analysis_freshness_max_age_days: int = 60
    patch_context_max_age_days: int = 90
    dlc_min_reviews_for_analysis: int = 50
    dlc_visible_in_search: bool = False  # Temporary policy: hide DLC from autocomplete/suggestions
    dlc_worker_analysis_enabled: bool = False  # Temporary policy: exclude DLC from worker-managed analysis
    # Steam API Settings
    review_batch_size: int = 100
    steam_review_language: str = "schinese"  # Review fetch scope; product analyzes Simplified Chinese Steam reviews.
    steam_region: str = "CN"  # CN, US, etc.
    # Steam API Retry
    steam_retry_max_attempts: int = 3
    steam_retry_base_delay: float = 1.0  # doubles each retry
    steam_retry_max_delay: float = 10.0  # cap
    # Steam API Error Cache TTL (seconds)
    steam_error_cache_ttl_404: int = 3600  # 1h
    steam_error_cache_ttl_429: int = 300  # 5min
    # Sampling Settings - Statistical sampling parameters
    sample_top_helpful: int = 50
    sample_confidence_level: float = 0.95
    sample_margin_of_error: float = 0.02
    sample_max_reviews: int = 3000
    sample_minority_min: int = 100
    # NLP Settings - Analysis Parameters
    text_max_length: int = 512
    sentiment_positive_threshold: float = 0.1
    sentiment_negative_threshold: float = -0.1
    topic_min_mentions: int = 5
    # NLP Settings - Deduplication Cache
    dedup_cache_maxsize: int = 10000
    # NLP Settings - Performance & Logic
    nlp_onnx_intra_threads: int = 2
    nlp_onnx_inter_threads: int = 2
    nlp_negation_window: int = 3
    # Prediction Settings
    prediction_retention_threshold_pos: float = 0.2
    prediction_retention_threshold_neg: float = -0.2
    # Community Highlights
    highlights_ngram_min: int = 2
    highlights_ngram_max: int = 5
    highlights_min_mentions: int = 3
    highlights_max_doc_freq_ratio: float = 0.4
    highlights_top_n_general: int = 15
    highlights_top_n_per_topic: int = 5
    # Worker — Pre-cache
    worker_trigger_token: str = ""
    precache_enabled: bool = False
    precache_top_n_games: int = 500
    precache_batch_delay_seconds: int = 10
    precache_checkpoints_hours: str = "6,12,24,72,168,336"
    precache_max_analyses_per_cycle: int = 50
    # Worker — Priority Games
    steam_priority_categories: str = "top_sellers,new_releases,specials"
    steam_priority_regions: str = "CN,US"
    steam_priority_grace_days: int = 3
    steam_priority_categories_url: str = "https://store.steampowered.com/api/featuredcategories"
    steam_bootstrap_max_per_cycle: int = 20
    steam_bootstrap_delay: float = 1.5
    # Worker — News Scan
    news_refresh_window_hours: int = 6
    news_initial_count: int = 20
    news_incremental_count: int = 5
    # Worker — Game Sync
    game_sync_enabled: bool = False
    game_sync_steamspy_delay: float = 61.0
    game_sync_details_delay: float = 1.1
    game_sync_top_n_details: int = 500
    game_sync_cn_enrichment_delay: float = 1.5
    game_sync_cn_enrichment_limit: int = 200
    game_sync_app_type_enrichment_delay: float = 1.5
    game_sync_app_type_enrichment_limit: int = 200
    # Logging (both Live API and Worker)
    worker_log_dir: str = "/data/worker_logs"
    worker_log_fallback_dir: str = "/tmp/worker_logs"
    worker_log_max_bytes: int = 5_000_000  # 5 MB per file
    worker_log_backup_count: int = 3  # 3 rotated files = 20 MB max
    nlp_verbose_logging: bool = False  # re-enable NLP debug logs to stdout
    nlp_debug_log_max_bytes: int = 2_000_000  # 2 MB per file
    errors_log_max_bytes: int = 2_000_000  # 2 MB per file
    # Rate Limiting
    rate_limit_analyze: str = "10/minute"
    rate_limit_default: str = "30/minute"
    # NLP Settings - Hugging Face Models
    # Using specialized Chinese model (RoBERTa-JD) - 90% accuracy on product reviews
    hf_sentiment_model: str = "uer/roberta-base-finetuned-jd-binary-chinese"

    @property
    def cors_origins_list(self) -> list[str]:
        """Return the allowed CORS origins as a list.

        Empty segments (e.g. from a trailing comma in the env var) are
        dropped, matching the behaviour of the other list-valued
        properties below — an empty CORS origin is never valid.
        """
        return [origin.strip() for origin in self.cors_origins.split(",") if origin.strip()]

    @property
    def precache_checkpoints_list(self) -> list[int]:
        """Parse checkpoint hours from the comma-separated string, ascending.

        Empty segments are skipped so a stray trailing comma does not
        raise ``ValueError`` from ``int("")``.
        """
        return sorted(int(h.strip()) for h in self.precache_checkpoints_hours.split(",") if h.strip())

    @property
    def steam_priority_categories_list(self) -> list[str]:
        """Return priority Steam store categories, skipping empty segments."""
        return [c.strip() for c in self.steam_priority_categories.split(",") if c.strip()]

    @property
    def steam_priority_regions_list(self) -> list[str]:
        """Return priority Steam regions, skipping empty segments."""
        return [r.strip() for r in self.steam_priority_regions.split(",") if r.strip()]
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the process-wide ``Settings`` singleton.

    The first call constructs (and thereby validates) the settings from
    the environment; every subsequent call returns the cached instance.
    """
    return Settings()
# Module-level convenience handle; importing this module eagerly loads and
# validates the configuration. Prefer `get_settings()` where lazy access
# or test-time cache clearing is needed.
settings = get_settings()