import os
from pathlib import Path
from typing import Final
# --- Base Directory Definition ---
# Relative base directory under which the cache file, the fine-tuned model
# output and the dataset export are placed. A Path (rather than a plain
# string) keeps path handling OS-agnostic.
ARTIFACTS_DIR: Final[Path] = Path("artifacts")
class AppConfig:
    """
    Central configuration class for the Hacker News Fine-Tuner application.

    Every setting is a class-level constant annotated ``Final``. The values
    are evaluated once, when the class body executes at import time, so the
    class is meant to be read as ``AppConfig.<NAME>`` without instantiation.
    """

    # --- Directory/Environment Configuration ---
    # Mirror of the module-level base directory. From this line onward the
    # name resolves to this class attribute inside the class body, so the
    # path joins below build on it.
    ARTIFACTS_DIR: Final[Path] = ARTIFACTS_DIR

    # Environment variable for Hugging Face token (used by model_trainer).
    # Read once when the class body runs; None when the variable is unset.
    HF_TOKEN: Final[str | None] = os.getenv('HF_TOKEN')

    # --- Caching/Data Fetching Configuration ---
    HN_RSS_URL: Final[str] = "https://news.ycombinator.com/rss"

    # Location of the pickled cache data inside the artifacts directory
    CACHE_FILE: Final[Path] = ARTIFACTS_DIR / "hacker_news_cache.pkl"

    # Cache duration set to 30 minutes (1800 seconds)
    CACHE_DURATION_SECONDS: Final[int] = 60 * 30

    # --- Model/Training Configuration ---
    # Name of the pre-trained embedding model
    MODEL_NAME: Final[str] = 'google/embeddinggemma-300m'

    # Task name for prompting the embedding model (e.g., for instruction tuning)
    TASK_NAME: Final[str] = "Classification"

    # Output directory for the fine-tuned model
    OUTPUT_DIR: Final[Path] = ARTIFACTS_DIR / "embedding-gemma-finetuned-hn"

    # --- Gradio/App-Specific Configuration ---
    # Anchor text used for contrastive learning dataset generation
    QUERY_ANCHOR: Final[str] = "MY_FAVORITE_NEWS"

    # Default export path for the dataset CSV
    DATASET_EXPORT_FILENAME: Final[Path] = ARTIFACTS_DIR / "training_dataset.csv"

    # Default model for the standalone Mood Reader tab
    DEFAULT_MOOD_READER_MODEL: Final[str] = "bebechien/embedding-gemma-finetuned-hn"