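# NOTE: the next three lines appear to be repository-page metadata captured
# together with the file; they are not part of the module's code.
#   bebechien's picture
#   Update README.md, fix model name
#   7f82e34 verified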
import os
from typing import Final
from pathlib import Path
# --- Base Directory Definition ---
# Root folder from which AppConfig derives its cache, dataset-export and
# model-output paths. It is a relative pathlib.Path, so it is resolved
# against the working directory of the running process.
ARTIFACTS_DIR: Final[Path] = Path("artifacts")
class AppConfig:
    """
    Static settings shared across the Hacker News Fine-Tuner application.

    Every value is a class-level constant annotated ``Final``; read them
    directly as ``AppConfig.NAME`` without creating an instance.
    """

    # --- Directory/Environment Configuration ---
    # Re-export the module-level artifacts root as AppConfig.ARTIFACTS_DIR.
    # The right-hand side resolves to the module global because the class
    # attribute of the same name does not exist yet at this point.
    ARTIFACTS_DIR: Final[Path] = ARTIFACTS_DIR

    # Hugging Face token (used by model_trainer). Read once, when this class
    # body executes; None if HF_TOKEN is not set in the environment.
    HF_TOKEN: Final[str | None] = os.environ.get("HF_TOKEN")

    # --- Caching/Data Fetching Configuration ---
    # URL of the Hacker News RSS feed
    HN_RSS_URL: Final[str] = "https://news.ycombinator.com/rss"

    # Pickled cache file, located under the artifacts root
    CACHE_FILE: Final[Path] = ARTIFACTS_DIR / "hacker_news_cache.pkl"

    # Cache lifetime: 30 minutes, expressed in seconds (1800)
    CACHE_DURATION_SECONDS: Final[int] = 30 * 60

    # --- Model/Training Configuration ---
    # Identifier of the pre-trained embedding model
    MODEL_NAME: Final[str] = "google/embeddinggemma-300m"

    # Task name used when prompting the embedding model
    TASK_NAME: Final[str] = "Classification"

    # Directory that receives the fine-tuned model
    OUTPUT_DIR: Final[Path] = ARTIFACTS_DIR / "embedding-gemma-finetuned-hn"

    # --- Gradio/App-Specific Configuration ---
    # Anchor text used when generating the contrastive-learning dataset
    QUERY_ANCHOR: Final[str] = "MY_FAVORITE_NEWS"

    # Default destination of the exported dataset CSV
    DATASET_EXPORT_FILENAME: Final[Path] = ARTIFACTS_DIR / "training_dataset.csv"

    # Default model for the standalone Mood Reader tab
    DEFAULT_MOOD_READER_MODEL: Final[str] = "bebechien/embedding-gemma-finetuned-hn"