Spaces:

google
/

embeddinggemma-tuning-lab

Running

App Files Files Community

embeddinggemma-tuning-lab / src /config.py

bebechien

Update README.md, fix model name

7f82e34 verified about 2 months ago

raw

history blame contribute delete

1.75 kB

	import os
	from typing import Final
	from pathlib import Path

	# --- Artifact root ---
	# Single relative directory under which the application keeps its generated
	# files. It is a relative pathlib.Path, so it is interpreted against the
	# current working directory of the process that uses it.
	ARTIFACTS_DIR: Final[Path] = Path("artifacts")

	class AppConfig:
	    """
	    Central configuration class for the Hacker News Fine-Tuner application.

	    Every setting is a class-level constant annotated with ``Final``, so the
	    values are read directly from the class (``AppConfig.MODEL_NAME``) and no
	    instance is needed. All filesystem settings are ``pathlib.Path`` objects
	    located under ``ARTIFACTS_DIR``.
	    """

	    # --- Directory/Environment Configuration ---
	    # Re-export of the module-level ARTIFACTS_DIR. The right-hand side resolves
	    # to the module constant because the class-level name is not bound yet
	    # when this statement runs; later lines in this body see the class copy.
	    ARTIFACTS_DIR: Final[Path] = ARTIFACTS_DIR

	    # Environment variable for Hugging Face token (used by model_trainer).
	    # Read once, when the class body executes; None when HF_TOKEN is unset.
	    # The `str | None` union is evaluated at runtime and needs Python 3.10+.
	    HF_TOKEN: Final[str | None] = os.getenv('HF_TOKEN')


	    # --- Caching/Data Fetching Configuration ---
	    # Hacker News RSS feed URL.
	    HN_RSS_URL: Final[str] = "https://news.ycombinator.com/rss"

	    # Filename for the pickled cache data (using Path.joinpath)
	    CACHE_FILE: Final[Path] = ARTIFACTS_DIR.joinpath("hacker_news_cache.pkl")

	    # Cache duration set to 30 minutes (1800 seconds)
	    CACHE_DURATION_SECONDS: Final[int] = 60 * 30


	    # --- Model/Training Configuration ---

	    # Name of the pre-trained embedding model
	    MODEL_NAME: Final[str] = 'google/embeddinggemma-300m'

	    # Task name for prompting the embedding model (e.g., for instruction tuning)
	    TASK_NAME: Final[str] = "Classification"

	    # Output directory for the fine-tuned model
	    OUTPUT_DIR: Final[Path] = ARTIFACTS_DIR.joinpath("embedding-gemma-finetuned-hn")


	    # --- Gradio/App-Specific Configuration ---

	    # Anchor text used for contrastive learning dataset generation
	    QUERY_ANCHOR: Final[str] = "MY_FAVORITE_NEWS"

	    # Default export path for the dataset CSV (a full path under
	    # ARTIFACTS_DIR, despite the "FILENAME" in the attribute name)
	    DATASET_EXPORT_FILENAME: Final[Path] = ARTIFACTS_DIR.joinpath("training_dataset.csv")

	    # Default model for the standalone Mood Reader tab
	    DEFAULT_MOOD_READER_MODEL: Final[str] = "bebechien/embedding-gemma-finetuned-hn"