Spaces:

build-small-hackathon
/

tiny-press

Running on Zero

App Files Files Community

tiny-press / config.py

sriharsha-cr

task/embedings (#6)

39003c5 about 11 hours ago

raw

history blame contribute delete

4.08 kB

	import os

	# Model settings
	# Each value can be overridden through the environment variable of the same
	# name; the second argument is the fallback used when that variable is unset.
	LLM_MODEL = os.environ.get("LLM_MODEL", "Qwen/Qwen2.5-0.5B-Instruct")
	EMBEDDER_MODEL = os.environ.get(
	    "EMBEDDER_MODEL",
	    "sentence-transformers/all-MiniLM-L6-v2",
	)

	# Curated list of open-weight causal LMs (all under 32B parameters) intended
	# for local inference; these are the choices shown in the UI dropdown.
	# Order is significant: the list is kept exactly as presented to the user,
	# with the built-in default LLM first.
	AVAILABLE_MODELS = [
	    "Qwen/Qwen2.5-0.5B-Instruct",
	    "HuggingFaceTB/SmolLM2-135M-Instruct",
	    "HuggingFaceTB/SmolLM2-360M-Instruct",
	    "Qwen/Qwen2.5-1.5B-Instruct",
	    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
	    "HuggingFaceTB/SmolLM2-1.7B-Instruct",
	    "microsoft/Phi-3.5-mini-instruct",
	]

	# Human-readable blurb for each selectable LLM, keyed by model id (the same
	# ids that appear in AVAILABLE_MODELS). Every blurb is a headline line
	# (speed tier, parameter count, approximate load time) followed by a newline
	# and a short usage note. Entries are listed from smallest to largest model.
	MODEL_INFO = {
	    # --- sub-1B models ---
	    "HuggingFaceTB/SmolLM2-135M-Instruct": (
	        "⚡ Fastest · 135M params — Loads in ~10 s. \n"
	        "Good for exploring the interface. May produce looser compressions; "
	        "switch up for high-fidelity output."
	    ),
	    "HuggingFaceTB/SmolLM2-360M-Instruct": (
	        "🚀 Fast · 360M params — Loads in ~20 s. \n"
	        "Noticeably better compression than 135M with a small speed trade-off."
	    ),
	    "Qwen/Qwen2.5-0.5B-Instruct": (
	        "⚡ Fast · 500M params · Default — Loads in ~25 s. \n"
	        "Strong instruction-following for its size; reliably respects token budgets. "
	        "Best balance of speed and quality."
	    ),
	    # --- 1B-2B models ---
	    "TinyLlama/TinyLlama-1.1B-Chat-v1.0": (
	        "🚀 Fast · 1.1B params — Loads in ~40 s. \n"
	        "Fully open, no licence required. Good general-purpose compression at 1B scale."
	    ),
	    "Qwen/Qwen2.5-1.5B-Instruct": (
	        "⚖️ Balanced · 1.5B params — Loads in ~60 s. \n"
	        "Solid all-rounder; strong semantic fidelity and budget adherence."
	    ),
	    "HuggingFaceTB/SmolLM2-1.7B-Instruct": (
	        "⚖️ Balanced · 1.7B params — Loads in ~60 s. \n"
	        "Designed for edge inference; efficient on CPU."
	    ),
	    # --- largest model in the list ---
	    "microsoft/Phi-3.5-mini-instruct": (
	        "🏆 Best quality · 3.8B params — Loads in ~2 min. \n"
	        "Strongest reasoning and fidelity in this list. GPU strongly recommended."
	    ),
	}

	# Curated sentence-transformer embedding models used for quality scoring.
	# Order is preserved as written; the first entry is the built-in default
	# embedder.
	AVAILABLE_EMBEDDER_MODELS = [
	    "sentence-transformers/all-MiniLM-L6-v2",
	    "sentence-transformers/all-mpnet-base-v2",
	    "BAAI/bge-small-en-v1.5",
	    "BAAI/bge-base-en-v1.5",
	    "mixedbread-ai/mxbai-embed-large-v1",
	    "Alibaba-NLP/gte-Qwen2-1.5B-instruct",
	]

	# Human-readable blurb for each selectable embedding model, keyed by model id
	# (the same ids, in the same order, as AVAILABLE_EMBEDDER_MODELS). Every
	# blurb is a headline line (tier and parameter count) followed by a newline
	# and a short note on score quality and hardware needs.
	EMBEDDER_INFO = {
	    # --- sentence-transformers ---
	    "sentence-transformers/all-MiniLM-L6-v2": (
	        "⚡ Fast · 22M params · Default \n"
	        "Great baseline. Scores are reliable for typical compression ratios. "
	        "Runs comfortably on CPU — minimal overhead."
	    ),
	    "sentence-transformers/all-mpnet-base-v2": (
	        "⚖️ Balanced · 110M params \n"
	        "Noticeably sharper quality scores than MiniLM, especially on longer texts. "
	        "Small speed trade-off; fine on CPU."
	    ),
	    # --- BAAI bge ---
	    "BAAI/bge-small-en-v1.5": (
	        "⚡ Fast · 33M params \n"
	        "Strong quality-to-size ratio — often matches MiniLM on accuracy while being "
	        "slightly more sensitive to meaning shifts. Good CPU option."
	    ),
	    "BAAI/bge-base-en-v1.5": (
	        "⚖️ Balanced · 109M params \n"
	        "Consistently strong on semantic similarity benchmarks. "
	        "Scores will be more discriminating — small differences in compression quality show up more clearly."
	    ),
	    # --- larger models ---
	    "mixedbread-ai/mxbai-embed-large-v1": (
	        "🏆 High quality · 335M params \n"
	        "Top-tier similarity scores. Quality readings will be the most accurate here, "
	        "but slower to load and run. GPU recommended."
	    ),
	    "Alibaba-NLP/gte-Qwen2-1.5B-instruct": (
	        "🔬 Best quality · 1.5B params \n"
	        "Strongest semantic understanding in this list. Scores will reflect subtle meaning loss "
	        "that smaller models miss. Requires significant RAM/VRAM — GPU strongly recommended."
	    ),
	}

	# Compression settings
	# Token count used as the compression target when none is chosen
	# (presumably the UI's initial value; confirm against the caller).
	DEFAULT_TARGET_TOKENS = 500
	# Upper bound on newly generated tokens (presumably handed to the LLM's
	# generation call; confirm against the caller).
	MAX_NEW_TOKENS = 1024

	# Gradio
	APP_TITLE = "TinyPress"
	# Port for the web server, read from the PORT environment variable.
	# An unset, empty, or whitespace-only PORT falls back to 7860 instead of
	# letting int() raise ValueError at import time. A non-numeric value still
	# raises, so a genuine misconfiguration is not silently ignored.
	SERVER_PORT = int(os.getenv("PORT", "").strip() or 7860)