Spaces:
Running on Zero
Running on Zero
import os

# Model settings
# Each identifier is read from the environment variable of the same name and
# falls back to the literal default below when that variable is not set.
LLM_MODEL = os.getenv("LLM_MODEL", "Qwen/Qwen2.5-0.5B-Instruct")
EMBEDDER_MODEL = os.getenv("EMBEDDER_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
# Curated <32B open-weight causal LMs for local inference (shown in the UI dropdown).
# Order is significant: entries appear in the sequence listed here.
AVAILABLE_MODELS = [
    "Qwen/Qwen2.5-0.5B-Instruct",
    "HuggingFaceTB/SmolLM2-135M-Instruct",
    "HuggingFaceTB/SmolLM2-360M-Instruct",
    "Qwen/Qwen2.5-1.5B-Instruct",
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "HuggingFaceTB/SmolLM2-1.7B-Instruct",
    "microsoft/Phi-3.5-mini-instruct",
]
# Markdown-formatted description for each language model, keyed by model id.
# Entries are listed from smallest to largest parameter count.
#
# NOTE(review): values that still start with "π" most likely lost a four-byte
# emoji in transcoding (only the 0xF0 lead byte survived, read as ISO-8859-7).
# The original glyph cannot be recovered from this file; restore it from the
# upstream copy.
# NOTE(review): each header line ends with a single space before "\n". A
# Markdown hard line break needs two trailing spaces (or a backslash); confirm
# the renderer breaks the line where intended.
MODEL_INFO = {
    "HuggingFaceTB/SmolLM2-135M-Instruct": (
        "⚡ **Fastest · 135M params** — Loads in ~10 s. \n"
        "Good for exploring the interface. May produce looser compressions; "
        "switch up for high-fidelity output."
    ),
    "HuggingFaceTB/SmolLM2-360M-Instruct": (
        "π **Fast · 360M params** — Loads in ~20 s. \n"
        "Noticeably better compression than 135M with a small speed trade-off."
    ),
    "Qwen/Qwen2.5-0.5B-Instruct": (
        "⚡ **Fast · 500M params · Default** — Loads in ~25 s. \n"
        "Strong instruction-following for its size; reliably respects token budgets. "
        "Best balance of speed and quality."
    ),
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0": (
        "π **Fast · 1.1B params** — Loads in ~40 s. \n"
        "Fully open, no licence required. Good general-purpose compression at 1B scale."
    ),
    "Qwen/Qwen2.5-1.5B-Instruct": (
        "⚖️ **Balanced · 1.5B params** — Loads in ~60 s. \n"
        "Solid all-rounder; strong semantic fidelity and budget adherence."
    ),
    "HuggingFaceTB/SmolLM2-1.7B-Instruct": (
        "⚖️ **Balanced · 1.7B params** — Loads in ~60 s. \n"
        "Designed for edge inference; efficient on CPU."
    ),
    "microsoft/Phi-3.5-mini-instruct": (
        "π **Best quality · 3.8B params** — Loads in ~2 min. \n"
        "Strongest reasoning and fidelity in this list. GPU strongly recommended."
    ),
}
# Curated sentence-transformer embedding models for quality scoring.
# Order is significant: entries appear in the sequence listed here.
AVAILABLE_EMBEDDER_MODELS = [
    "sentence-transformers/all-MiniLM-L6-v2",
    "sentence-transformers/all-mpnet-base-v2",
    "BAAI/bge-small-en-v1.5",
    "BAAI/bge-base-en-v1.5",
    "mixedbread-ai/mxbai-embed-large-v1",
    "Alibaba-NLP/gte-Qwen2-1.5B-instruct",
]
# Markdown-formatted description for each embedding model, keyed by model id.
#
# NOTE(review): a value that still starts with "π" most likely lost a
# four-byte emoji in transcoding (only the 0xF0 lead byte survived, read as
# ISO-8859-7). The original glyph cannot be recovered from this file; restore
# it from the upstream copy.
# NOTE(review): each header line ends with a single space before "\n". A
# Markdown hard line break needs two trailing spaces (or a backslash); confirm
# the renderer breaks the line where intended.
EMBEDDER_INFO = {
    "sentence-transformers/all-MiniLM-L6-v2": (
        "⚡ **Fast · 22M params · Default** \n"
        "Great baseline. Scores are reliable for typical compression ratios. "
        "Runs comfortably on CPU — minimal overhead."
    ),
    "sentence-transformers/all-mpnet-base-v2": (
        "⚖️ **Balanced · 110M params** \n"
        "Noticeably sharper quality scores than MiniLM, especially on longer texts. "
        "Small speed trade-off; fine on CPU."
    ),
    "BAAI/bge-small-en-v1.5": (
        "⚡ **Fast · 33M params** \n"
        "Strong quality-to-size ratio — often matches MiniLM on accuracy while being "
        "slightly more sensitive to meaning shifts. Good CPU option."
    ),
    "BAAI/bge-base-en-v1.5": (
        "⚖️ **Balanced · 109M params** \n"
        "Consistently strong on semantic similarity benchmarks. "
        "Scores will be more discriminating — small differences in compression quality show up more clearly."
    ),
    "mixedbread-ai/mxbai-embed-large-v1": (
        "π **High quality · 335M params** \n"
        "Top-tier similarity scores. Quality readings will be the most accurate here, "
        "but slower to load and run. GPU recommended."
    ),
    "Alibaba-NLP/gte-Qwen2-1.5B-instruct": (
        "🔬 **Best quality · 1.5B params** \n"
        "Strongest semantic understanding in this list. Scores will reflect subtle meaning loss "
        "that smaller models miss. Requires significant RAM/VRAM — GPU strongly recommended."
    ),
}
# Compression settings
# NOTE(review): presumably the default token budget for a compressed output
# and the generation cap passed to the LLM — confirm against the callers.
DEFAULT_TARGET_TOKENS = 500
MAX_NEW_TOKENS = 1024
# Gradio
APP_TITLE = "TinyPress"
# Port number taken from the PORT environment variable, falling back to 7860
# when the variable is unset or empty (int("") would otherwise raise
# ValueError at import time). A non-numeric value still raises ValueError.
SERVER_PORT = int(os.getenv("PORT") or 7860)