Spaces:

build-small-hackathon
/

tiny-press

Running on Zero

sriharsha-cr committed 4 days ago

Commit

39003c5

1 Parent(s): 8b656bb

task/embedings (#6)

Files changed (3) hide show

config.py CHANGED Viewed

@@ -10,7 +10,7 @@ AVAILABLE_MODELS = [
     "HuggingFaceTB/SmolLM2-135M-Instruct",
     "HuggingFaceTB/SmolLM2-360M-Instruct",
     "Qwen/Qwen2.5-1.5B-Instruct",
-    "meta-llama/Llama-3.2-1B-Instruct",
     "HuggingFaceTB/SmolLM2-1.7B-Instruct",
     "microsoft/Phi-3.5-mini-instruct",
 ]
@@ -30,10 +30,9 @@ MODEL_INFO = {
         "Strong instruction-following for its size; reliably respects token budgets. "
         "Best balance of speed and quality."
     ),
-    "meta-llama/Llama-3.2-1B-Instruct": (
-        "🚀 **Fast · 1B params** — Loads in ~40 s.  \n"
-        "Meta's smallest Llama; good general-purpose compression. "
-        "Requires accepting the Llama licence on HF Hub."
     ),
     "Qwen/Qwen2.5-1.5B-Instruct": (
         "⚖️ **Balanced · 1.5B params** — Loads in ~60 s.  \n"

     "HuggingFaceTB/SmolLM2-135M-Instruct",
     "HuggingFaceTB/SmolLM2-360M-Instruct",
     "Qwen/Qwen2.5-1.5B-Instruct",
+    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "HuggingFaceTB/SmolLM2-1.7B-Instruct",
     "microsoft/Phi-3.5-mini-instruct",
 ]
         "Strong instruction-following for its size; reliably respects token budgets. "
         "Best balance of speed and quality."
     ),
+    "TinyLlama/TinyLlama-1.1B-Chat-v1.0": (
+        "🚀 **Fast · 1.1B params** — Loads in ~40 s.  \n"
+        "Fully open, no licence required. Good general-purpose compression at 1B scale."
     ),
     "Qwen/Qwen2.5-1.5B-Instruct": (
         "⚖️ **Balanced · 1.5B params** — Loads in ~60 s.  \n"

core/scorer.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import torch
 import numpy as np
 from models.model_loader import get_embedder
@@ -12,8 +11,7 @@ except ImportError:
 @_gpu
 def semantic_score(original: str, compressed: str) -> float:
     embedder = get_embedder()
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    vecs = embedder.encode([original, compressed], device=device, convert_to_numpy=True)
     cos = float(
         np.dot(vecs[0], vecs[1]) / (np.linalg.norm(vecs[0]) * np.linalg.norm(vecs[1]))
     )

 import numpy as np
 from models.model_loader import get_embedder
 @_gpu
 def semantic_score(original: str, compressed: str) -> float:
     embedder = get_embedder()
+    vecs = embedder.encode([original, compressed], convert_to_numpy=True)
     cos = float(
         np.dot(vecs[0], vecs[1]) / (np.linalg.norm(vecs[0]) * np.linalg.norm(vecs[1]))
     )

models/model_loader.py CHANGED Viewed

@@ -95,7 +95,7 @@ def switch_embedder(model_id: str) -> str:
 def _load_embedder(model_id: str):
     global _embedder, _current_embedder_id
-    _embedder = SentenceTransformer(model_id)
     _current_embedder_id = model_id

 def _load_embedder(model_id: str):
     global _embedder, _current_embedder_id
+    _embedder = SentenceTransformer(model_id, device="cpu")
     _current_embedder_id = model_id