sriharsha-cr committed on
Commit
39003c5
·
1 Parent(s): 8b656bb

task/embedings (#6)

Browse files

- torch removed (5f8da10afbd1b71af20c56026b27dc2252cd6cba)
- Removed Gated models (790b8136f125a55cc500e682baa4c4a0e8d75ed2)

Files changed (3) hide show
  1. config.py +4 -5
  2. core/scorer.py +1 -3
  3. models/model_loader.py +1 -1
config.py CHANGED
@@ -10,7 +10,7 @@ AVAILABLE_MODELS = [
10
  "HuggingFaceTB/SmolLM2-135M-Instruct",
11
  "HuggingFaceTB/SmolLM2-360M-Instruct",
12
  "Qwen/Qwen2.5-1.5B-Instruct",
13
- "meta-llama/Llama-3.2-1B-Instruct",
14
  "HuggingFaceTB/SmolLM2-1.7B-Instruct",
15
  "microsoft/Phi-3.5-mini-instruct",
16
  ]
@@ -30,10 +30,9 @@ MODEL_INFO = {
30
  "Strong instruction-following for its size; reliably respects token budgets. "
31
  "Best balance of speed and quality."
32
  ),
33
- "meta-llama/Llama-3.2-1B-Instruct": (
34
- "🚀 **Fast · 1B params** — Loads in ~40 s. \n"
35
- "Meta's smallest Llama; good general-purpose compression. "
36
- "Requires accepting the Llama licence on HF Hub."
37
  ),
38
  "Qwen/Qwen2.5-1.5B-Instruct": (
39
  "⚖️ **Balanced · 1.5B params** — Loads in ~60 s. \n"
 
10
  "HuggingFaceTB/SmolLM2-135M-Instruct",
11
  "HuggingFaceTB/SmolLM2-360M-Instruct",
12
  "Qwen/Qwen2.5-1.5B-Instruct",
13
+ "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
14
  "HuggingFaceTB/SmolLM2-1.7B-Instruct",
15
  "microsoft/Phi-3.5-mini-instruct",
16
  ]
 
30
  "Strong instruction-following for its size; reliably respects token budgets. "
31
  "Best balance of speed and quality."
32
  ),
33
+ "TinyLlama/TinyLlama-1.1B-Chat-v1.0": (
34
+ "🚀 **Fast · 1.1B params** — Loads in ~40 s. \n"
35
+ "Fully open, no licence required. Good general-purpose compression at 1B scale."
 
36
  ),
37
  "Qwen/Qwen2.5-1.5B-Instruct": (
38
  "⚖️ **Balanced · 1.5B params** — Loads in ~60 s. \n"
core/scorer.py CHANGED
@@ -1,4 +1,3 @@
1
- import torch
2
  import numpy as np
3
  from models.model_loader import get_embedder
4
 
@@ -12,8 +11,7 @@ except ImportError:
12
  @_gpu
13
  def semantic_score(original: str, compressed: str) -> float:
14
  embedder = get_embedder()
15
- device = "cuda" if torch.cuda.is_available() else "cpu"
16
- vecs = embedder.encode([original, compressed], device=device, convert_to_numpy=True)
17
  cos = float(
18
  np.dot(vecs[0], vecs[1]) / (np.linalg.norm(vecs[0]) * np.linalg.norm(vecs[1]))
19
  )
 
 
1
  import numpy as np
2
  from models.model_loader import get_embedder
3
 
 
11
  @_gpu
12
  def semantic_score(original: str, compressed: str) -> float:
13
  embedder = get_embedder()
14
+ vecs = embedder.encode([original, compressed], convert_to_numpy=True)
 
15
  cos = float(
16
  np.dot(vecs[0], vecs[1]) / (np.linalg.norm(vecs[0]) * np.linalg.norm(vecs[1]))
17
  )
models/model_loader.py CHANGED
@@ -95,7 +95,7 @@ def switch_embedder(model_id: str) -> str:
95
 
96
  def _load_embedder(model_id: str):
97
  global _embedder, _current_embedder_id
98
- _embedder = SentenceTransformer(model_id)
99
  _current_embedder_id = model_id
100
 
101
 
 
95
 
96
  def _load_embedder(model_id: str):
97
  global _embedder, _current_embedder_id
98
+ _embedder = SentenceTransformer(model_id, device="cpu")
99
  _current_embedder_id = model_id
100
 
101