Files changed (3) hide show
  1. config.py +5 -4
  2. core/scorer.py +3 -1
  3. models/model_loader.py +1 -1
config.py CHANGED
@@ -10,7 +10,7 @@ AVAILABLE_MODELS = [
10
  "HuggingFaceTB/SmolLM2-135M-Instruct",
11
  "HuggingFaceTB/SmolLM2-360M-Instruct",
12
  "Qwen/Qwen2.5-1.5B-Instruct",
13
- "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
14
  "HuggingFaceTB/SmolLM2-1.7B-Instruct",
15
  "microsoft/Phi-3.5-mini-instruct",
16
  ]
@@ -30,9 +30,10 @@ MODEL_INFO = {
30
  "Strong instruction-following for its size; reliably respects token budgets. "
31
  "Best balance of speed and quality."
32
  ),
33
- "TinyLlama/TinyLlama-1.1B-Chat-v1.0": (
34
- "🚀 **Fast · 1.1B params** — Loads in ~40 s. \n"
35
- "Fully open, no licence required. Good general-purpose compression at 1B scale."
 
36
  ),
37
  "Qwen/Qwen2.5-1.5B-Instruct": (
38
  "⚖️ **Balanced · 1.5B params** — Loads in ~60 s. \n"
 
10
  "HuggingFaceTB/SmolLM2-135M-Instruct",
11
  "HuggingFaceTB/SmolLM2-360M-Instruct",
12
  "Qwen/Qwen2.5-1.5B-Instruct",
13
+ "meta-llama/Llama-3.2-1B-Instruct",
14
  "HuggingFaceTB/SmolLM2-1.7B-Instruct",
15
  "microsoft/Phi-3.5-mini-instruct",
16
  ]
 
30
  "Strong instruction-following for its size; reliably respects token budgets. "
31
  "Best balance of speed and quality."
32
  ),
33
+ "meta-llama/Llama-3.2-1B-Instruct": (
34
+ "🚀 **Fast · 1B params** — Loads in ~40 s. \n"
35
+ "Meta's smallest Llama; good general-purpose compression. "
36
+ "Requires accepting the Llama licence on HF Hub."
37
  ),
38
  "Qwen/Qwen2.5-1.5B-Instruct": (
39
  "⚖️ **Balanced · 1.5B params** — Loads in ~60 s. \n"
core/scorer.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import numpy as np
2
  from models.model_loader import get_embedder
3
 
@@ -11,7 +12,8 @@ except ImportError:
11
  @_gpu
12
  def semantic_score(original: str, compressed: str) -> float:
13
  embedder = get_embedder()
14
- vecs = embedder.encode([original, compressed], convert_to_numpy=True)
 
15
  cos = float(
16
  np.dot(vecs[0], vecs[1]) / (np.linalg.norm(vecs[0]) * np.linalg.norm(vecs[1]))
17
  )
 
1
+ import torch
2
  import numpy as np
3
  from models.model_loader import get_embedder
4
 
 
12
  @_gpu
13
  def semantic_score(original: str, compressed: str) -> float:
14
  embedder = get_embedder()
15
+ device = "cuda" if torch.cuda.is_available() else "cpu"
16
+ vecs = embedder.encode([original, compressed], device=device, convert_to_numpy=True)
17
  cos = float(
18
  np.dot(vecs[0], vecs[1]) / (np.linalg.norm(vecs[0]) * np.linalg.norm(vecs[1]))
19
  )
models/model_loader.py CHANGED
@@ -95,7 +95,7 @@ def switch_embedder(model_id: str) -> str:
95
 
96
  def _load_embedder(model_id: str):
97
  global _embedder, _current_embedder_id
98
- _embedder = SentenceTransformer(model_id, device="cpu")
99
  _current_embedder_id = model_id
100
 
101
 
 
95
 
96
  def _load_embedder(model_id: str):
97
  global _embedder, _current_embedder_id
98
+ _embedder = SentenceTransformer(model_id)
99
  _current_embedder_id = model_id
100
 
101