Files changed (5) hide show
  1. README.md +3 -2
  2. app.py +6 -0
  3. core/tokenizer_utils.py +3 -3
  4. models/model_loader.py +12 -1
  5. requirements.txt +0 -2
README.md CHANGED
@@ -35,6 +35,7 @@ No cloud. No API bill. Two small models running quietly on your machine.
35
 
36
  [![TinyPress Demo](https://img.youtube.com/vi/hDbIDtjjiB0/0.jpg)](https://youtu.be/hDbIDtjjiB0)
37
 
 
38
  ---
39
 
40
  ## Why this fits Thousand Token Wood
@@ -134,7 +135,7 @@ Full docs: [Architecture](docs/architecture.md) · [Setup](docs/setup.md) · [Ge
134
 
135
  Built by **[Sriharsha C R](https://www.linkedin.com/in/sriharsha-cr)** — AI Engineer and Cloud Native developer.
136
 
137
- [![LinkedIn](https://img.shields.io/badge/LinkedIn-sriharsha--cr-0a66c2?logo=linkedin&logoColor=white)](https://www.linkedin.com/in/sriharsha-cr)
138
- [![X / Twitter](https://img.shields.io/badge/X-@sriharsha__cr-000000?logo=x&logoColor=white)](https://x.com/sriharsha_cr)
139
  [![HuggingFace](https://img.shields.io/badge/HuggingFace-sriharsha--cr-ff9d00?logo=huggingface&logoColor=white)](https://huggingface.co/sriharsha-cr)
140
  [![GitHub](https://img.shields.io/badge/GitHub-SriharshaCR-181717?logo=github&logoColor=white)](https://github.com/SriharshaCR)
 
35
 
36
  [![TinyPress Demo](https://img.youtube.com/vi/hDbIDtjjiB0/0.jpg)](https://youtu.be/hDbIDtjjiB0)
37
 
38
+
39
  ---
40
 
41
  ## Why this fits Thousand Token Wood
 
135
 
136
  Built by **[Sriharsha C R](https://www.linkedin.com/in/sriharsha-cr)** — AI Engineer and Cloud Native developer.
137
 
138
+ [![LinkedIn](https://img.shields.io/badge/LinkedIn-sriharsha--cr-0a66c2?logo=linkedin&logoColor=white)](https://www.linkedin.com/posts/sriharsha-cr_tinypress-prompt-compression-engine-activity-7471426128331624448-aKfe)
139
+ [![X / Twitter](https://img.shields.io/badge/X-@sriharsha__cr-000000?logo=x&logoColor=white)](https://x.com/sriharsha_cr/status/2065662576684650879)
140
  [![HuggingFace](https://img.shields.io/badge/HuggingFace-sriharsha--cr-ff9d00?logo=huggingface&logoColor=white)](https://huggingface.co/sriharsha-cr)
141
  [![GitHub](https://img.shields.io/badge/GitHub-SriharshaCR-181717?logo=github&logoColor=white)](https://github.com/SriharshaCR)
app.py CHANGED
@@ -2,6 +2,12 @@ import gradio as gr
2
  import config
3
  from ui.compress_tab import build_compress_tab
4
  from ui.history_tab import build_history_tab
 
 
 
 
 
 
5
 
6
 
7
  def build_app() -> gr.Blocks:
 
2
  import config
3
  from ui.compress_tab import build_compress_tab
4
  from ui.history_tab import build_history_tab
5
+ from models.model_loader import get_tokenizer_only
6
+
7
# Pre-warm the tokenizer at import time so the first keystroke is instant.
# This is best-effort: if it fails, the tokenizer is loaded lazily on the
# first keystroke instead, so the failure is logged rather than raised.
try:
    get_tokenizer_only()
except Exception:
    import logging

    logging.getLogger(__name__).warning(
        "Tokenizer pre-warm failed; falling back to lazy load on first keystroke",
        exc_info=True,
    )
11
 
12
 
13
  def build_app() -> gr.Blocks:
core/tokenizer_utils.py CHANGED
@@ -1,13 +1,13 @@
1
- from models.model_loader import get_llm
2
 
3
 
4
  def count_tokens(text: str) -> int:
5
- _, tokenizer = get_llm()
6
  return len(tokenizer.encode(text, add_special_tokens=False))
7
 
8
 
9
  def get_token_strings(text: str) -> list[str]:
10
  """Return the decoded surface string for every token in text."""
11
- _, tokenizer = get_llm()
12
  ids = tokenizer.encode(text, add_special_tokens=False)
13
  return [tokenizer.decode([i]) for i in ids]
 
1
+ from models.model_loader import get_tokenizer_only
2
 
3
 
4
  def count_tokens(text: str) -> int:
5
+ tokenizer = get_tokenizer_only()
6
  return len(tokenizer.encode(text, add_special_tokens=False))
7
 
8
 
9
  def get_token_strings(text: str) -> list[str]:
10
  """Return the decoded surface string for every token in text."""
11
+ tokenizer = get_tokenizer_only()
12
  ids = tokenizer.encode(text, add_special_tokens=False)
13
  return [tokenizer.decode([i]) for i in ids]
models/model_loader.py CHANGED
@@ -6,6 +6,7 @@ import config
6
 
7
  _llm = None
8
  _tokenizer = None
 
9
  _embedder = None
10
  _current_model_id = None
11
  _current_embedder_id = None
@@ -24,6 +25,15 @@ def get_current_embedder_id() -> str | None:
24
  return _current_embedder_id
25
 
26
 
 
 
 
 
 
 
 
 
 
27
  def get_llm():
28
  global _llm, _tokenizer
29
  if _llm is None:
@@ -55,12 +65,13 @@ def _load_llm(model_id: str):
55
 
56
  def _unload_llm():
57
  """Free GPU/CPU memory before loading a different model."""
58
- global _llm, _tokenizer, _current_model_id
59
  del _llm
60
  del _tokenizer
61
  _llm = None
62
  _tokenizer = None
63
  _current_model_id = None
 
64
  gc.collect()
65
  if torch.cuda.is_available():
66
  torch.cuda.empty_cache()
 
6
 
7
  _llm = None
8
  _tokenizer = None
9
+ _tokenizer_only = None
10
  _embedder = None
11
  _current_model_id = None
12
  _current_embedder_id = None
 
25
  return _current_embedder_id
26
 
27
 
28
def get_tokenizer_only():
    """Return a tokenizer without going through `get_llm()`.

    The module-level `_tokenizer` is returned when it is already set.
    Otherwise a standalone tokenizer for `config.LLM_MODEL` is created on the
    first call, cached in `_tokenizer_only`, and reused afterwards.
    """
    global _tokenizer_only
    if _tokenizer is None:
        # No shared tokenizer yet: create the standalone one on first use.
        if _tokenizer_only is None:
            _tokenizer_only = AutoTokenizer.from_pretrained(config.LLM_MODEL)
        return _tokenizer_only
    return _tokenizer
35
+
36
+
37
  def get_llm():
38
  global _llm, _tokenizer
39
  if _llm is None:
 
65
 
66
  def _unload_llm():
67
  """Free GPU/CPU memory before loading a different model."""
68
+ global _llm, _tokenizer, _current_model_id, _tokenizer_only
69
  del _llm
70
  del _tokenizer
71
  _llm = None
72
  _tokenizer = None
73
  _current_model_id = None
74
+ _tokenizer_only = None
75
  gc.collect()
76
  if torch.cuda.is_available():
77
  torch.cuda.empty_cache()
requirements.txt CHANGED
@@ -1,8 +1,6 @@
1
- --extra-index-url https://download.pytorch.org/whl/cpu
2
  gradio==6.18.0
3
  transformers>=4.40.0
4
  sentence-transformers>=3.0.0
5
- torch>=2.2.0
6
  numpy>=1.26.0
7
  pandas>=2.0.0
8
  accelerate>=0.30.0
 
 
1
  gradio==6.18.0
2
  transformers>=4.40.0
3
  sentence-transformers>=3.0.0
 
4
  numpy>=1.26.0
5
  pandas>=2.0.0
6
  accelerate>=0.30.0