Spaces:

build-small-hackathon
/

tiny-press

Paused

pr/2

by sriharsha-cr - opened Jun 13

←

Files changed (5) hide show

README.md CHANGED Viewed

@@ -35,6 +35,7 @@ No cloud. No API bill. Two small models running quietly on your machine.
 [![TinyPress Demo](https://img.youtube.com/vi/hDbIDtjjiB0/0.jpg)](https://youtu.be/hDbIDtjjiB0)
 ---
 ## Why this fits Thousand Token Wood
@@ -134,7 +135,7 @@ Full docs: [Architecture](docs/architecture.md) · [Setup](docs/setup.md) · [Ge
 Built by **[Sriharsha C R](https://www.linkedin.com/in/sriharsha-cr)** — AI Engineer and Cloud Native developer.
-[![LinkedIn](https://img.shields.io/badge/LinkedIn-sriharsha--cr-0a66c2?logo=linkedin&logoColor=white)](https://www.linkedin.com/in/sriharsha-cr)
-[![X / Twitter](https://img.shields.io/badge/X-@sriharsha__cr-000000?logo=x&logoColor=white)](https://x.com/sriharsha_cr)
 [![HuggingFace](https://img.shields.io/badge/HuggingFace-sriharsha--cr-ff9d00?logo=huggingface&logoColor=white)](https://huggingface.co/sriharsha-cr)
 [![GitHub](https://img.shields.io/badge/GitHub-SriharshaCR-181717?logo=github&logoColor=white)](https://github.com/SriharshaCR)

 [![TinyPress Demo](https://img.youtube.com/vi/hDbIDtjjiB0/0.jpg)](https://youtu.be/hDbIDtjjiB0)
 ---
 ## Why this fits Thousand Token Wood
 Built by **[Sriharsha C R](https://www.linkedin.com/in/sriharsha-cr)** — AI Engineer and Cloud Native developer.
+[![LinkedIn](https://img.shields.io/badge/LinkedIn-sriharsha--cr-0a66c2?logo=linkedin&logoColor=white)](https://www.linkedin.com/posts/sriharsha-cr_tinypress-prompt-compression-engine-activity-7471426128331624448-aKfe)
+[![X / Twitter](https://img.shields.io/badge/X-@sriharsha__cr-000000?logo=x&logoColor=white)](https://x.com/sriharsha_cr/status/2065662576684650879)
 [![HuggingFace](https://img.shields.io/badge/HuggingFace-sriharsha--cr-ff9d00?logo=huggingface&logoColor=white)](https://huggingface.co/sriharsha-cr)
 [![GitHub](https://img.shields.io/badge/GitHub-SriharshaCR-181717?logo=github&logoColor=white)](https://github.com/SriharshaCR)

app.py CHANGED Viewed

@@ -2,6 +2,12 @@ import gradio as gr
 import config
 from ui.compress_tab import build_compress_tab
 from ui.history_tab import build_history_tab
 def build_app() -> gr.Blocks:

 import config
 from ui.compress_tab import build_compress_tab
 from ui.history_tab import build_history_tab
+from models.model_loader import get_tokenizer_only
+try:
+    get_tokenizer_only()  # pre-warm; first keystroke is instant
+except Exception:
+    pass  # falls back to lazy load on first keystroke
 def build_app() -> gr.Blocks:

core/tokenizer_utils.py CHANGED Viewed

@@ -1,13 +1,13 @@
-from models.model_loader import get_llm
 def count_tokens(text: str) -> int:
-    _, tokenizer = get_llm()
     return len(tokenizer.encode(text, add_special_tokens=False))
 def get_token_strings(text: str) -> list[str]:
     """Return the decoded surface string for every token in text."""
-    _, tokenizer = get_llm()
     ids = tokenizer.encode(text, add_special_tokens=False)
     return [tokenizer.decode([i]) for i in ids]

+from models.model_loader import get_tokenizer_only
 def count_tokens(text: str) -> int:
+    tokenizer = get_tokenizer_only()
     return len(tokenizer.encode(text, add_special_tokens=False))
 def get_token_strings(text: str) -> list[str]:
     """Return the decoded surface string for every token in text."""
+    tokenizer = get_tokenizer_only()
     ids = tokenizer.encode(text, add_special_tokens=False)
     return [tokenizer.decode([i]) for i in ids]

models/model_loader.py CHANGED Viewed

@@ -6,6 +6,7 @@ import config
 _llm = None
 _tokenizer = None
 _embedder = None
 _current_model_id = None
 _current_embedder_id = None
@@ -24,6 +25,15 @@ def get_current_embedder_id() -> str | None:
     return _current_embedder_id
 def get_llm():
     global _llm, _tokenizer
     if _llm is None:
@@ -55,12 +65,13 @@ def _load_llm(model_id: str):
 def _unload_llm():
     """Free GPU/CPU memory before loading a different model."""
-    global _llm, _tokenizer, _current_model_id
     del _llm
     del _tokenizer
     _llm = None
     _tokenizer = None
     _current_model_id = None
     gc.collect()
     if torch.cuda.is_available():
         torch.cuda.empty_cache()

 _llm = None
 _tokenizer = None
+_tokenizer_only = None
 _embedder = None
 _current_model_id = None
 _current_embedder_id = None
     return _current_embedder_id
+def get_tokenizer_only():
+    global _tokenizer_only
+    if _tokenizer is not None:
+        return _tokenizer
+    if _tokenizer_only is None:
+        _tokenizer_only = AutoTokenizer.from_pretrained(config.LLM_MODEL)
+    return _tokenizer_only
 def get_llm():
     global _llm, _tokenizer
     if _llm is None:
 def _unload_llm():
     """Free GPU/CPU memory before loading a different model."""
+    global _llm, _tokenizer, _current_model_id, _tokenizer_only
     del _llm
     del _tokenizer
     _llm = None
     _tokenizer = None
     _current_model_id = None
+    _tokenizer_only = None
     gc.collect()
     if torch.cuda.is_available():
         torch.cuda.empty_cache()

requirements.txt CHANGED Viewed

@@ -1,8 +1,6 @@
---extra-index-url https://download.pytorch.org/whl/cpu
 gradio==6.18.0
 transformers>=4.40.0
 sentence-transformers>=3.0.0
-torch>=2.2.0
 numpy>=1.26.0
 pandas>=2.0.0
 accelerate>=0.30.0

 gradio==6.18.0
 transformers>=4.40.0
 sentence-transformers>=3.0.0
 numpy>=1.26.0
 pandas>=2.0.0
 accelerate>=0.30.0