Jellyfish042 committed on
Commit
88a3875
·
1 Parent(s): 8d6299f

Update UI limits and defaults

Browse files
Files changed (1) hide show
  1. app.py +52 -13
app.py CHANGED
@@ -10,7 +10,7 @@ import torch
10
 
11
  from llm_compressor import compress_tokens, decompress_bytes, load_rwkv_model, tokenize_text
12
 
13
- MAX_INPUT_CHARS = 8192
14
  SCRIPT_DIR = Path(__file__).parent.absolute()
15
  SUPPORT_DIR = SCRIPT_DIR / "support"
16
  MODELS_DIR = SCRIPT_DIR / "models"
@@ -180,8 +180,11 @@ def _load_model_and_tokenizer(model_path, tokenizer_name, strategy):
180
  raise gr.Error(f"Failed to load RWKV model: {exc}") from exc
181
 
182
 
183
- def _format_compress_stats(stats):
184
- return "\n".join(
 
 
 
185
  [
186
  f"- Tokens: {stats['tokens']}",
187
  f"- Original bytes: {stats['original_bytes']}",
@@ -192,15 +195,20 @@ def _format_compress_stats(stats):
192
  f"- Speed: {stats['speed_toks_per_s']:.2f} tokens/s",
193
  ]
194
  )
 
195
 
196
 
197
- def _format_decompress_stats(stats):
198
- return "\n".join(
 
 
 
199
  [
200
  f"- Tokens: {stats['tokens']}",
201
  f"- Time: {stats['duration_s']:.2f}s",
202
  ]
203
  )
 
204
 
205
 
206
  def _normalize_strategy(strategy):
@@ -209,11 +217,20 @@ def _normalize_strategy(strategy):
209
  return strategy
210
 
211
 
 
 
 
 
 
 
 
212
  def compress_ui(text, context_window, progress=gr.Progress()):
213
  if not text or not text.strip():
214
  raise gr.Error("Input text is empty.")
215
  if len(text) > MAX_INPUT_CHARS:
216
- raise gr.Error(f"Input is too long ({len(text)} chars). Max is {MAX_INPUT_CHARS}.")
 
 
217
 
218
  model_path = _resolve_default_model_path()
219
  tokenizer_path = _resolve_default_tokenizer_path()
@@ -237,7 +254,7 @@ def compress_ui(text, context_window, progress=gr.Progress()):
237
 
238
  b64 = base64.b64encode(data).decode("ascii")
239
  file_path = _write_temp_file(data)
240
- stats_text = _format_compress_stats(stats)
241
  if effective_strategy != requested_strategy:
242
  stats_text += "\n- Strategy: cpu fp32 (forced, CUDA unavailable)"
243
  else:
@@ -253,7 +270,7 @@ def decompress_ui(b64_data, file_data, context_window):
253
  effective_strategy = _resolve_strategy()
254
  model, tokenizer = _load_model_and_tokenizer(model_path, tokenizer_path, effective_strategy)
255
  text, stats = decompress_bytes(raw, model, tokenizer, context_window=context_window)
256
- stats_text = _format_decompress_stats(stats)
257
  if effective_strategy != requested_strategy:
258
  stats_text += "\n- Strategy: cpu fp32 (forced, CUDA unavailable)"
259
  else:
@@ -262,19 +279,41 @@ def decompress_ui(b64_data, file_data, context_window):
262
 
263
 
264
  def build_ui():
 
265
  with gr.Blocks() as demo:
266
- gr.Markdown("# RWKV LLM Text Compressor")
267
- gr.Markdown(
268
- "This is a proof-of-concept demo. Compression and decompression are slow, "
269
- "and the output is not portable across different models or tokenizers."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  )
 
271
 
272
  context_window = gr.Slider(
273
  label="Context window",
274
  minimum=128,
275
  maximum=4096,
276
  step=128,
277
- value=2048,
278
  )
279
 
280
  gr.Markdown(f"Max input size: {MAX_INPUT_CHARS} characters.")
 
10
 
11
  from llm_compressor import compress_tokens, decompress_bytes, load_rwkv_model, tokenize_text
12
 
13
+ MAX_INPUT_CHARS = 16384
14
  SCRIPT_DIR = Path(__file__).parent.absolute()
15
  SUPPORT_DIR = SCRIPT_DIR / "support"
16
  MODELS_DIR = SCRIPT_DIR / "models"
 
180
  raise gr.Error(f"Failed to load RWKV model: {exc}") from exc
181
 
182
 
183
+ def _format_compress_stats(stats, char_count=None):
184
+ lines = []
185
+ if char_count is not None:
186
+ lines.append(f"- Characters: {char_count}")
187
+ lines.extend(
188
  [
189
  f"- Tokens: {stats['tokens']}",
190
  f"- Original bytes: {stats['original_bytes']}",
 
195
  f"- Speed: {stats['speed_toks_per_s']:.2f} tokens/s",
196
  ]
197
  )
198
+ return "\n".join(lines)
199
 
200
 
201
+ def _format_decompress_stats(stats, char_count=None):
202
+ lines = []
203
+ if char_count is not None:
204
+ lines.append(f"- Characters: {char_count}")
205
+ lines.extend(
206
  [
207
  f"- Tokens: {stats['tokens']}",
208
  f"- Time: {stats['duration_s']:.2f}s",
209
  ]
210
  )
211
+ return "\n".join(lines)
212
 
213
 
214
  def _normalize_strategy(strategy):
 
217
  return strategy
218
 
219
 
220
+ def _get_model_display_name():
221
+ env_model = os.getenv("RWKV_MODEL_PATH")
222
+ if env_model:
223
+ return Path(env_model).stem
224
+ return Path(DEFAULT_MODEL_FILENAME).stem
225
+
226
+
227
  def compress_ui(text, context_window, progress=gr.Progress()):
228
  if not text or not text.strip():
229
  raise gr.Error("Input text is empty.")
230
  if len(text) > MAX_INPUT_CHARS:
231
+ message = f"Input is too long ({len(text)} chars). Max is {MAX_INPUT_CHARS}."
232
+ gr.Info(message)
233
+ return "", f"- {message}", None
234
 
235
  model_path = _resolve_default_model_path()
236
  tokenizer_path = _resolve_default_tokenizer_path()
 
254
 
255
  b64 = base64.b64encode(data).decode("ascii")
256
  file_path = _write_temp_file(data)
257
+ stats_text = _format_compress_stats(stats, char_count=len(text))
258
  if effective_strategy != requested_strategy:
259
  stats_text += "\n- Strategy: cpu fp32 (forced, CUDA unavailable)"
260
  else:
 
270
  effective_strategy = _resolve_strategy()
271
  model, tokenizer = _load_model_and_tokenizer(model_path, tokenizer_path, effective_strategy)
272
  text, stats = decompress_bytes(raw, model, tokenizer, context_window=context_window)
273
+ stats_text = _format_decompress_stats(stats, char_count=len(text))
274
  if effective_strategy != requested_strategy:
275
  stats_text += "\n- Strategy: cpu fp32 (forced, CUDA unavailable)"
276
  else:
 
279
 
280
 
281
  def build_ui():
282
+ model_display = _get_model_display_name()
283
  with gr.Blocks() as demo:
284
+ gr.HTML(
285
+ f"""
286
+ <div style="text-align: center; margin-bottom: 16px;">
287
+ <h1 style="margin-bottom: 8px;">RWKV LLM Text Compressor</h1>
288
+ <p style="margin-bottom: 12px; color: #666;">
289
+ This is a proof-of-concept demo. Compression and decompression are slow,
290
+ and the output is not portable across different models or tokenizers.
291
+ </p>
292
+ <div style="display: flex; justify-content: center; align-items: center; gap: 10px; flex-wrap: wrap;">
293
+ <a href="https://github.com/Jellyfish042/uncheatable_eval" target="_blank" style="text-decoration: none;">
294
+ <img src="https://img.shields.io/badge/GitHub-Project-181717?logo=github" alt="GitHub Project">
295
+ </a>
296
+ <a href="https://huggingface.co/spaces/Jellyfish042/UncheatableEval" target="_blank" style="text-decoration: none;">
297
+ <img src="https://img.shields.io/badge/%F0%9F%8F%86%20Leaderboard-Gradio-ff7c00" alt="Leaderboard">
298
+ </a>
299
+ <a href="https://huggingface.co/spaces/Jellyfish042/Compression-Lens" target="_blank" style="text-decoration: none;">
300
+ <img src="https://img.shields.io/badge/%F0%9F%94%AC%20Compression--Lens-Visualization-blue" alt="Compression Lens">
301
+ </a>
302
+ </div>
303
+ <div style="margin-top: 10px; font-size: 0.95em; color: #444;">
304
+ Model: <code>{model_display}</code>
305
+ </div>
306
+ </div>
307
+ """
308
  )
309
+ gr.Markdown("If CUDA is unavailable, the app forces the strategy to cpu fp32.")
310
 
311
  context_window = gr.Slider(
312
  label="Context window",
313
  minimum=128,
314
  maximum=4096,
315
  step=128,
316
+ value=4096,
317
  )
318
 
319
  gr.Markdown(f"Max input size: {MAX_INPUT_CHARS} characters.")