Jellyfish042 committed on
Commit
88a3875
·
1 Parent(s): 8d6299f

Update UI limits and defaults

Browse files
Files changed (1) hide show
  1. app.py +52 -13
app.py CHANGED
@@ -10,7 +10,7 @@ import torch
10
 
11
  from llm_compressor import compress_tokens, decompress_bytes, load_rwkv_model, tokenize_text
12
 
13
- MAX_INPUT_CHARS = 8192
14
  SCRIPT_DIR = Path(__file__).parent.absolute()
15
  SUPPORT_DIR = SCRIPT_DIR / "support"
16
  MODELS_DIR = SCRIPT_DIR / "models"
@@ -180,8 +180,11 @@ def _load_model_and_tokenizer(model_path, tokenizer_name, strategy):
180
  raise gr.Error(f"Failed to load RWKV model: {exc}") from exc
181
 
182
 
183
- def _format_compress_stats(stats):
184
- return "\n".join(
 
 
 
185
  [
186
  f"- Tokens: {stats['tokens']}",
187
  f"- Original bytes: {stats['original_bytes']}",
@@ -192,15 +195,20 @@ def _format_compress_stats(stats):
192
  f"- Speed: {stats['speed_toks_per_s']:.2f} tokens/s",
193
  ]
194
  )
 
195
 
196
 
197
- def _format_decompress_stats(stats):
198
- return "\n".join(
 
 
 
199
  [
200
  f"- Tokens: {stats['tokens']}",
201
  f"- Time: {stats['duration_s']:.2f}s",
202
  ]
203
  )
 
204
 
205
 
206
  def _normalize_strategy(strategy):
@@ -209,11 +217,20 @@ def _normalize_strategy(strategy):
209
  return strategy
210
 
211
 
 
 
 
 
 
 
 
212
  def compress_ui(text, context_window, progress=gr.Progress()):
213
  if not text or not text.strip():
214
  raise gr.Error("Input text is empty.")
215
  if len(text) > MAX_INPUT_CHARS:
216
- raise gr.Error(f"Input is too long ({len(text)} chars). Max is {MAX_INPUT_CHARS}.")
 
 
217
 
218
  model_path = _resolve_default_model_path()
219
  tokenizer_path = _resolve_default_tokenizer_path()
@@ -237,7 +254,7 @@ def compress_ui(text, context_window, progress=gr.Progress()):
237
 
238
  b64 = base64.b64encode(data).decode("ascii")
239
  file_path = _write_temp_file(data)
240
- stats_text = _format_compress_stats(stats)
241
  if effective_strategy != requested_strategy:
242
  stats_text += "\n- Strategy: cpu fp32 (forced, CUDA unavailable)"
243
  else:
@@ -253,7 +270,7 @@ def decompress_ui(b64_data, file_data, context_window):
253
  effective_strategy = _resolve_strategy()
254
  model, tokenizer = _load_model_and_tokenizer(model_path, tokenizer_path, effective_strategy)
255
  text, stats = decompress_bytes(raw, model, tokenizer, context_window=context_window)
256
- stats_text = _format_decompress_stats(stats)
257
  if effective_strategy != requested_strategy:
258
  stats_text += "\n- Strategy: cpu fp32 (forced, CUDA unavailable)"
259
  else:
@@ -262,19 +279,41 @@ def decompress_ui(b64_data, file_data, context_window):
262
 
263
 
264
  def build_ui():
 
265
  with gr.Blocks() as demo:
266
- gr.Markdown("# RWKV LLM Text Compressor")
267
- gr.Markdown(
268
- "This is a proof-of-concept demo. Compression and decompression are slow, "
269
- "and the output is not portable across different models or tokenizers."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  )
 
271
 
272
  context_window = gr.Slider(
273
  label="Context window",
274
  minimum=128,
275
  maximum=4096,
276
  step=128,
277
- value=2048,
278
  )
279
 
280
  gr.Markdown(f"Max input size: {MAX_INPUT_CHARS} characters.")
 
10
 
11
  from llm_compressor import compress_tokens, decompress_bytes, load_rwkv_model, tokenize_text
12
 
13
+ MAX_INPUT_CHARS = 16384
14
  SCRIPT_DIR = Path(__file__).parent.absolute()
15
  SUPPORT_DIR = SCRIPT_DIR / "support"
16
  MODELS_DIR = SCRIPT_DIR / "models"
 
180
  raise gr.Error(f"Failed to load RWKV model: {exc}") from exc
181
 
182
 
183
+ def _format_compress_stats(stats, char_count=None):
184
+ lines = []
185
+ if char_count is not None:
186
+ lines.append(f"- Characters: {char_count}")
187
+ lines.extend(
188
  [
189
  f"- Tokens: {stats['tokens']}",
190
  f"- Original bytes: {stats['original_bytes']}",
 
195
  f"- Speed: {stats['speed_toks_per_s']:.2f} tokens/s",
196
  ]
197
  )
198
+ return "\n".join(lines)
199
 
200
 
201
+ def _format_decompress_stats(stats, char_count=None):
202
+ lines = []
203
+ if char_count is not None:
204
+ lines.append(f"- Characters: {char_count}")
205
+ lines.extend(
206
  [
207
  f"- Tokens: {stats['tokens']}",
208
  f"- Time: {stats['duration_s']:.2f}s",
209
  ]
210
  )
211
+ return "\n".join(lines)
212
 
213
 
214
  def _normalize_strategy(strategy):
 
217
  return strategy
218
 
219
 
220
+ def _get_model_display_name():
221
+ env_model = os.getenv("RWKV_MODEL_PATH")
222
+ if env_model:
223
+ return Path(env_model).stem
224
+ return Path(DEFAULT_MODEL_FILENAME).stem
225
+
226
+
227
  def compress_ui(text, context_window, progress=gr.Progress()):
228
  if not text or not text.strip():
229
  raise gr.Error("Input text is empty.")
230
  if len(text) > MAX_INPUT_CHARS:
231
+ message = f"Input is too long ({len(text)} chars). Max is {MAX_INPUT_CHARS}."
232
+ gr.Info(message)
233
+ return "", f"- {message}", None
234
 
235
  model_path = _resolve_default_model_path()
236
  tokenizer_path = _resolve_default_tokenizer_path()
 
254
 
255
  b64 = base64.b64encode(data).decode("ascii")
256
  file_path = _write_temp_file(data)
257
+ stats_text = _format_compress_stats(stats, char_count=len(text))
258
  if effective_strategy != requested_strategy:
259
  stats_text += "\n- Strategy: cpu fp32 (forced, CUDA unavailable)"
260
  else:
 
270
  effective_strategy = _resolve_strategy()
271
  model, tokenizer = _load_model_and_tokenizer(model_path, tokenizer_path, effective_strategy)
272
  text, stats = decompress_bytes(raw, model, tokenizer, context_window=context_window)
273
+ stats_text = _format_decompress_stats(stats, char_count=len(text))
274
  if effective_strategy != requested_strategy:
275
  stats_text += "\n- Strategy: cpu fp32 (forced, CUDA unavailable)"
276
  else:
 
279
 
280
 
281
  def build_ui():
282
+ model_display = _get_model_display_name()
283
  with gr.Blocks() as demo:
284
+ gr.HTML(
285
+ f"""
286
+ <div style="text-align: center; margin-bottom: 16px;">
287
+ <h1 style="margin-bottom: 8px;">RWKV LLM Text Compressor</h1>
288
+ <p style="margin-bottom: 12px; color: #666;">
289
+ This is a proof-of-concept demo. Compression and decompression are slow,
290
+ and the output is not portable across different models or tokenizers.
291
+ </p>
292
+ <div style="display: flex; justify-content: center; align-items: center; gap: 10px; flex-wrap: wrap;">
293
+ <a href="https://github.com/Jellyfish042/uncheatable_eval" target="_blank" style="text-decoration: none;">
294
+ <img src="https://img.shields.io/badge/GitHub-Project-181717?logo=github" alt="GitHub Project">
295
+ </a>
296
+ <a href="https://huggingface.co/spaces/Jellyfish042/UncheatableEval" target="_blank" style="text-decoration: none;">
297
+ <img src="https://img.shields.io/badge/%F0%9F%8F%86%20Leaderboard-Gradio-ff7c00" alt="Leaderboard">
298
+ </a>
299
+ <a href="https://huggingface.co/spaces/Jellyfish042/Compression-Lens" target="_blank" style="text-decoration: none;">
300
+ <img src="https://img.shields.io/badge/%F0%9F%94%AC%20Compression--Lens-Visualization-blue" alt="Compression Lens">
301
+ </a>
302
+ </div>
303
+ <div style="margin-top: 10px; font-size: 0.95em; color: #444;">
304
+ Model: <code>{model_display}</code>
305
+ </div>
306
+ </div>
307
+ """
308
  )
309
+ gr.Markdown("If CUDA is unavailable, the app forces the strategy to cpu fp32.")
310
 
311
  context_window = gr.Slider(
312
  label="Context window",
313
  minimum=128,
314
  maximum=4096,
315
  step=128,
316
+ value=4096,
317
  )
318
 
319
  gr.Markdown(f"Max input size: {MAX_INPUT_CHARS} characters.")