Jellyfish042 Claude Sonnet 4.5 committed on
Commit
d68c16d
·
1 Parent(s): 49eb0e6

Optimize model loading with caching and improve performance

Browse files

- Add global model cache to avoid reloading models on each evaluation
- Initialize both Qwen3 and RWKV7 models at startup
- Remove redundant memory cleanup between evaluations
- Simplify progress reporting with safe_progress helper
- Remove download button functionality for cleaner UI
- Add .gitignore to exclude model files and cache

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

Files changed (3) hide show
  1. .claude/settings.local.json +3 -1
  2. .gitignore +27 -0
  3. app.py +58 -84
.claude/settings.local.json CHANGED
@@ -6,7 +6,9 @@
6
  "Bash(git remote add:*)",
7
  "Bash(git push:*)",
8
  "Bash(git branch:*)",
9
- "Bash(git commit -m \"$\\(cat <<''EOF''\nFix Gradio compatibility for HuggingFace Spaces\n\n- Upgrade gradio to >=5.0.0 to fix API schema bug\n- Add server_name and server_port to demo.launch\\(\\)\n\nCo-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>\nEOF\n\\)\")"
 
 
10
  ]
11
  }
12
  }
 
6
  "Bash(git remote add:*)",
7
  "Bash(git push:*)",
8
  "Bash(git branch:*)",
9
+ "Bash(git commit -m \"$\\(cat <<''EOF''\nFix Gradio compatibility for HuggingFace Spaces\n\n- Upgrade gradio to >=5.0.0 to fix API schema bug\n- Add server_name and server_port to demo.launch\\(\\)\n\nCo-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>\nEOF\n\\)\")",
10
+ "Bash(git commit:*)",
11
+ "Bash(git reset:*)"
12
  ]
13
  }
14
  }
.gitignore ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python cache
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+
7
+ # Model files
8
+ models/
9
+ *.pth
10
+ *.bin
11
+ *.safetensors
12
+
13
+ # Virtual environment
14
+ venv/
15
+ env/
16
+ ENV/
17
+
18
+ # IDE
19
+ .vscode/
20
+ .idea/
21
+
22
+ # OS
23
+ .DS_Store
24
+ Thumbs.db
25
+
26
+ # Gradio
27
+ flagged/
app.py CHANGED
@@ -6,7 +6,6 @@ Compare byte-level prediction performance between Qwen3-1.7B-Base and RWKV7-G1C-
6
 
7
  import gc
8
  import os
9
- import tempfile
10
  from pathlib import Path
11
 
12
  import gradio as gr
@@ -30,6 +29,13 @@ SUPPORT_DIR = SCRIPT_DIR / "support"
30
  MAX_TEXT_LENGTH = 4000
31
  MIN_TEXT_LENGTH = 10
32
 
 
 
 
 
 
 
 
33
  # Example texts
34
  EXAMPLE_NEWS = """The rapid advancement of artificial intelligence has sparked both excitement and concern among researchers worldwide. While AI systems demonstrate remarkable capabilities in language understanding and generation, questions remain about their potential impact on employment and society."""
35
 
@@ -56,9 +62,6 @@ def download_rwkv_model(progress=None):
56
 
57
  MODELS_DIR.mkdir(parents=True, exist_ok=True)
58
 
59
- if progress:
60
- progress(0.1, desc="Downloading RWKV7 model...")
61
-
62
  # Download from HuggingFace Hub
63
  downloaded_path = hf_hub_download(
64
  repo_id="BlinkDL/rwkv7-g1",
@@ -132,6 +135,10 @@ def load_rwkv7_model(model_path: str):
132
  else:
133
  strategy = "cuda fp16"
134
 
 
 
 
 
135
  model = RWKV(model=model_path, strategy=strategy)
136
 
137
  vocab_path = str(SUPPORT_DIR / "rwkv_vocab_v20230424.txt")
@@ -156,6 +163,27 @@ def validate_input(text: str) -> tuple[bool, str]:
156
  return True, text
157
 
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  def wrap_html_in_iframe(html: str) -> str:
160
  """Wrap HTML in an iframe for Gradio display."""
161
  escaped = html.replace('"', '&quot;')
@@ -173,6 +201,9 @@ def run_evaluation(text: str, progress=gr.Progress()):
173
  from core.evaluator import evaluate_hf_single_sample, evaluate_rwkv7_single_sample
174
  from visualization.html_generator import generate_comparison_html
175
 
 
 
 
176
  # Validate input
177
  valid, result = validate_input(text)
178
  if not valid:
@@ -180,52 +211,33 @@ def run_evaluation(text: str, progress=gr.Progress()):
180
 
181
  text = result # Use cleaned text
182
 
183
- try:
184
- # Step 1: Download RWKV model if needed
185
- progress(0.05, desc="Checking RWKV7 model...")
186
- rwkv_model_path = download_rwkv_model(progress)
187
-
188
- # Step 2: Load Qwen model
189
- progress(0.1, desc="Loading Qwen3-1.7B-Base...")
190
- qwen_model, qwen_tokenizer = load_qwen_model()
191
 
192
- # Step 3: Evaluate Qwen
193
- progress(0.3, desc="Evaluating with Qwen3...")
 
194
  result_qwen = evaluate_hf_single_sample(
195
- qwen_model,
196
- qwen_tokenizer,
197
  text,
198
  bos_mode="add_newline_token"
199
  )
200
 
201
- # Step 4: Free Qwen memory
202
- progress(0.4, desc="Freeing memory...")
203
- del qwen_model
204
- if torch.cuda.is_available():
205
- torch.cuda.empty_cache()
206
- gc.collect()
207
-
208
- # Step 5: Load RWKV7 model
209
- progress(0.5, desc="Loading RWKV7-G1C-1.5B...")
210
- rwkv_model, rwkv_tokenizer = load_rwkv7_model(rwkv_model_path)
211
-
212
- # Step 6: Evaluate RWKV7
213
- progress(0.7, desc="Evaluating with RWKV7...")
214
  result_rwkv = evaluate_rwkv7_single_sample(
215
- rwkv_model,
216
- rwkv_tokenizer,
217
  text
218
  )
219
 
220
- # Step 7: Free RWKV memory
221
- progress(0.8, desc="Freeing memory...")
222
- del rwkv_model
223
- if torch.cuda.is_available():
224
- torch.cuda.empty_cache()
225
- gc.collect()
226
-
227
  # Step 8: Generate visualization
228
- progress(0.9, desc="Generating visualization...")
229
  html = generate_comparison_html(
230
  text=text,
231
  byte_losses_a=result_qwen["byte_wise_losses"],
@@ -243,11 +255,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
243
  # Wrap HTML for iframe display
244
  wrapped_html = wrap_html_in_iframe(html)
245
 
246
- # Store HTML for download
247
- global _last_html_content
248
- _last_html_content = html
249
-
250
- progress(1.0, desc="Done!")
251
 
252
  return wrapped_html
253
 
@@ -272,10 +280,6 @@ def clear_inputs():
272
  return "", None
273
 
274
 
275
- # Global variable to store the last generated HTML for download
276
- _last_html_content = None
277
-
278
-
279
  # Build Gradio UI
280
  with gr.Blocks(
281
  title="UncheatableEval: Qwen3 vs RWKV7",
@@ -320,7 +324,6 @@ with gr.Blocks(
320
  with gr.Row():
321
  with gr.Column():
322
  output_html = gr.HTML(label="Visualization")
323
- download_file = gr.File(label="📥 Download HTML", visible=False)
324
 
325
  # Event handlers
326
  news_btn.click(fn=lambda: EXAMPLE_NEWS, outputs=[text_input])
@@ -332,45 +335,16 @@ with gr.Blocks(
332
  outputs=[text_input, output_html]
333
  )
334
 
335
- def run_and_prepare_download(text, progress=gr.Progress()):
336
- """Run evaluation and prepare download file."""
337
- wrapped_html = run_evaluation(text, progress)
338
-
339
- # Save HTML for download
340
- temp_file = tempfile.NamedTemporaryFile(
341
- mode='w',
342
- suffix='.html',
343
- delete=False,
344
- encoding='utf-8'
345
- )
346
- temp_file.write(_last_html_content)
347
- temp_file.close()
348
-
349
- return wrapped_html, temp_file.name
350
-
351
  run_btn.click(
352
- fn=run_and_prepare_download,
353
  inputs=[text_input],
354
- outputs=[output_html, download_btn]
355
  )
356
 
357
- gr.Markdown("""
358
- ---
359
- ### About
360
-
361
- This tool uses [UncheatableEval](https://github.com/Jellyfish042/UncheatableEval) to compare
362
- language model performance at the byte level.
363
-
364
- **Models:**
365
- - **Qwen3-1.7B-Base**: Transformer-based model from Alibaba
366
- - **RWKV7-G1C-1.5B**: Linear attention model from RWKV team
367
-
368
- **How it works:**
369
- 1. Both models predict each byte in the input text
370
- 2. Lower prediction loss = better compression = better understanding
371
- 3. The visualization shows where each model performs better or worse
372
- """)
373
-
374
 
375
  if __name__ == "__main__":
 
 
 
 
376
  demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
 
6
 
7
  import gc
8
  import os
 
9
  from pathlib import Path
10
 
11
  import gradio as gr
 
29
  MAX_TEXT_LENGTH = 4000
30
  MIN_TEXT_LENGTH = 10
31
 
32
+ # Global model cache
33
+ _qwen_model = None
34
+ _qwen_tokenizer = None
35
+ _rwkv_model = None
36
+ _rwkv_tokenizer = None
37
+ _rwkv_model_path = None
38
+
39
  # Example texts
40
  EXAMPLE_NEWS = """The rapid advancement of artificial intelligence has sparked both excitement and concern among researchers worldwide. While AI systems demonstrate remarkable capabilities in language understanding and generation, questions remain about their potential impact on employment and society."""
41
 
 
62
 
63
  MODELS_DIR.mkdir(parents=True, exist_ok=True)
64
 
 
 
 
65
  # Download from HuggingFace Hub
66
  downloaded_path = hf_hub_download(
67
  repo_id="BlinkDL/rwkv7-g1",
 
135
  else:
136
  strategy = "cuda fp16"
137
 
138
+ # RWKV library automatically adds .pth extension, so remove it if present
139
+ if model_path.endswith('.pth'):
140
+ model_path = model_path[:-4]
141
+
142
  model = RWKV(model=model_path, strategy=strategy)
143
 
144
  vocab_path = str(SUPPORT_DIR / "rwkv_vocab_v20230424.txt")
 
163
  return True, text
164
 
165
 
166
+ def initialize_models():
167
+ """Initialize and cache both models at startup."""
168
+ global _qwen_model, _qwen_tokenizer, _rwkv_model, _rwkv_tokenizer, _rwkv_model_path
169
+
170
+ print("Initializing models...")
171
+
172
+ # Download RWKV model if needed
173
+ print("Checking RWKV7 model...")
174
+ _rwkv_model_path = download_rwkv_model()
175
+
176
+ # Load Qwen model
177
+ print("Loading Qwen3-1.7B-Base...")
178
+ _qwen_model, _qwen_tokenizer = load_qwen_model()
179
+
180
+ # Load RWKV7 model
181
+ print("Loading RWKV7-G1C-1.5B...")
182
+ _rwkv_model, _rwkv_tokenizer = load_rwkv7_model(_rwkv_model_path)
183
+
184
+ print("Models loaded successfully!")
185
+
186
+
187
  def wrap_html_in_iframe(html: str) -> str:
188
  """Wrap HTML in an iframe for Gradio display."""
189
  escaped = html.replace('"', '&quot;')
 
201
  from core.evaluator import evaluate_hf_single_sample, evaluate_rwkv7_single_sample
202
  from visualization.html_generator import generate_comparison_html
203
 
204
+ # Use cached models
205
+ global _qwen_model, _qwen_tokenizer, _rwkv_model, _rwkv_tokenizer
206
+
207
  # Validate input
208
  valid, result = validate_input(text)
209
  if not valid:
 
211
 
212
  text = result # Use cleaned text
213
 
214
+ # Helper function to safely call progress
215
+ def safe_progress(value, desc):
216
+ try:
217
+ progress(value, desc=desc)
218
+ except:
219
+ pass
 
 
220
 
221
+ try:
222
+ # Step 1: Evaluate Qwen (using cached model)
223
+ safe_progress(0.2, "Evaluating with Qwen3...")
224
  result_qwen = evaluate_hf_single_sample(
225
+ _qwen_model,
226
+ _qwen_tokenizer,
227
  text,
228
  bos_mode="add_newline_token"
229
  )
230
 
231
+ # Step 2: Evaluate RWKV7 (using cached model)
232
+ safe_progress(0.6, "Evaluating with RWKV7...")
 
 
 
 
 
 
 
 
 
 
 
233
  result_rwkv = evaluate_rwkv7_single_sample(
234
+ _rwkv_model,
235
+ _rwkv_tokenizer,
236
  text
237
  )
238
 
 
 
 
 
 
 
 
239
  # Step 8: Generate visualization
240
+ safe_progress(0.9, "Generating visualization...")
241
  html = generate_comparison_html(
242
  text=text,
243
  byte_losses_a=result_qwen["byte_wise_losses"],
 
255
  # Wrap HTML for iframe display
256
  wrapped_html = wrap_html_in_iframe(html)
257
 
258
+ safe_progress(1.0, "Done!")
 
 
 
 
259
 
260
  return wrapped_html
261
 
 
280
  return "", None
281
 
282
 
 
 
 
 
283
  # Build Gradio UI
284
  with gr.Blocks(
285
  title="UncheatableEval: Qwen3 vs RWKV7",
 
324
  with gr.Row():
325
  with gr.Column():
326
  output_html = gr.HTML(label="Visualization")
 
327
 
328
  # Event handlers
329
  news_btn.click(fn=lambda: EXAMPLE_NEWS, outputs=[text_input])
 
335
  outputs=[text_input, output_html]
336
  )
337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  run_btn.click(
339
+ fn=run_evaluation,
340
  inputs=[text_input],
341
+ outputs=[output_html]
342
  )
343
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
 
345
  if __name__ == "__main__":
346
+ # Initialize models before launching the app
347
+ initialize_models()
348
+
349
+ # Launch the Gradio app
350
  demo.launch(server_name="0.0.0.0", server_port=7860, share=False)