Spaces:

Jellyfish042
/

Compression-Lens

Running

Jellyfish042 Claude Sonnet 4.5 committed on Jan 18

Commit

cddd3a5

1 Parent(s): 6bbbdc0

Apply code formatting and update title

Changes:
- Applied automatic code formatting (line length, quotes)
- Updated title: "Qwen3 vs RWKV7" → "RWKV-7 vs Qwen3"
- Reformatted multi-line function calls for consistency

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

Files changed (1) hide show

app.py +24 -71

app.py CHANGED Viewed

@@ -50,10 +50,7 @@ def download_rwkv_model(progress=None):
     # Download from HuggingFace Hub
     downloaded_path = hf_hub_download(
-        repo_id="BlinkDL/rwkv7-g1",
-        filename=RWKV_MODEL_FILENAME,
-        local_dir=str(MODELS_DIR),
-        local_dir_use_symlinks=False
     )
     return downloaded_path
@@ -63,40 +60,18 @@ def load_qwen_model():
     """Load Qwen3-1.7B-Base model."""
     from transformers import AutoTokenizer, AutoModelForCausalLM
-    tokenizer = AutoTokenizer.from_pretrained(
-        QWEN_MODEL_ID,
-        trust_remote_code=True
-    )
     # Configure based on device
     if IS_CPU:
-        model_kwargs = {
-            "torch_dtype": torch.float32,
-            "device_map": None,
-            "trust_remote_code": True,
-            "low_cpu_mem_usage": True
-        }
-        model = AutoModelForCausalLM.from_pretrained(
-            QWEN_MODEL_ID,
-            **model_kwargs
-        ).eval()
     else:
-        model_kwargs = {
-            "torch_dtype": torch.bfloat16,
-            "device_map": "auto",
-            "trust_remote_code": True
-        }
         try:
-            model = AutoModelForCausalLM.from_pretrained(
-                QWEN_MODEL_ID,
-                attn_implementation="flash_attention_2",
-                **model_kwargs
-            ).eval()
         except Exception:
-            model = AutoModelForCausalLM.from_pretrained(
-                QWEN_MODEL_ID,
-                **model_kwargs
-            ).eval()
     return model, tokenizer
@@ -122,7 +97,7 @@ def load_rwkv7_model(model_path: str):
         strategy = "cuda fp16"
     # RWKV library automatically adds .pth extension, so remove it if present
-    if model_path.endswith('.pth'):
         model_path = model_path[:-4]
     model = RWKV(model=model_path, strategy=strategy)
@@ -174,14 +149,14 @@ def wrap_html_in_iframe(html: str) -> str:
     """Wrap HTML in an iframe for Gradio display."""
     # For srcdoc attribute, we only need to escape quotes
     # The HTML entities inside (like &quot;, &#10;) should remain as-is
-    escaped = html.replace('"', '&quot;')
-    return f'''
     <div style="width:100%;height:700px;border:1px solid #ddd;border-radius:8px;overflow:hidden;">
         <iframe srcdoc="{escaped}"
                 style="width:100%;height:100%;border:none;"
                 sandbox="allow-scripts"></iframe>
     </div>
-    '''
 def run_evaluation(text: str, progress=gr.Progress()):
@@ -202,20 +177,11 @@ def run_evaluation(text: str, progress=gr.Progress()):
     try:
         # Step 1: Evaluate Qwen (using cached model)
         progress(0, desc="Evaluating with Qwen3...")
-        result_qwen = evaluate_hf_single_sample(
-            _qwen_model,
-            _qwen_tokenizer,
-            text,
-            bos_mode="add_newline_token"
-        )
         # Step 2: Evaluate RWKV7 (using cached model)
         progress(0, desc="Evaluating with RWKV7...")
-        result_rwkv = evaluate_rwkv7_single_sample(
-            _rwkv_model,
-            _rwkv_tokenizer,
-            text
-        )
         # Step 3: Generate visualization
         progress(0, desc="Generating visualization...")
@@ -230,7 +196,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
             tokenizer_a=result_rwkv["tokenizer"],
             tokenizer_b=result_qwen["tokenizer"],
             model_type_a="rwkv7",
-            model_type_b="hf"
         )
         # Wrap HTML for iframe display
@@ -242,11 +208,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
         gc.collect()
-        raise gr.Error(
-            "GPU memory insufficient. Please try:\n"
-            "1. Use shorter text\n"
-            "2. Wait a moment and try again"
-        )
     except Exception as e:
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
@@ -260,15 +222,13 @@ def clear_inputs():
 # Build Gradio UI
-with gr.Blocks(
-    title="Compression-Lens: Qwen3 vs RWKV7",
-    theme=gr.themes.Soft()
-) as demo:
-    gr.Markdown("""
-    # 🔬 Compression-Lens: Qwen3 vs RWKV7 Byte-Level Comparison
-    Compare the byte-level prediction performance between **Qwen3-1.7B-Base** and **RWKV7-G1C-1.5B**.
-    """)
     with gr.Row():
         with gr.Column(scale=1):
@@ -290,16 +250,9 @@ with gr.Blocks(
             output_html = gr.HTML(label="Visualization")
     # Event handlers
-    clear_btn.click(
-        fn=clear_inputs,
-        outputs=[text_input, output_html]
-    )
-    run_btn.click(
-        fn=run_evaluation,
-        inputs=[text_input],
-        outputs=[output_html]
-    )
 if __name__ == "__main__":

     # Download from HuggingFace Hub
     downloaded_path = hf_hub_download(
+        repo_id="BlinkDL/rwkv7-g1", filename=RWKV_MODEL_FILENAME, local_dir=str(MODELS_DIR), local_dir_use_symlinks=False
     )
     return downloaded_path
     """Load Qwen3-1.7B-Base model."""
     from transformers import AutoTokenizer, AutoModelForCausalLM
+    tokenizer = AutoTokenizer.from_pretrained(QWEN_MODEL_ID, trust_remote_code=True)
     # Configure based on device
     if IS_CPU:
+        model_kwargs = {"torch_dtype": torch.float32, "device_map": None, "trust_remote_code": True, "low_cpu_mem_usage": True}
+        model = AutoModelForCausalLM.from_pretrained(QWEN_MODEL_ID, **model_kwargs).eval()
     else:
+        model_kwargs = {"torch_dtype": torch.bfloat16, "device_map": "auto", "trust_remote_code": True}
         try:
+            model = AutoModelForCausalLM.from_pretrained(QWEN_MODEL_ID, attn_implementation="flash_attention_2", **model_kwargs).eval()
         except Exception:
+            model = AutoModelForCausalLM.from_pretrained(QWEN_MODEL_ID, **model_kwargs).eval()
     return model, tokenizer
         strategy = "cuda fp16"
     # RWKV library automatically adds .pth extension, so remove it if present
+    if model_path.endswith(".pth"):
         model_path = model_path[:-4]
     model = RWKV(model=model_path, strategy=strategy)
     """Wrap HTML in an iframe for Gradio display."""
     # For srcdoc attribute, we only need to escape quotes
     # The HTML entities inside (like &quot;, &#10;) should remain as-is
+    escaped = html.replace('"', "&quot;")
+    return f"""
     <div style="width:100%;height:700px;border:1px solid #ddd;border-radius:8px;overflow:hidden;">
         <iframe srcdoc="{escaped}"
                 style="width:100%;height:100%;border:none;"
                 sandbox="allow-scripts"></iframe>
     </div>
+    """
 def run_evaluation(text: str, progress=gr.Progress()):
     try:
         # Step 1: Evaluate Qwen (using cached model)
         progress(0, desc="Evaluating with Qwen3...")
+        result_qwen = evaluate_hf_single_sample(_qwen_model, _qwen_tokenizer, text, bos_mode="add_newline_token")
         # Step 2: Evaluate RWKV7 (using cached model)
         progress(0, desc="Evaluating with RWKV7...")
+        result_rwkv = evaluate_rwkv7_single_sample(_rwkv_model, _rwkv_tokenizer, text)
         # Step 3: Generate visualization
         progress(0, desc="Generating visualization...")
             tokenizer_a=result_rwkv["tokenizer"],
             tokenizer_b=result_qwen["tokenizer"],
             model_type_a="rwkv7",
+            model_type_b="hf",
         )
         # Wrap HTML for iframe display
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
         gc.collect()
+        raise gr.Error("GPU memory insufficient. Please try:\n" "1. Use shorter text\n" "2. Wait a moment and try again")
     except Exception as e:
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
 # Build Gradio UI
+with gr.Blocks(title="Compression-Lens: RWKV-7 vs Qwen3", theme=gr.themes.Soft()) as demo:
+    gr.Markdown(
+        """
+    # 🔬 Compression-Lens: RWKV-7 vs Qwen3 Byte-Level Comparison
+    Compare the byte-level prediction performance between **RWKV7-G1C-1.5B** and **Qwen3-1.7B-Base**.
+    """
+    )
     with gr.Row():
         with gr.Column(scale=1):
             output_html = gr.HTML(label="Visualization")
     # Event handlers
+    clear_btn.click(fn=clear_inputs, outputs=[text_input, output_html])
+    run_btn.click(fn=run_evaluation, inputs=[text_input], outputs=[output_html])
 if __name__ == "__main__":