Spaces:

Jellyfish042
/

Compression-Lens

Sleeping

App Files Community

Jellyfish042 committed on Feb 12

Commit

3d88788

1 Parent(s): 7a459aa

update to g1d

Browse files

Files changed (3) hide show

README.md +1 -1
app.py +7 -7
precompute_example.py +4 -4

README.md CHANGED Viewed

@@ -36,7 +36,7 @@ Compare the byte-level prediction performance between models.
 | Model | Type | Parameters | Architecture |
 |-------|------|------------|--------------|
 | Qwen3-1.7B-Base | Transformer | 1.7B | Dense attention |
-| RWKV7-G1C-1.5B | RWKV | 1.5B | Linear attention |
 ## Technical Details

 | Model | Type | Parameters | Architecture |
 |-------|------|------------|--------------|
 | Qwen3-1.7B-Base | Transformer | 1.7B | Dense attention |
+| RWKV7-G1D-1.5B | RWKV | 1.5B | Linear attention |
 ## Technical Details

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """
 UncheatableEval Visualization - Hugging Face Space
-Compare byte-level prediction performance between Qwen3-1.7B-Base and RWKV7-G1C-1.5B.
 """
 import gc
@@ -19,8 +19,8 @@ IS_CPU = DEVICE == "cpu"
 # Model configuration
 QWEN_MODEL_ID = "Qwen/Qwen3-1.7B-Base"
-RWKV_MODEL_URL = "https://huggingface.co/BlinkDL/rwkv7-g1/resolve/main/rwkv7-g1c-1.5b-20260110-ctx8192.pth"
-RWKV_MODEL_FILENAME = "rwkv7-g1c-1.5b-20260110-ctx8192.pth"
 # Get the directory where this script is located
 SCRIPT_DIR = Path(__file__).parent.absolute()
@@ -85,7 +85,7 @@ def load_qwen_model():
 def load_rwkv7_model(model_path: str):
-    """Load RWKV7-G1C-1.5B model."""
     os.environ["RWKV_JIT_ON"] = "1"
     os.environ["RWKV_V7_ON"] = "1"
@@ -172,7 +172,7 @@ def initialize_models():
     _qwen_model, _qwen_tokenizer = load_qwen_model()
     # Load RWKV7 model
-    print("Loading RWKV7-G1C-1.5B...")
     _rwkv_model, _rwkv_tokenizer = load_rwkv7_model(_rwkv_model_path)
     # Initialize stats manager
@@ -267,7 +267,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
             text=text,
             byte_losses_a=result_rwkv["byte_wise_losses"],
             byte_losses_b=result_qwen["byte_wise_losses"],
-            model_a_name="RWKV7-G1C-1.5B",
             model_b_name="Qwen3-1.7B-Base",
             topk_predictions_a=result_rwkv["top5_predictions"],
             topk_predictions_b=result_qwen["top5_predictions"],
@@ -352,7 +352,7 @@ with gr.Blocks(
         """
     <div style="text-align: center; margin-bottom: 20px;">
         <h1 style="margin-bottom: 10px;">🔬 Compression-Lens: RWKV-7 vs Qwen3 Byte-Level Comparison</h1>
-        <p style="margin-bottom: 15px; color: #666;">Compare the byte-level prediction performance between <strong>RWKV7-G1C-1.5B</strong> and <strong>Qwen3-1.7B-Base</strong>.</p>
         <div style="display: flex; justify-content: center; align-items: center; gap: 10px;">
             <a href="https://github.com/Jellyfish042/uncheatable_eval" target="_blank" style="text-decoration: none;">
                 <img src="https://img.shields.io/badge/GitHub-Project-181717?logo=github" alt="GitHub Project">

 """
 UncheatableEval Visualization - Hugging Face Space
+Compare byte-level prediction performance between Qwen3-1.7B-Base and RWKV7-G1D-1.5B.
 """
 import gc
 # Model configuration
 QWEN_MODEL_ID = "Qwen/Qwen3-1.7B-Base"
+RWKV_MODEL_URL = "https://huggingface.co/BlinkDL/rwkv7-g1/resolve/main/rwkv7-g1d-1.5b-20260212-ctx8192.pth"
+RWKV_MODEL_FILENAME = "rwkv7-g1d-1.5b-20260212-ctx8192.pth"
 # Get the directory where this script is located
 SCRIPT_DIR = Path(__file__).parent.absolute()
 def load_rwkv7_model(model_path: str):
+    """Load RWKV7-G1D-1.5B model."""
     os.environ["RWKV_JIT_ON"] = "1"
     os.environ["RWKV_V7_ON"] = "1"
     _qwen_model, _qwen_tokenizer = load_qwen_model()
     # Load RWKV7 model
+    print("Loading RWKV7-G1D-1.5B...")
     _rwkv_model, _rwkv_tokenizer = load_rwkv7_model(_rwkv_model_path)
     # Initialize stats manager
             text=text,
             byte_losses_a=result_rwkv["byte_wise_losses"],
             byte_losses_b=result_qwen["byte_wise_losses"],
+            model_a_name="RWKV7-G1D-1.5B",
             model_b_name="Qwen3-1.7B-Base",
             topk_predictions_a=result_rwkv["top5_predictions"],
             topk_predictions_b=result_qwen["top5_predictions"],
         """
     <div style="text-align: center; margin-bottom: 20px;">
         <h1 style="margin-bottom: 10px;">🔬 Compression-Lens: RWKV-7 vs Qwen3 Byte-Level Comparison</h1>
+        <p style="margin-bottom: 15px; color: #666;">Compare the byte-level prediction performance between <strong>RWKV7-G1D-1.5B</strong> and <strong>Qwen3-1.7B-Base</strong>.</p>
         <div style="display: flex; justify-content: center; align-items: center; gap: 10px;">
             <a href="https://github.com/Jellyfish042/uncheatable_eval" target="_blank" style="text-decoration: none;">
                 <img src="https://img.shields.io/badge/GitHub-Project-181717?logo=github" alt="GitHub Project">

precompute_example.py CHANGED Viewed

@@ -23,7 +23,7 @@ PRECOMPUTED_DIR = SCRIPT_DIR / "precomputed"
 # Model configuration
 QWEN_MODEL_ID = "Qwen/Qwen3-1.7B-Base"
-RWKV_MODEL_FILENAME = "rwkv7-g1c-1.5b-20260110-ctx8192.pth"
 # Detect device
 # DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -69,7 +69,7 @@ def load_qwen_model():
 def load_rwkv7_model(model_path: str):
-    """Load RWKV7-G1C-1.5B model."""
     os.environ["RWKV_JIT_ON"] = "1"
     os.environ["RWKV_V7_ON"] = "1"
@@ -116,7 +116,7 @@ def precompute_example():
     print("Loading Qwen3-1.7B-Base...")
     qwen_model, qwen_tokenizer = load_qwen_model()
-    print("Loading RWKV7-G1C-1.5B...")
     rwkv_model, rwkv_tokenizer = load_rwkv7_model(rwkv_model_path)
     # Run evaluations
@@ -134,7 +134,7 @@ def precompute_example():
         text=example_text,
         byte_losses_a=result_rwkv["byte_wise_losses"],
         byte_losses_b=result_qwen["byte_wise_losses"],
-        model_a_name="RWKV7-G1C-1.5B",
         model_b_name="Qwen3-1.7B-Base",
         topk_predictions_a=result_rwkv["top5_predictions"],
         topk_predictions_b=result_qwen["top5_predictions"],

 # Model configuration
 QWEN_MODEL_ID = "Qwen/Qwen3-1.7B-Base"
+RWKV_MODEL_FILENAME = "rwkv7-g1d-1.5b-20260212-ctx8192.pth"
 # Detect device
 # DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 def load_rwkv7_model(model_path: str):
+    """Load RWKV7-G1D-1.5B model."""
     os.environ["RWKV_JIT_ON"] = "1"
     os.environ["RWKV_V7_ON"] = "1"
     print("Loading Qwen3-1.7B-Base...")
     qwen_model, qwen_tokenizer = load_qwen_model()
+    print("Loading RWKV7-G1D-1.5B...")
     rwkv_model, rwkv_tokenizer = load_rwkv7_model(rwkv_model_path)
     # Run evaluations
         text=example_text,
         byte_losses_a=result_rwkv["byte_wise_losses"],
         byte_losses_b=result_qwen["byte_wise_losses"],
+        model_a_name="RWKV7-G1D-1.5B",
         model_b_name="Qwen3-1.7B-Base",
         topk_predictions_a=result_rwkv["top5_predictions"],
         topk_predictions_b=result_qwen["top5_predictions"],