Spaces:
Sleeping
Sleeping
Commit ·
3d88788
1
Parent(s): 7a459aa
update to g1d
Browse files
- README.md +1 -1
- app.py +7 -7
- precompute_example.py +4 -4
README.md
CHANGED
|
@@ -36,7 +36,7 @@ Compare the byte-level prediction performance between models.
|
|
| 36 |
| Model | Type | Parameters | Architecture |
|
| 37 |
|-------|------|------------|--------------|
|
| 38 |
| Qwen3-1.7B-Base | Transformer | 1.7B | Dense attention |
|
| 39 |
-
| RWKV7-
|
| 40 |
|
| 41 |
## Technical Details
|
| 42 |
|
|
|
|
| 36 |
| Model | Type | Parameters | Architecture |
|
| 37 |
|-------|------|------------|--------------|
|
| 38 |
| Qwen3-1.7B-Base | Transformer | 1.7B | Dense attention |
|
| 39 |
+
| RWKV7-G1D-1.5B | RWKV | 1.5B | Linear attention |
|
| 40 |
|
| 41 |
## Technical Details
|
| 42 |
|
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
"""
|
| 2 |
UncheatableEval Visualization - Hugging Face Space
|
| 3 |
|
| 4 |
-
Compare byte-level prediction performance between Qwen3-1.7B-Base and RWKV7-
|
| 5 |
"""
|
| 6 |
|
| 7 |
import gc
|
|
@@ -19,8 +19,8 @@ IS_CPU = DEVICE == "cpu"
|
|
| 19 |
|
| 20 |
# Model configuration
|
| 21 |
QWEN_MODEL_ID = "Qwen/Qwen3-1.7B-Base"
|
| 22 |
-
RWKV_MODEL_URL = "https://huggingface.co/BlinkDL/rwkv7-g1/resolve/main/rwkv7-
|
| 23 |
-
RWKV_MODEL_FILENAME = "rwkv7-
|
| 24 |
|
| 25 |
# Get the directory where this script is located
|
| 26 |
SCRIPT_DIR = Path(__file__).parent.absolute()
|
|
@@ -85,7 +85,7 @@ def load_qwen_model():
|
|
| 85 |
|
| 86 |
|
| 87 |
def load_rwkv7_model(model_path: str):
|
| 88 |
-
"""Load RWKV7-
|
| 89 |
os.environ["RWKV_JIT_ON"] = "1"
|
| 90 |
os.environ["RWKV_V7_ON"] = "1"
|
| 91 |
|
|
@@ -172,7 +172,7 @@ def initialize_models():
|
|
| 172 |
_qwen_model, _qwen_tokenizer = load_qwen_model()
|
| 173 |
|
| 174 |
# Load RWKV7 model
|
| 175 |
-
print("Loading RWKV7-
|
| 176 |
_rwkv_model, _rwkv_tokenizer = load_rwkv7_model(_rwkv_model_path)
|
| 177 |
|
| 178 |
# Initialize stats manager
|
|
@@ -267,7 +267,7 @@ def run_evaluation(text: str, progress=gr.Progress()):
|
|
| 267 |
text=text,
|
| 268 |
byte_losses_a=result_rwkv["byte_wise_losses"],
|
| 269 |
byte_losses_b=result_qwen["byte_wise_losses"],
|
| 270 |
-
model_a_name="RWKV7-
|
| 271 |
model_b_name="Qwen3-1.7B-Base",
|
| 272 |
topk_predictions_a=result_rwkv["top5_predictions"],
|
| 273 |
topk_predictions_b=result_qwen["top5_predictions"],
|
|
@@ -352,7 +352,7 @@ with gr.Blocks(
|
|
| 352 |
"""
|
| 353 |
<div style="text-align: center; margin-bottom: 20px;">
|
| 354 |
<h1 style="margin-bottom: 10px;">🔬 Compression-Lens: RWKV-7 vs Qwen3 Byte-Level Comparison</h1>
|
| 355 |
-
<p style="margin-bottom: 15px; color: #666;">Compare the byte-level prediction performance between <strong>RWKV7-
|
| 356 |
<div style="display: flex; justify-content: center; align-items: center; gap: 10px;">
|
| 357 |
<a href="https://github.com/Jellyfish042/uncheatable_eval" target="_blank" style="text-decoration: none;">
|
| 358 |
<img src="https://img.shields.io/badge/GitHub-Project-181717?logo=github" alt="GitHub Project">
|
|
|
|
| 1 |
"""
|
| 2 |
UncheatableEval Visualization - Hugging Face Space
|
| 3 |
|
| 4 |
+
Compare byte-level prediction performance between Qwen3-1.7B-Base and RWKV7-G1D-1.5B.
|
| 5 |
"""
|
| 6 |
|
| 7 |
import gc
|
|
|
|
| 19 |
|
| 20 |
# Model configuration
|
| 21 |
QWEN_MODEL_ID = "Qwen/Qwen3-1.7B-Base"
|
| 22 |
+
RWKV_MODEL_URL = "https://huggingface.co/BlinkDL/rwkv7-g1/resolve/main/rwkv7-g1d-1.5b-20260212-ctx8192.pth"
|
| 23 |
+
RWKV_MODEL_FILENAME = "rwkv7-g1d-1.5b-20260212-ctx8192.pth"
|
| 24 |
|
| 25 |
# Get the directory where this script is located
|
| 26 |
SCRIPT_DIR = Path(__file__).parent.absolute()
|
|
|
|
| 85 |
|
| 86 |
|
| 87 |
def load_rwkv7_model(model_path: str):
|
| 88 |
+
"""Load RWKV7-G1D-1.5B model."""
|
| 89 |
os.environ["RWKV_JIT_ON"] = "1"
|
| 90 |
os.environ["RWKV_V7_ON"] = "1"
|
| 91 |
|
|
|
|
| 172 |
_qwen_model, _qwen_tokenizer = load_qwen_model()
|
| 173 |
|
| 174 |
# Load RWKV7 model
|
| 175 |
+
print("Loading RWKV7-G1D-1.5B...")
|
| 176 |
_rwkv_model, _rwkv_tokenizer = load_rwkv7_model(_rwkv_model_path)
|
| 177 |
|
| 178 |
# Initialize stats manager
|
|
|
|
| 267 |
text=text,
|
| 268 |
byte_losses_a=result_rwkv["byte_wise_losses"],
|
| 269 |
byte_losses_b=result_qwen["byte_wise_losses"],
|
| 270 |
+
model_a_name="RWKV7-G1D-1.5B",
|
| 271 |
model_b_name="Qwen3-1.7B-Base",
|
| 272 |
topk_predictions_a=result_rwkv["top5_predictions"],
|
| 273 |
topk_predictions_b=result_qwen["top5_predictions"],
|
|
|
|
| 352 |
"""
|
| 353 |
<div style="text-align: center; margin-bottom: 20px;">
|
| 354 |
<h1 style="margin-bottom: 10px;">🔬 Compression-Lens: RWKV-7 vs Qwen3 Byte-Level Comparison</h1>
|
| 355 |
+
<p style="margin-bottom: 15px; color: #666;">Compare the byte-level prediction performance between <strong>RWKV7-G1D-1.5B</strong> and <strong>Qwen3-1.7B-Base</strong>.</p>
|
| 356 |
<div style="display: flex; justify-content: center; align-items: center; gap: 10px;">
|
| 357 |
<a href="https://github.com/Jellyfish042/uncheatable_eval" target="_blank" style="text-decoration: none;">
|
| 358 |
<img src="https://img.shields.io/badge/GitHub-Project-181717?logo=github" alt="GitHub Project">
|
precompute_example.py
CHANGED
|
@@ -23,7 +23,7 @@ PRECOMPUTED_DIR = SCRIPT_DIR / "precomputed"
|
|
| 23 |
|
| 24 |
# Model configuration
|
| 25 |
QWEN_MODEL_ID = "Qwen/Qwen3-1.7B-Base"
|
| 26 |
-
RWKV_MODEL_FILENAME = "rwkv7-
|
| 27 |
|
| 28 |
# Detect device
|
| 29 |
# DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
@@ -69,7 +69,7 @@ def load_qwen_model():
|
|
| 69 |
|
| 70 |
|
| 71 |
def load_rwkv7_model(model_path: str):
|
| 72 |
-
"""Load RWKV7-
|
| 73 |
os.environ["RWKV_JIT_ON"] = "1"
|
| 74 |
os.environ["RWKV_V7_ON"] = "1"
|
| 75 |
|
|
@@ -116,7 +116,7 @@ def precompute_example():
|
|
| 116 |
print("Loading Qwen3-1.7B-Base...")
|
| 117 |
qwen_model, qwen_tokenizer = load_qwen_model()
|
| 118 |
|
| 119 |
-
print("Loading RWKV7-
|
| 120 |
rwkv_model, rwkv_tokenizer = load_rwkv7_model(rwkv_model_path)
|
| 121 |
|
| 122 |
# Run evaluations
|
|
@@ -134,7 +134,7 @@ def precompute_example():
|
|
| 134 |
text=example_text,
|
| 135 |
byte_losses_a=result_rwkv["byte_wise_losses"],
|
| 136 |
byte_losses_b=result_qwen["byte_wise_losses"],
|
| 137 |
-
model_a_name="RWKV7-
|
| 138 |
model_b_name="Qwen3-1.7B-Base",
|
| 139 |
topk_predictions_a=result_rwkv["top5_predictions"],
|
| 140 |
topk_predictions_b=result_qwen["top5_predictions"],
|
|
|
|
| 23 |
|
| 24 |
# Model configuration
|
| 25 |
QWEN_MODEL_ID = "Qwen/Qwen3-1.7B-Base"
|
| 26 |
+
RWKV_MODEL_FILENAME = "rwkv7-g1d-1.5b-20260212-ctx8192.pth"
|
| 27 |
|
| 28 |
# Detect device
|
| 29 |
# DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 69 |
|
| 70 |
|
| 71 |
def load_rwkv7_model(model_path: str):
|
| 72 |
+
"""Load RWKV7-G1D-1.5B model."""
|
| 73 |
os.environ["RWKV_JIT_ON"] = "1"
|
| 74 |
os.environ["RWKV_V7_ON"] = "1"
|
| 75 |
|
|
|
|
| 116 |
print("Loading Qwen3-1.7B-Base...")
|
| 117 |
qwen_model, qwen_tokenizer = load_qwen_model()
|
| 118 |
|
| 119 |
+
print("Loading RWKV7-G1D-1.5B...")
|
| 120 |
rwkv_model, rwkv_tokenizer = load_rwkv7_model(rwkv_model_path)
|
| 121 |
|
| 122 |
# Run evaluations
|
|
|
|
| 134 |
text=example_text,
|
| 135 |
byte_losses_a=result_rwkv["byte_wise_losses"],
|
| 136 |
byte_losses_b=result_qwen["byte_wise_losses"],
|
| 137 |
+
model_a_name="RWKV7-G1D-1.5B",
|
| 138 |
model_b_name="Qwen3-1.7B-Base",
|
| 139 |
topk_predictions_a=result_rwkv["top5_predictions"],
|
| 140 |
topk_predictions_b=result_qwen["top5_predictions"],
|