Spaces:

CultriX
/

Tiny-LeaderBoard

Sleeping

App Files Community

CultriX committed on Dec 30, 2024

Commit

7334418

verified ·

1 Parent(s): 0292584

Update scrape-leaderboard.py

Browse files

Files changed (1) hide show

scrape-leaderboard.py +485 -24

scrape-leaderboard.py CHANGED Viewed

@@ -1,5 +1,3 @@
-#!/usr/bin/env python3
 import requests
 from bs4 import BeautifulSoup
@@ -9,7 +7,6 @@ from bs4 import BeautifulSoup
 #    - scores (average, IFEval, BBH, MATH, GPQA, MUSR, MMLU-PRO)
 #    - hf_url: the Hugging Face URL to scrape for a MergeKit config
 #    - known_config: if we already know the configuration, store it here; otherwise None.
 benchmark_data = [
     {
         "rank": 44,
@@ -23,8 +20,8 @@ benchmark_data = [
             "MUSR": 19.39,
             "MMLU-PRO": 48.26
         },
-        "hf_url": "https://huggingface.co/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",
-        # known_config from your data:
         "known_config": {
             "models": [
                 {"model": "CultriX/SeQwence-14Bv1"},
@@ -68,9 +65,474 @@ benchmark_data = [
         "hf_url": "https://huggingface.co/djuna/Q2.5-Veltha-14B-0.5",
         "known_config": None
     },
-    # ... etc. (Paste in all the rest of your “DATA START” items if desired) ...
 ]
 def scrape_model_page(model_url):
     """
     Scrapes the Hugging Face model page for potential MergeKit YAML configuration
@@ -107,8 +569,7 @@ def print_benchmark_and_config_info(model_info):
     checks for known or scraped MergeKit config, and prints accordingly.
     """
     # Print the benchmark summary
-    print(f"---")
-    print(f"Model Rank: {model_info['rank']}")
     print(f"Model Name: {model_info['name']}")
     print(f"Model average score across benchmarks in %: {model_info['scores']['average']}")
     print(f"Models average score on IFEval benchmarks in %: {model_info['scores']['IFEval']}")
@@ -119,29 +580,28 @@ def print_benchmark_and_config_info(model_info):
     print(f"Models average score in MMLU-PRO benchmarks in %: {model_info['scores']['MMLU-PRO']}")
     # If we have a known config for this model, just print it.
-    if model_info.get("known_config"):
-        config = model_info["known_config"]
         print("###")
-        # Print the config in a YAML-like format
         print("models:")
-        for m in config["models"]:
             print(f"  - model: {m['model']}")
-        print(f"merge_method: {config['merge_method']}")
-        print(f"base_model: {config['base_model']}")
-        print(f"dtype: {config['dtype']}")
         print("parameters:")
-        print(f"  t: {config['parameters']['t']} # V shaped curve: Hermes for input & output, WizardMath in the middle layers")
         print("###")
         return
-    # Otherwise, attempt to scrape the page for a config
     scrape_result = scrape_model_page(model_info["hf_url"])
     if isinstance(scrape_result, str) or ("No YAML configuration found." in scrape_result["yaml_configuration"]):
-        # If it's a string, it's likely an error or "No YAML config"
         print("(No MergeKit configuration found.)\n")
         print("You can try the following Python script to scrape the model page:\n")
         print("#" * 70)
-        print(f'''import requests
 from bs4 import BeautifulSoup
 def scrape_model_page(model_url):
@@ -149,7 +609,7 @@ def scrape_model_page(model_url):
         response = requests.get(model_url)
         if response.status_code != 200:
             return f"Error: Unable to fetch the page (Status Code: {{response.status_code}})"
         soup = BeautifulSoup(response.text, "html.parser")
         yaml_config = soup.find("pre")
@@ -169,16 +629,17 @@ def scrape_model_page(model_url):
 if __name__ == "__main__":
     model_url = "{model_info['hf_url']}"
     result = scrape_model_page(model_url)
-    print(result)''')
         print("#" * 70)
     else:
-        # If we found a config, print it
         print("###")
         print(scrape_result["yaml_configuration"])
         print("###")
 if __name__ == "__main__":
-    # Loop through all models, printing benchmark data and MergeKit config info
     for model in benchmark_data:
-        print_benchmark_and_config_info(model)

 import requests
 from bs4 import BeautifulSoup
 #    - scores (average, IFEval, BBH, MATH, GPQA, MUSR, MMLU-PRO)
 #    - hf_url: the Hugging Face URL to scrape for a MergeKit config
 #    - known_config: if we already know the configuration, store it here; otherwise None.
 benchmark_data = [
     {
         "rank": 44,
             "MUSR": 19.39,
             "MMLU-PRO": 48.26
         },
+        "hf_url": "https://huggingface.co/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",  # placeholder
+        # This one has a known config from your data
         "known_config": {
             "models": [
                 {"model": "CultriX/SeQwence-14Bv1"},
         "hf_url": "https://huggingface.co/djuna/Q2.5-Veltha-14B-0.5",
         "known_config": None
     },
+    {
+        "rank": 48,
+        "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock",
+        "scores": {
+            "average": 39.81,
+            "IFEval": 71.62,
+            "BBH": 48.76,
+            "MATH": 33.99,
+            "GPQA": 17.34,
+            "MUSR": 19.23,
+            "MMLU-PRO": 47.95
+        },
+        "hf_url": "https://huggingface.co/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock",
+        "known_config": None
+    },
+    {
+        "rank": 50,
+        "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01",
+        "scores": {
+            "average": 39.46,
+            "IFEval": 68.72,
+            "BBH": 47.71,
+            "MATH": 35.05,
+            "GPQA": 18.23,
+            "MUSR": 19.56,
+            "MMLU-PRO": 47.50
+        },
+        "hf_url": "https://huggingface.co/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01",
+        "known_config": None
+    },
+    {
+        "rank": 52,
+        "name": "arcee-ai/Virtuoso-Small",
+        "scores": {
+            "average": 39.43,
+            "IFEval": 79.35,
+            "BBH": 50.40,
+            "MATH": 34.29,
+            "GPQA": 11.52,
+            "MUSR": 14.44,
+            "MMLU-PRO": 46.57
+        },
+        "hf_url": "https://huggingface.co/arcee-ai/Virtuoso-Small",
+        "known_config": None
+    },
+    {
+        "rank": 54,
+        "name": "sometimesanotion/Qwentinuum-14B-v6",
+        "scores": {
+            "average": 39.23,
+            "IFEval": 63.04,
+            "BBH": 50.23,
+            "MATH": 33.84,
+            "GPQA": 18.23,
+            "MUSR": 21.18,
+            "MMLU-PRO": 48.89
+        },
+        "hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v6",
+        "known_config": None
+    },
+    {
+        "rank": 55,
+        "name": "djuna/Q2.5-Veltha-14B",
+        "scores": {
+            "average": 39.21,
+            "IFEval": 82.92,
+            "BBH": 49.75,
+            "MATH": 28.02,
+            "GPQA": 14.54,
+            "MUSR": 12.26,
+            "MMLU-PRO": 47.76
+        },
+        "hf_url": "https://huggingface.co/djuna/Q2.5-Veltha-14B",
+        "known_config": None
+    },
+    {
+        "rank": 57,
+        "name": "allknowingroger/QwenSlerp6-14B",
+        "scores": {
+            "average": 39.02,
+            "IFEval": 68.67,
+            "BBH": 47.59,
+            "MATH": 34.14,
+            "GPQA": 16.44,
+            "MUSR": 18.32,
+            "MMLU-PRO": 48.95
+        },
+        "hf_url": "https://huggingface.co/allknowingroger/QwenSlerp6-14B",
+        "known_config": None
+    },
+    {
+        "rank": 58,
+        "name": "allknowingroger/QwenSlerp5-14B",
+        "scores": {
+            "average": 38.94,
+            "IFEval": 71.19,
+            "BBH": 47.39,
+            "MATH": 33.16,
+            "GPQA": 15.32,
+            "MUSR": 17.81,
+            "MMLU-PRO": 48.78
+        },
+        "hf_url": "https://huggingface.co/allknowingroger/QwenSlerp5-14B",
+        "known_config": None
+    },
+    {
+        "rank": 59,
+        "name": "sometimesanotion/Qwentinuum-14B-v5",
+        "scores": {
+            "average": 38.87,
+            "IFEval": 62.86,
+            "BBH": 50.28,
+            "MATH": 31.57,
+            "GPQA": 18.34,
+            "MUSR": 21.09,
+            "MMLU-PRO": 49.09
+        },
+        "hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v5",
+        "known_config": None
+    },
+    {
+        "rank": 60,
+        "name": "sometimesanotion/Qwenvergence-14B-v6-Prose",
+        "scores": {
+            "average": 38.82,
+            "IFEval": 59.90,
+            "BBH": 50.12,
+            "MATH": 34.89,
+            "GPQA": 18.46,
+            "MUSR": 21.02,
+            "MMLU-PRO": 48.56
+        },
+        "hf_url": "https://huggingface.co/sometimesanotion/Qwenvergence-14B-v6-Prose",
+        "known_config": None
+    },
+    {
+        "rank": 61,
+        "name": "CultriX/Qwen2.5-14B-Brocav3",
+        "scores": {
+            "average": 38.76,
+            "IFEval": 69.52,
+            "BBH": 49.05,
+            "MATH": 32.25,
+            "GPQA": 14.54,
+            "MUSR": 19.25,
+            "MMLU-PRO": 47.97
+        },
+        "hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Brocav3",
+        "known_config": None
+    },
+    {
+        "rank": 62,
+        "name": "sometimesanotion/Qwentinuum-14B-v7",
+        "scores": {
+            "average": 38.76,
+            "IFEval": 61.09,
+            "BBH": 50.35,
+            "MATH": 33.38,
+            "GPQA": 18.79,
+            "MUSR": 19.95,
+            "MMLU-PRO": 49.00
+        },
+        "hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v7",
+        "known_config": None
+    },
+    {
+        "rank": 64,
+        "name": "sometimesanotion/Qwentinuum-14B-v3",
+        "scores": {
+            "average": 38.74,
+            "IFEval": 61.58,
+            "BBH": 50.04,
+            "MATH": 32.85,
+            "GPQA": 18.34,
+            "MUSR": 20.62,
+            "MMLU-PRO": 49.03
+        },
+        "hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v3",
+        "known_config": None
+    },
+    {
+        "rank": 65,
+        "name": "allura-org/TQ2.5-14B-Aletheia-v1",
+        "scores": {
+            "average": 38.74,
+            "IFEval": 75.30,
+            "BBH": 50.88,
+            "MATH": 29.53,
+            "GPQA": 14.99,
+            "MUSR": 14.61,
+            "MMLU-PRO": 47.12
+        },
+        "hf_url": "https://huggingface.co/allura-org/TQ2.5-14B-Aletheia-v1",
+        "known_config": None
+    },
+    {
+        "rank": 66,
+        "name": "qingy2024/Fusion4-14B-Instruct",
+        "scores": {
+            "average": 38.73,
+            "IFEval": 76.49,
+            "BBH": 50.70,
+            "MATH": 33.91,
+            "GPQA": 10.74,
+            "MUSR": 13.97,
+            "MMLU-PRO": 46.60
+        },
+        "hf_url": "https://huggingface.co/qingy2024/Fusion4-14B-Instruct",
+        "known_config": None
+    },
+    {
+        "rank": 68,
+        "name": "CultriX/Qwen2.5-14B-Brocav7",
+        "scores": {
+            "average": 38.52,
+            "IFEval": 67.24,
+            "BBH": 48.91,
+            "MATH": 31.87,
+            "GPQA": 15.66,
+            "MUSR": 20.15,
+            "MMLU-PRO": 47.31
+        },
+        "hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Brocav7",
+        "known_config": None
+    },
+    {
+        "rank": 71,
+        "name": "sometimesanotion/Qwentinuum-14B-v6-Prose",
+        "scores": {
+            "average": 38.46,
+            "IFEval": 56.43,
+            "BBH": 50.14,
+            "MATH": 35.57,
+            "GPQA": 18.46,
+            "MUSR": 21.34,
+            "MMLU-PRO": 48.80
+        },
+        "hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v6-Prose",
+        "known_config": None
+    },
+    {
+        "rank": 76,
+        "name": "CultriX/Qwen2.5-14B-Brocav6",
+        "scores": {
+            "average": 38.32,
+            "IFEval": 69.95,
+            "BBH": 47.82,
+            "MATH": 29.61,
+            "GPQA": 15.66,
+            "MUSR": 18.88,
+            "MMLU-PRO": 47.99
+        },
+        "hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Brocav6",
+        "known_config": None
+    },
+    {
+        "rank": 80,
+        "name": "CultriX/SeQwence-14Bv1",
+        "scores": {
+            "average": 38.20,
+            "IFEval": 66.78,
+            "BBH": 47.19,
+            "MATH": 33.53,
+            "GPQA": 14.88,
+            "MUSR": 18.80,
+            "MMLU-PRO": 48.00
+        },
+        "hf_url": "https://huggingface.co/CultriX/SeQwence-14Bv1",
+        "known_config": None
+    },
+    {
+        "rank": 85,
+        "name": "sometimesanotion/Qwentinuum-14B-v013",
+        "scores": {
+            "average": 37.96,
+            "IFEval": 67.11,
+            "BBH": 43.97,
+            "MATH": 33.01,
+            "GPQA": 14.32,
+            "MUSR": 24.99,
+            "MMLU-PRO": 44.34
+        },
+        "hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v013",
+        "known_config": None
+    },
+    {
+        "rank": 86,
+        "name": "CultriX/Qwen2.5-14B-Wernickev3",
+        "scores": {
+            "average": 37.94,
+            "IFEval": 70.48,
+            "BBH": 44.58,
+            "MATH": 32.78,
+            "GPQA": 14.99,
+            "MUSR": 18.69,
+            "MMLU-PRO": 46.13
+        },
+        "hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev3",
+        "known_config": None
+    },
+    {
+        "rank": 88,
+        "name": "allknowingroger/QwenSlerp4-14B",
+        "scores": {
+            "average": 37.80,
+            "IFEval": 63.28,
+            "BBH": 49.38,
+            "MATH": 30.97,
+            "GPQA": 16.33,
+            "MUSR": 17.59,
+            "MMLU-PRO": 49.28
+        },
+        "hf_url": "https://huggingface.co/allknowingroger/QwenSlerp4-14B",
+        "known_config": None
+    },
+    {
+        "rank": 89,
+        "name": "CultriX/Qwen2.5-14B-Broca",
+        "scores": {
+            "average": 37.72,
+            "IFEval": 56.04,
+            "BBH": 50.03,
+            "MATH": 34.59,
+            "GPQA": 18.23,
+            "MUSR": 18.95,
+            "MMLU-PRO": 48.49
+        },
+        "hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Broca",
+        "known_config": None
+    },
+    {
+        "rank": 90,
+        "name": "CultriX/Qwen2.5-14B-Emerged",
+        "scores": {
+            "average": 37.66,
+            "IFEval": 70.00,
+            "BBH": 45.93,
+            "MATH": 30.74,
+            "GPQA": 14.32,
+            "MUSR": 18.47,
+            "MMLU-PRO": 46.51
+        },
+        "hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Emerged",
+        "known_config": None
+    },
+    {
+        "rank": 91,
+        "name": "sometimesanotion/Qwentinuum-14B-v8",
+        "scores": {
+            "average": 37.65,
+            "IFEval": 54.12,
+            "BBH": 50.11,
+            "MATH": 34.14,
+            "GPQA": 17.79,
+            "MUSR": 20.75,
+            "MMLU-PRO": 49.02
+        },
+        "hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v8",
+        "known_config": None
+    },
+    {
+        "rank": 92,
+        "name": "qingy2024/Fusion-14B-Instruct",
+        "scores": {
+            "average": 37.64,
+            "IFEval": 72.60,
+            "BBH": 48.58,
+            "MATH": 30.97,
+            "GPQA": 13.98,
+            "MUSR": 14.81,
+            "MMLU-PRO": 44.93
+        },
+        "hf_url": "https://huggingface.co/qingy2024/Fusion-14B-Instruct",
+        "known_config": None
+    },
+    {
+        "rank": 94,
+        "name": "CultriX/Qwestion-14B",
+        "scores": {
+            "average": 37.63,
+            "IFEval": 63.18,
+            "BBH": 48.76,
+            "MATH": 31.72,
+            "GPQA": 15.77,
+            "MUSR": 17.22,
+            "MMLU-PRO": 49.14
+        },
+        "hf_url": "https://huggingface.co/CultriX/Qwestion-14B",
+        "known_config": None
+    },
+    {
+        "rank": 99,
+        "name": "sometimesanotion/Qwenvergence-14B-v3-Prose",
+        "scores": {
+            "average": 37.37,
+            "IFEval": 49.18,
+            "BBH": 49.80,
+            "MATH": 35.57,
+            "GPQA": 19.35,
+            "MUSR": 21.77,
+            "MMLU-PRO": 48.55
+        },
+        "hf_url": "https://huggingface.co/sometimesanotion/Qwenvergence-14B-v3-Prose",
+        "known_config": None
+    },
+    {
+        "rank": 102,
+        "name": "CultriX/SeQwence-14B-v5",
+        "scores": {
+            "average": 37.27,
+            "IFEval": 59.20,
+            "BBH": 50.00,
+            "MATH": 31.04,
+            "GPQA": 16.00,
+            "MUSR": 18.33,
+            "MMLU-PRO": 49.05
+        },
+        "hf_url": "https://huggingface.co/CultriX/SeQwence-14B-v5",
+        "known_config": None
+    },
+    {
+        "rank": 103,
+        "name": "sometimesanotion/Qwen-14B-ProseStock-v4",
+        "scores": {
+            "average": 37.23,
+            "IFEval": 49.42,
+            "BBH": 49.54,
+            "MATH": 35.50,
+            "GPQA": 18.46,
+            "MUSR": 21.70,
+            "MMLU-PRO": 48.74
+        },
+        "hf_url": "https://huggingface.co/sometimesanotion/Qwen-14B-ProseStock-v4",
+        "known_config": None
+    },
+    {
+        "rank": 104,
+        "name": "sometimesanotion/IF-reasoning-experiment-40",
+        "scores": {
+            "average": 37.21,
+            "IFEval": 63.30,
+            "BBH": 44.31,
+            "MATH": 27.72,
+            "GPQA": 17.34,
+            "MUSR": 25.86,
+            "MMLU-PRO": 44.72
+        },
+        "hf_url": "https://huggingface.co/sometimesanotion/IF-reasoning-experiment-40",
+        "known_config": None
+    },
+    {
+        "rank": 105,
+        "name": "CultriX/SeQwence-14B-EvolMerge",
+        "scores": {
+            "average": 37.20,
+            "IFEval": 53.82,
+            "BBH": 50.78,
+            "MATH": 31.80,
+            "GPQA": 17.45,
+            "MUSR": 20.26,
+            "MMLU-PRO": 49.10
+        },
+        "hf_url": "https://huggingface.co/CultriX/SeQwence-14B-EvolMerge",
+        "known_config": None
+    }
 ]
 def scrape_model_page(model_url):
     """
     Scrapes the Hugging Face model page for potential MergeKit YAML configuration
     checks for known or scraped MergeKit config, and prints accordingly.
     """
     # Print the benchmark summary
+    print(f"---\nModel Rank: {model_info['rank']}")
     print(f"Model Name: {model_info['name']}")
     print(f"Model average score across benchmarks in %: {model_info['scores']['average']}")
     print(f"Models average score on IFEval benchmarks in %: {model_info['scores']['IFEval']}")
     print(f"Models average score in MMLU-PRO benchmarks in %: {model_info['scores']['MMLU-PRO']}")
     # If we have a known config for this model, just print it.
+    if model_info["known_config"] is not None:
         print("###")
         print("models:")
+        for m in model_info["known_config"]["models"]:
             print(f"  - model: {m['model']}")
+        print(f"merge_method: {model_info['known_config']['merge_method']}")
+        print(f"base_model: {model_info['known_config']['base_model']}")
+        print(f"dtype: {model_info['known_config']['dtype']}")
         print("parameters:")
+        print(f"  t: {model_info['known_config']['parameters']['t']} # V shaped curve: Hermes for input & output, WizardMath in the middle layers")
         print("###")
         return
+    # Otherwise, attempt to scrape the model page
     scrape_result = scrape_model_page(model_info["hf_url"])
+    # If we got an error or can't find YAML, we show the scraping code
     if isinstance(scrape_result, str) or ("No YAML configuration found." in scrape_result["yaml_configuration"]):
         print("(No MergeKit configuration found.)\n")
         print("You can try the following Python script to scrape the model page:\n")
         print("#" * 70)
+        print(
+f'''import requests
 from bs4 import BeautifulSoup
 def scrape_model_page(model_url):
         response = requests.get(model_url)
         if response.status_code != 200:
             return f"Error: Unable to fetch the page (Status Code: {{response.status_code}})"
         soup = BeautifulSoup(response.text, "html.parser")
         yaml_config = soup.find("pre")
 if __name__ == "__main__":
     model_url = "{model_info['hf_url']}"
     result = scrape_model_page(model_url)
+    print(result)'''
+        )
         print("#" * 70)
     else:
+        # If we found a config, print it between triple-hash signs
         print("###")
         print(scrape_result["yaml_configuration"])
         print("###")
 if __name__ == "__main__":
+    # 2. Loop through all models, printing benchmark data and MergeKit config info
     for model in benchmark_data:
+        print_benchmark_and_config_info(model)