Spaces:

Jellyfish042
/

UncheatableEval

Running

App Files Files Community

Jellyfish042 committed on Jan 21

Commit

1be7b2f

1 Parent(s): c1b1328

update

Browse files

Files changed (1) hide show

app.py +123 -85

app.py CHANGED Viewed

@@ -40,15 +40,18 @@ metric_to_sheet = {
     "Bits Per Character (BPC)": "bpc",
     "Bits Per Byte (BPB)": "bpb",
 }
-model_size_to_file_name = {
-    ">20B": "20b+",
     "~14B": "14b",
     # "~9B": "9b",
     "~7B": "7b",
     "~3B": "3b",
     "~1.5B": "1b5",
-    "Other": "other",
-}
 def read_about_md():
@@ -264,27 +267,33 @@ def filter_pareto_frontier(x_values, y_values, names):
         return [], [], []
-def fit_power_law_with_offset(x_values, y_values):
-    """
-    使用带偏置的幂律拟合原始数据
-    返回: (params, raw_rmse, log_rmse, fit_x, fit_y)
-    """
     x_arr = np.array(x_values)
     y_arr = np.array(y_values)
-    # 初始参数估计
-    # 使用简单的幂律拟合作为初始值
-    log_x = np.log10(x_arr)
-    log_y = np.log10(y_arr)
-    slope, intercept = np.polyfit(log_x, log_y, 1)
-    a_init = 10**intercept
-    b_init = slope
-    c_init = 0  # 偏置初始值设为0
-    try:
-        # 使用curve_fit进行非线性拟合
-        params, _ = curve_fit(
             power_law_with_offset,
             x_arr,
             y_arr,
@@ -304,14 +313,12 @@ def fit_power_law_with_offset(x_values, y_values):
         log_y_pred = np.log10(y_pred)
         log_rmse = np.sqrt(np.mean((log_y_actual - log_y_pred) ** 2))
-        # 生成拟合曲线的点
-        x_min, x_max = min(x_values), max(x_values)
-        fit_x = np.linspace(x_min * 0.8, x_max * 1.2, 100)
-        fit_y = power_law_with_offset(fit_x, a, b, c)
-        return params, raw_rmse, log_rmse, fit_x, fit_y
-    except Exception as e:
-        print(f"Fitting failed: {e}")
         # 如果拟合失败，返回简单幂律拟合结果
         a = a_init
         b = b_init
@@ -328,11 +335,9 @@ def fit_power_law_with_offset(x_values, y_values):
         log_y_pred = np.log10(y_pred)
         log_rmse = np.sqrt(np.mean((log_y_actual - log_y_pred) ** 2))
-        x_min, x_max = min(x_values), max(x_values)
-        fit_x = np.linspace(x_min * 0.8, x_max * 1.2, 100)
-        fit_y = a * np.power(fit_x, b)
-        return params, raw_rmse, log_rmse, fit_x, fit_y
 def create_scaling_plot(data_manager: DataManager, period: str, use_pareto: bool = False):
@@ -373,16 +378,32 @@ def create_scaling_plot(data_manager: DataManager, period: str, use_pareto: bool
     else:
         fit_x_values, fit_y_values, fit_names = x_values, y_values, names
-    x_min, x_max = np.log10(min(x_values)), np.log10(max(x_values))
-    y_min, y_max = np.log10(min(y_values)), np.log10(max(y_values))
-    x_dtick = (x_max - x_min) / 4
-    y_dtick = (y_max - y_min) / 4
-    # 使用筛选后的数据进行拟合
-    params, raw_rmse, log_rmse, fit_x, fit_y = fit_power_law_with_offset(fit_x_values, fit_y_values)
-    a, b, c = params
-    fig = go.Figure()
     # 添加所有数据点
     fig.add_trace(
@@ -423,16 +444,16 @@ def create_scaling_plot(data_manager: DataManager, period: str, use_pareto: bool
         fit_label = f"{fit_type}: y = {a:.2f} × x^{b:.3f}<br>Raw RMSE: {raw_rmse:.2f}, Log-RMSE: {log_rmse:.3f}"
     else:
         fit_label = f"{fit_type}: y = {a:.2f} × x^{b:.3f} + {c:.2f}<br>Raw RMSE: {raw_rmse:.2f}, Log-RMSE: {log_rmse:.3f}"
-    fig.add_trace(
-        go.Scatter(
-            x=fit_x.tolist(),
-            y=fit_y.tolist(),
-            mode="lines",
-            name=fit_label,
-            line=dict(color="#FF6B6B", width=2, dash="dash"),
-            hoverinfo="skip",
-        )
-    )
     title_suffix = " (Pareto Frontier)" if use_pareto else ""
     fig.update_layout(
@@ -550,8 +571,15 @@ def create_category_scaling_plot(data_manager: DataManager, period: str, selecte
                 fit_x_vals, fit_y_vals, fit_name_vals = x_vals, y_vals, name_vals
             # 使用筛选后的数据进行拟合
-            params, raw_rmse, log_rmse, fit_x, fit_y = fit_power_law_with_offset(fit_x_vals, fit_y_vals)
-            a, b, c = params
             # 构建数据集名称列表（用于hover显示）
             datasets_label = f"Average of {len(selected_datasets)} datasets"
@@ -595,16 +623,16 @@ def create_category_scaling_plot(data_manager: DataManager, period: str, selecte
                 fit_label = f"{fit_type}: y = {a:.2f} × x^{b:.3f}<br>Raw RMSE: {raw_rmse:.2f}, Log-RMSE: {log_rmse:.3f}"
             else:
                 fit_label = f"{fit_type}: y = {a:.2f} × x^{b:.3f} + {c:.2f}<br>Raw RMSE: {raw_rmse:.2f}, Log-RMSE: {log_rmse:.3f}"
-            fig.add_trace(
-                go.Scatter(
-                    x=fit_x.tolist(),
-                    y=fit_y.tolist(),
-                    mode="lines",
-                    name=fit_label,
-                    line=dict(color="#FF6B6B", width=2, dash="dash"),
-                    hoverinfo="skip",
-                )
-            )
     else:
         # 单独显示模式：为每个数据集创建散点图和拟合线
         for idx, dataset in enumerate(selected_datasets):
@@ -639,8 +667,15 @@ def create_category_scaling_plot(data_manager: DataManager, period: str, selecte
                 fit_x_vals, fit_y_vals, fit_name_vals = x_vals, y_vals, name_vals
             # 使用筛选后的数据进行拟合
-            params, raw_rmse, log_rmse, fit_x, fit_y = fit_power_law_with_offset(fit_x_vals, fit_y_vals)
-            a, b, c = params
             # 添加所有数据点
             fig.add_trace(
@@ -683,18 +718,18 @@ def create_category_scaling_plot(data_manager: DataManager, period: str, selecte
                 fit_label = f"{dataset} {fit_type}: y = {a:.2f} × x^{b:.3f}<br>Raw RMSE: {raw_rmse:.2f}, Log-RMSE: {log_rmse:.3f}"
             else:
                 fit_label = f"{dataset} {fit_type}: y = {a:.2f} × x^{b:.3f} + {c:.2f}<br>Raw RMSE: {raw_rmse:.2f}, Log-RMSE: {log_rmse:.3f}"
-            fig.add_trace(
-                go.Scatter(
-                    x=fit_x.tolist(),
-                    y=fit_y.tolist(),
-                    mode="lines",
-                    name=fit_label,
-                    line=dict(color=color, width=2, dash="dash"),
-                    hoverinfo="skip",
-                    legendgroup=dataset,
-                    showlegend=True,
-                )
-            )
     if not all_x_values or not all_y_values:
         fig = go.Figure()
@@ -702,10 +737,13 @@ def create_category_scaling_plot(data_manager: DataManager, period: str, selecte
         return fig
     # 计算全局坐标范围
-    x_min, x_max = np.log10(min(all_x_values)), np.log10(max(all_x_values))
-    y_min, y_max = np.log10(min(all_y_values)), np.log10(max(all_y_values))
-    x_dtick = (x_max - x_min) / 4
-    y_dtick = (y_max - y_min) / 4
     fig.update_layout(
         title={"text": "Scaling Law by Dataset", "x": 0.5, "xanchor": "center", "yanchor": "top"},

     "Bits Per Character (BPC)": "bpc",
     "Bits Per Byte (BPB)": "bpb",
 }
+model_size_to_file_name = {
+    ">20B": "20b+",
     "~14B": "14b",
     # "~9B": "9b",
     "~7B": "7b",
     "~3B": "3b",
     "~1.5B": "1b5",
+    "Other": "other",
+}
+SCALING_EXTRAPOLATE_MAX_B = 10000
+SCALING_FIT_POINTS = 200
+FIT_LINE_HOVER_TEMPLATE = "Params: %{x:.2f}B<br>Predicted CR: %{y:.2f}%<extra></extra>"
 def read_about_md():
         return [], [], []
+def fit_power_law_with_offset(x_values, y_values, extrapolate_max_b=None, num_points=SCALING_FIT_POINTS):
+    """
+    使用带偏置的幂律拟合原始数据
+    返回: (params, raw_rmse, log_rmse, fit_x, fit_y)
+    """
     x_arr = np.array(x_values)
     y_arr = np.array(y_values)
+    # 初始参数估计
+    # 使用简单的幂律拟合作为初始值
+    log_x = np.log10(x_arr)
+    log_y = np.log10(y_arr)
+    slope, intercept = np.polyfit(log_x, log_y, 1)
+    a_init = 10**intercept
+    b_init = slope
+    c_init = 0  # 偏置初始值设为0
+    x_min, x_max = x_arr.min(), x_arr.max()
+    x_start = max(x_min * 0.8, np.finfo(float).tiny)
+    x_end = x_max * 1.2
+    if extrapolate_max_b is not None:
+        x_end = max(x_end, extrapolate_max_b)
+    fit_x = np.logspace(np.log10(x_start), np.log10(x_end), num_points)
+    try:
+        # 使用curve_fit进行非线性拟合
+        params, _ = curve_fit(
             power_law_with_offset,
             x_arr,
             y_arr,
         log_y_pred = np.log10(y_pred)
         log_rmse = np.sqrt(np.mean((log_y_actual - log_y_pred) ** 2))
+        # 生成拟合曲线的点
+        fit_y = power_law_with_offset(fit_x, a, b, c)
+        return params, raw_rmse, log_rmse, fit_x, fit_y
+    except Exception as e:
+        print(f"Fitting failed: {e}")
         # 如果拟合失败，返回简单幂律拟合结果
         a = a_init
         b = b_init
         log_y_pred = np.log10(y_pred)
         log_rmse = np.sqrt(np.mean((log_y_actual - log_y_pred) ** 2))
+        fit_y = a * np.power(fit_x, b)
+        return params, raw_rmse, log_rmse, fit_x, fit_y
 def create_scaling_plot(data_manager: DataManager, period: str, use_pareto: bool = False):
     else:
         fit_x_values, fit_y_values, fit_names = x_values, y_values, names
+    x_min_val = min(x_values)
+    x_max_val = max(x_values)
+    x_axis_max = x_max_val
+    # 使用筛选后的数据进行拟合
+    params, raw_rmse, log_rmse, fit_x, fit_y = fit_power_law_with_offset(
+        fit_x_values,
+        fit_y_values,
+        extrapolate_max_b=SCALING_EXTRAPOLATE_MAX_B,
+    )
+    a, b, c = params
+    y_min_val = min(y_values)
+    y_max_val = max(y_values)
+    positive_fit_y = fit_y[fit_y > 0]
+    if positive_fit_y.size:
+        y_min_val = min(y_min_val, float(positive_fit_y.min()))
+        y_max_val = max(y_max_val, float(positive_fit_y.max()))
+    x_min = np.log10(x_min_val)
+    x_max = np.log10(x_axis_max)
+    y_min = np.log10(y_min_val)
+    y_max = np.log10(y_max_val)
+    x_dtick = (x_max - x_min) / 4
+    y_dtick = (y_max - y_min) / 4
+    fig = go.Figure()
     # 添加所有数据点
     fig.add_trace(
         fit_label = f"{fit_type}: y = {a:.2f} × x^{b:.3f}<br>Raw RMSE: {raw_rmse:.2f}, Log-RMSE: {log_rmse:.3f}"
     else:
         fit_label = f"{fit_type}: y = {a:.2f} × x^{b:.3f} + {c:.2f}<br>Raw RMSE: {raw_rmse:.2f}, Log-RMSE: {log_rmse:.3f}"
+    fig.add_trace(
+        go.Scatter(
+            x=fit_x.tolist(),
+            y=fit_y.tolist(),
+            mode="lines",
+            name=fit_label,
+            line=dict(color="#FF6B6B", width=2, dash="dash"),
+            hovertemplate=FIT_LINE_HOVER_TEMPLATE,
+        )
+    )
     title_suffix = " (Pareto Frontier)" if use_pareto else ""
     fig.update_layout(
                 fit_x_vals, fit_y_vals, fit_name_vals = x_vals, y_vals, name_vals
             # 使用筛选后的数据进行拟合
+            params, raw_rmse, log_rmse, fit_x, fit_y = fit_power_law_with_offset(
+                fit_x_vals,
+                fit_y_vals,
+                extrapolate_max_b=SCALING_EXTRAPOLATE_MAX_B,
+            )
+            a, b, c = params
+            positive_fit_y = fit_y[fit_y > 0]
+            if positive_fit_y.size:
+                all_y_values.extend(positive_fit_y.tolist())
             # 构建数据集名称列表（用于hover显示）
             datasets_label = f"Average of {len(selected_datasets)} datasets"
                 fit_label = f"{fit_type}: y = {a:.2f} × x^{b:.3f}<br>Raw RMSE: {raw_rmse:.2f}, Log-RMSE: {log_rmse:.3f}"
             else:
                 fit_label = f"{fit_type}: y = {a:.2f} × x^{b:.3f} + {c:.2f}<br>Raw RMSE: {raw_rmse:.2f}, Log-RMSE: {log_rmse:.3f}"
+            fig.add_trace(
+                go.Scatter(
+                    x=fit_x.tolist(),
+                    y=fit_y.tolist(),
+                    mode="lines",
+                    name=fit_label,
+                    line=dict(color="#FF6B6B", width=2, dash="dash"),
+                    hovertemplate=FIT_LINE_HOVER_TEMPLATE,
+                )
+            )
     else:
         # 单独显示模式：为每个数据集创建散点图和拟合线
         for idx, dataset in enumerate(selected_datasets):
                 fit_x_vals, fit_y_vals, fit_name_vals = x_vals, y_vals, name_vals
             # 使用筛选后的数据进行拟合
+            params, raw_rmse, log_rmse, fit_x, fit_y = fit_power_law_with_offset(
+                fit_x_vals,
+                fit_y_vals,
+                extrapolate_max_b=SCALING_EXTRAPOLATE_MAX_B,
+            )
+            a, b, c = params
+            positive_fit_y = fit_y[fit_y > 0]
+            if positive_fit_y.size:
+                all_y_values.extend(positive_fit_y.tolist())
             # 添加所有数据点
             fig.add_trace(
                 fit_label = f"{dataset} {fit_type}: y = {a:.2f} × x^{b:.3f}<br>Raw RMSE: {raw_rmse:.2f}, Log-RMSE: {log_rmse:.3f}"
             else:
                 fit_label = f"{dataset} {fit_type}: y = {a:.2f} × x^{b:.3f} + {c:.2f}<br>Raw RMSE: {raw_rmse:.2f}, Log-RMSE: {log_rmse:.3f}"
+            fig.add_trace(
+                go.Scatter(
+                    x=fit_x.tolist(),
+                    y=fit_y.tolist(),
+                    mode="lines",
+                    name=fit_label,
+                    line=dict(color=color, width=2, dash="dash"),
+                    hovertemplate=FIT_LINE_HOVER_TEMPLATE,
+                    legendgroup=dataset,
+                    showlegend=True,
+                )
+            )
     if not all_x_values or not all_y_values:
         fig = go.Figure()
         return fig
     # 计算全局坐标范围
+    x_min_val = min(all_x_values)
+    x_max_val = max(all_x_values)
+    x_axis_max = x_max_val
+    x_min, x_max = np.log10(x_min_val), np.log10(x_axis_max)
+    y_min, y_max = np.log10(min(all_y_values)), np.log10(max(all_y_values))
+    x_dtick = (x_max - x_min) / 4
+    y_dtick = (y_max - y_min) / 4
     fig.update_layout(
         title={"text": "Scaling Law by Dataset", "x": 0.5, "xanchor": "center", "yanchor": "top"},