QSBench committed on
Commit
048fad7
·
verified ·
1 Parent(s): f9c67d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -14
app.py CHANGED
@@ -38,12 +38,12 @@ REPO_CONFIG = {
38
  }
39
  }
40
 
41
- # Колонки, которые нельзя использовать как фичи для обучения
42
  NON_FEATURE_COLS = {
43
  "sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
44
  "qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
45
  "noise_type", "noise_prob", "observable_bases", "observable_mode", "backend_device",
46
- "precision_mode", "circuit_signature"
47
  }
48
 
49
  _ASSET_CACHE = {}
@@ -79,20 +79,23 @@ def get_methodology_content(ds_name: str):
79
  """
80
 
81
  def sync_ml_metrics(ds_name: str):
82
- """Динамически находит все доступные числовые метрики для конкретного датасета"""
83
  assets = load_all_assets(ds_name)
84
  df = assets["df"]
85
 
86
- # Берем только числа, исключая таргеты и служебные поля
87
  numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
 
 
88
  valid_features = [
89
  c for c in numeric_cols
90
  if c not in NON_FEATURE_COLS
91
- and not any(x in c for x in ["ideal_", "noisy_", "error_", "sign_"])
92
  ]
93
 
94
- # Выбираем "золотой стандарт" по умолчанию, если они есть
95
- defaults = [f for f in ["gate_entropy", "meyer_wallach", "n_qubits", "depth", "total_gates"] if f in valid_features]
 
96
 
97
  return gr.update(choices=valid_features, value=defaults or valid_features[:5])
98
 
@@ -100,6 +103,8 @@ def train_model(ds_name: str, features: List[str]):
100
  if not features: return None, "### ❌ Error: No metrics selected."
101
  assets = load_all_assets(ds_name)
102
  df = assets["df"]
 
 
103
  target = "ideal_expval_Z_global"
104
 
105
  train_df = df.dropna(subset=features + [target])
@@ -112,20 +117,26 @@ def train_model(ds_name: str, features: List[str]):
112
  sns.set_theme(style="whitegrid", context="talk")
113
  fig, axes = plt.subplots(1, 3, figsize=(24, 8))
114
 
 
115
  axes[0].scatter(y_test, preds, alpha=0.3, color='#2c3e50')
116
  axes[0].plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
117
  axes[0].set_title(f"Accuracy (R²: {r2_score(y_test, preds):.3f})")
 
118
 
 
119
  imp = model.feature_importances_
120
- idx = np.argsort(imp)[-10:]
 
 
121
  axes[1].barh([features[i] for i in idx], imp[idx], color='#27ae60')
122
- axes[1].set_title("Top Metrics Importance")
123
 
 
124
  sns.histplot(y_test - preds, kde=True, ax=axes[2], color='#d35400')
125
- axes[2].set_title("Residuals")
126
 
127
  plt.tight_layout(pad=3.0)
128
- return fig, f"**MAE:** {mean_absolute_error(y_test, preds):.4f}"
129
 
130
  def update_explorer(ds_name: str, split_name: str):
131
  assets = load_all_assets(ds_name)
@@ -157,8 +168,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
157
  with gr.Row():
158
  with gr.Column(scale=1):
159
  ml_ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Select Dataset")
160
- # Динамический список метрик
161
- ml_feat_sel = gr.CheckboxGroup(label="Available Metrics (Auto-detected)", choices=[])
162
  train_btn = gr.Button("Execute Baseline", variant="primary")
163
  with gr.Column(scale=2):
164
  p_out = gr.Plot()
@@ -178,7 +189,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
178
  # Explorer
179
  ds_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
180
 
181
- # ML Tab: Обновление списка метрик при смене датасета
182
  ml_ds_sel.change(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])
183
  train_btn.click(train_model, [ml_ds_sel, ml_feat_sel], [p_out, t_out])
184
 
 
38
  }
39
  }
40
 
41
+ # Колонки, которые НЕ являются фичами (системные, категориальные или таргеты)
42
  NON_FEATURE_COLS = {
43
  "sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
44
  "qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
45
  "noise_type", "noise_prob", "observable_bases", "observable_mode", "backend_device",
46
+ "precision_mode", "circuit_signature", "entanglement", "shots", "gpu_requested", "gpu_available"
47
  }
48
 
49
  _ASSET_CACHE = {}
 
79
  """
80
 
81
  def sync_ml_metrics(ds_name: str):
82
+ """Динамически находит все доступные числовые метрики (фичи) из CSV/Dataset"""
83
  assets = load_all_assets(ds_name)
84
  df = assets["df"]
85
 
86
+ # Извлекаем все числовые колонки
87
  numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
88
+
89
+ # Filter: drop system IDs and targets (any column whose name contains ideal_/noisy_/error_/sign_)
90
  valid_features = [
91
  c for c in numeric_cols
92
  if c not in NON_FEATURE_COLS
93
+ and not any(prefix in c for prefix in ["ideal_", "noisy_", "error_", "sign_"])
94
  ]
95
 
96
+ # Приоритетные метрики для выбора "по умолчанию"
97
+ top_tier = ["gate_entropy", "meyer_wallach", "adjacency", "depth", "total_gates", "cx_count"]
98
+ defaults = [f for f in top_tier if f in valid_features]
99
 
100
  return gr.update(choices=valid_features, value=defaults or valid_features[:5])
101
 
 
103
  if not features: return None, "### ❌ Error: No metrics selected."
104
  assets = load_all_assets(ds_name)
105
  df = assets["df"]
106
+
107
+ # Используем глобальное значение Z как таргет
108
  target = "ideal_expval_Z_global"
109
 
110
  train_df = df.dropna(subset=features + [target])
 
117
  sns.set_theme(style="whitegrid", context="talk")
118
  fig, axes = plt.subplots(1, 3, figsize=(24, 8))
119
 
120
+ # 1. Prediction vs Reality
121
  axes[0].scatter(y_test, preds, alpha=0.3, color='#2c3e50')
122
  axes[0].plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
123
  axes[0].set_title(f"Accuracy (R²: {r2_score(y_test, preds):.3f})")
124
+ axes[0].set_xlabel("Ideal ExpVal"); axes[0].set_ylabel("Predicted")
125
 
126
+ # 2. Feature Importance
127
  imp = model.feature_importances_
128
+ # Берем топ-10 если их много, или все если мало
129
+ top_n = min(len(features), 10)
130
+ idx = np.argsort(imp)[-top_n:]
131
  axes[1].barh([features[i] for i in idx], imp[idx], color='#27ae60')
132
+ axes[1].set_title(f"Top {top_n} Metrics Importance")
133
 
134
+ # 3. Residuals
135
  sns.histplot(y_test - preds, kde=True, ax=axes[2], color='#d35400')
136
+ axes[2].set_title("Residuals (Error Distribution)")
137
 
138
  plt.tight_layout(pad=3.0)
139
+ return fig, f"**Mean Absolute Error (MAE):** {mean_absolute_error(y_test, preds):.4f}"
140
 
141
  def update_explorer(ds_name: str, split_name: str):
142
  assets = load_all_assets(ds_name)
 
168
  with gr.Row():
169
  with gr.Column(scale=1):
170
  ml_ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Select Dataset")
171
+ # Динамический список метрик, извлекаемый из CSV
172
+ ml_feat_sel = gr.CheckboxGroup(label="Available Metrics (extracted from CSV)", choices=[])
173
  train_btn = gr.Button("Execute Baseline", variant="primary")
174
  with gr.Column(scale=2):
175
  p_out = gr.Plot()
 
189
  # Explorer
190
  ds_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
191
 
192
+ # ML Tab: Динамическое обновление метрик
193
  ml_ds_sel.change(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])
194
  train_btn.click(train_model, [ml_ds_sel, ml_feat_sel], [p_out, t_out])
195