QSBench committed on
Commit
a4a80b8
·
verified ·
1 Parent(s): 3bf4374

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -26
app.py CHANGED
@@ -20,6 +20,7 @@ DATASET_MAP = {
20
 
21
  TARGET_COL = "ideal_expval_Z_global"
22
 
 
23
  EXCLUDE_COLS = {
24
  "sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
25
  "qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
@@ -41,13 +42,15 @@ def get_df(dataset_key):
41
 
42
  def get_numeric_feature_cols(df: pd.DataFrame) -> list[str]:
43
  numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
44
- # Убираем все таргеты и нерелевантные колонки
45
  return [c for c in numeric_cols if c not in EXCLUDE_COLS and not c.startswith("error_") and "expval" not in c]
46
 
47
  # =========================================================
48
  # LOGIC
49
  # =========================================================
50
- def update_explorer(dataset_name, split_name):
 
 
51
  df = get_df(dataset_name)
52
  splits = df["split"].unique().tolist() if "split" in df.columns else ["train"]
53
  filtered = df[df["split"] == split_name].head(10) if "split" in df.columns else df.head(10)
@@ -55,29 +58,34 @@ def update_explorer(dataset_name, split_name):
55
  qasm_raw = filtered["qasm_raw"].iloc[0] if "qasm_raw" in filtered.columns else "// N/A"
56
  qasm_tr = filtered["qasm_transpiled"].iloc[0] if "qasm_transpiled" in filtered.columns else "// N/A"
57
 
 
 
 
 
 
58
  features = get_numeric_feature_cols(df)
59
- # По умолчанию выбираем первые 8 признаков (обычно это n_qubits, depth и базовые гейты)
60
- default_features = features[:8]
 
61
 
62
- return gr.update(choices=splits), filtered, qasm_raw, qasm_tr, gr.update(choices=features, value=default_features)
63
 
64
  def run_model_demo(dataset_name, selected_features):
65
  df = get_df(dataset_name)
66
 
67
- # КРИТИЧЕСКОЕ ИСПРАВЛЕНИЕ: фильтруем признаки, которые реально есть в этом датасете
68
  valid_features = [f for f in selected_features if f in df.columns]
69
 
70
  if not valid_features:
71
- return None, "### ⚠️ No valid features selected for this dataset."
72
 
73
  target = TARGET_COL if TARGET_COL in df.columns else df.filter(like="expval").columns[0]
74
 
75
- # Подготовка данных
76
  work_df = df.dropna(subset=valid_features + [target]).reset_index(drop=True)
77
  X, y = work_df[valid_features], work_df[target]
78
 
79
- if len(work_df) < 50:
80
- return None, "### ⚠️ Not enough data rows to train."
81
 
82
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
83
 
@@ -88,35 +96,36 @@ def run_model_demo(dataset_name, selected_features):
88
  sns.set_theme(style="whitegrid")
89
  fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 5))
90
 
91
- # Parity
92
  ax1.scatter(y_test, preds, alpha=0.4, color='#636EFA')
93
  ax1.plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
94
- ax1.set_title(f"R² = {r2_score(y_test, preds):.3f}")
95
  ax1.set_xlabel("Actual")
96
  ax1.set_ylabel("Predicted")
97
 
98
- # Importance
99
  importances = model.feature_importances_
100
- indices = np.argsort(importances)[-10:] # Только топ-10 для красоты
101
  ax2.barh(range(len(indices)), importances[indices], color='#EF553B')
102
  ax2.set_yticks(range(len(indices)))
103
  ax2.set_yticklabels([valid_features[i] for i in indices])
104
- ax2.set_title("Top Feature Importance")
105
 
106
- # Residuals
107
  sns.histplot(y_test - preds, kde=True, ax=ax3, color='#00CC96')
108
- ax3.set_title("Error Distribution")
109
 
110
  plt.tight_layout()
111
- return fig, f"### Train Stats: {dataset_name}\n**MAE:** {mean_absolute_error(y_test, preds):.4f}"
112
 
113
  # =========================================================
114
  # UI
115
  # =========================================================
116
- with gr.Blocks() as demo:
117
- gr.Markdown("# 🌌 QSBench Unified Explorer")
118
 
119
  with gr.Tabs():
 
120
  with gr.TabItem("🔎 Explorer"):
121
  with gr.Row():
122
  ds_selector = gr.Dropdown(choices=list(DATASET_MAP.keys()), value="Core (Clean)", label="Dataset")
@@ -128,21 +137,32 @@ with gr.Blocks() as demo:
128
  qasm_raw_view = gr.Code(label="Raw QASM", language="python", lines=10)
129
  qasm_tr_view = gr.Code(label="Transpiled QASM", language="python", lines=10)
130
 
 
131
  with gr.TabItem("🤖 ML Demo"):
132
  with gr.Row():
133
  with gr.Column(scale=1):
134
- m_ds_selector = gr.Dropdown(choices=list(DATASET_MAP.keys()), value="Core (Clean)", label="Target Dataset")
135
- f_selector = gr.CheckboxGroup(label="Features", choices=[])
136
- train_btn = gr.Button("Train", variant="primary")
 
137
  with gr.Column(scale=2):
138
  plot_out = gr.Plot()
139
  text_out = gr.Markdown()
140
 
141
- # Ссылки
142
- ds_selector.change(update_explorer, [ds_selector, split_selector], [split_selector, data_table, qasm_raw_view, qasm_tr_view, f_selector])
 
 
 
 
 
 
 
143
  train_btn.click(run_model_demo, [m_ds_selector, f_selector], [plot_out, text_out])
144
 
145
- demo.load(update_explorer, [ds_selector, split_selector], [split_selector, data_table, qasm_raw_view, qasm_tr_view, f_selector])
 
 
146
 
147
  if __name__ == "__main__":
148
  demo.launch(theme=gr.themes.Soft())
 
20
 
21
  TARGET_COL = "ideal_expval_Z_global"
22
 
23
+ # Columns that must never be used as features
24
  EXCLUDE_COLS = {
25
  "sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
26
  "qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
 
42
 
43
  def get_numeric_feature_cols(df: pd.DataFrame) -> list[str]:
44
  numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
45
+ # Оставляем только структурные метрики, убираем таргеты и ошибки
46
  return [c for c in numeric_cols if c not in EXCLUDE_COLS and not c.startswith("error_") and "expval" not in c]
47
 
48
  # =========================================================
49
  # LOGIC
50
  # =========================================================
51
+
52
+ # Refreshes the first tab (Explorer)
53
+ def update_explorer_tab(dataset_name, split_name):
54
  df = get_df(dataset_name)
55
  splits = df["split"].unique().tolist() if "split" in df.columns else ["train"]
56
  filtered = df[df["split"] == split_name].head(10) if "split" in df.columns else df.head(10)
 
58
  qasm_raw = filtered["qasm_raw"].iloc[0] if "qasm_raw" in filtered.columns else "// N/A"
59
  qasm_tr = filtered["qasm_transpiled"].iloc[0] if "qasm_transpiled" in filtered.columns else "// N/A"
60
 
61
+ return gr.update(choices=splits), filtered, qasm_raw, qasm_tr
62
+
63
+ # Refreshes the feature list in the second tab (ML Demo)
64
def update_ml_features(dataset_name):
    """Build the update for the ML Demo feature checkbox group.

    Looks up the dataframe for ``dataset_name`` via ``get_df`` and offers
    every column accepted by ``get_numeric_feature_cols`` as a choice.

    Returns:
        A ``gr.update`` carrying the available feature names as ``choices``
        and the pre-ticked subset as ``value``.
    """
    available = get_numeric_feature_cols(get_df(dataset_name))

    # Prefer a fixed set of well-known metrics; when none of them exist in
    # this dataset, fall back to the first five available features.
    preferred = ["n_qubits", "depth", "total_gates", "gate_entropy", "meyer_wallach"]
    preselected = [name for name in preferred if name in available] or available[:5]

    return gr.update(choices=available, value=preselected)
72
 
73
  def run_model_demo(dataset_name, selected_features):
74
  df = get_df(dataset_name)
75
 
76
+ # Защита от несуществующих колонок (KeyError)
77
  valid_features = [f for f in selected_features if f in df.columns]
78
 
79
  if not valid_features:
80
+ return None, "### ⚠️ Ошибка: Выбранные признаки не найдены в этом датасете."
81
 
82
  target = TARGET_COL if TARGET_COL in df.columns else df.filter(like="expval").columns[0]
83
 
 
84
  work_df = df.dropna(subset=valid_features + [target]).reset_index(drop=True)
85
  X, y = work_df[valid_features], work_df[target]
86
 
87
+ if len(work_df) < 20:
88
+ return None, "### ⚠️ Недостаточно данных для обучения."
89
 
90
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
91
 
 
96
  sns.set_theme(style="whitegrid")
97
  fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 5))
98
 
99
+ # График предсказаний
100
  ax1.scatter(y_test, preds, alpha=0.4, color='#636EFA')
101
  ax1.plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
102
+ ax1.set_title(f"R² Score: {r2_score(y_test, preds):.3f}")
103
  ax1.set_xlabel("Actual")
104
  ax1.set_ylabel("Predicted")
105
 
106
+ # Важность признаков (топ-10)
107
  importances = model.feature_importances_
108
+ indices = np.argsort(importances)[-10:]
109
  ax2.barh(range(len(indices)), importances[indices], color='#EF553B')
110
  ax2.set_yticks(range(len(indices)))
111
  ax2.set_yticklabels([valid_features[i] for i in indices])
112
+ ax2.set_title("Feature Importance")
113
 
114
+ # Распределение ошибок
115
  sns.histplot(y_test - preds, kde=True, ax=ax3, color='#00CC96')
116
+ ax3.set_title("Residuals")
117
 
118
  plt.tight_layout()
119
+ return fig, f"### Отчет по датасету: {dataset_name}\n**MAE:** {mean_absolute_error(y_test, preds):.4f}"
120
 
121
  # =========================================================
122
  # UI
123
  # =========================================================
124
+ with gr.Blocks(title="QSBench Explorer") as demo:
125
+ gr.Markdown("# 🌌 QSBench: Quantum Synthetic Benchmark")
126
 
127
  with gr.Tabs():
128
+ # ВКЛАДКА 1: ПРОСМОТР ДАННЫХ
129
  with gr.TabItem("🔎 Explorer"):
130
  with gr.Row():
131
  ds_selector = gr.Dropdown(choices=list(DATASET_MAP.keys()), value="Core (Clean)", label="Dataset")
 
137
  qasm_raw_view = gr.Code(label="Raw QASM", language="python", lines=10)
138
  qasm_tr_view = gr.Code(label="Transpiled QASM", language="python", lines=10)
139
 
140
+ # ВКЛАДКА 2: МАШИННОЕ ОБУЧЕНИЕ
141
  with gr.TabItem("🤖 ML Demo"):
142
  with gr.Row():
143
  with gr.Column(scale=1):
144
+ gr.Markdown("### Настройка обучения")
145
+ m_ds_selector = gr.Dropdown(choices=list(DATASET_MAP.keys()), value="Core (Clean)", label="Dataset for ML")
146
+ f_selector = gr.CheckboxGroup(label="Признаки (Features)", choices=[])
147
+ train_btn = gr.Button("Запустить обучение", variant="primary")
148
  with gr.Column(scale=2):
149
  plot_out = gr.Plot()
150
  text_out = gr.Markdown()
151
 
152
+ # --- ЛОГИКА СОБЫТИЙ ---
153
+
154
+ # При изменении датасета в Explorer — обновляем таблицу и QASM
155
+ ds_selector.change(update_explorer_tab, [ds_selector, split_selector], [split_selector, data_table, qasm_raw_view, qasm_tr_view])
156
+
157
+ # ПРИНЦИПИАЛЬНО: При изменении датасета в ML Demo — обновляем список чекбоксов
158
+ m_ds_selector.change(update_ml_features, inputs=[m_ds_selector], outputs=[f_selector])
159
+
160
+ # Кнопка обучения
161
  train_btn.click(run_model_demo, [m_ds_selector, f_selector], [plot_out, text_out])
162
 
163
+ # Инициализация при старте
164
+ demo.load(update_explorer_tab, [ds_selector, split_selector], [split_selector, data_table, qasm_raw_view, qasm_tr_view])
165
+ demo.load(update_ml_features, [m_ds_selector], [f_selector])
166
 
167
  if __name__ == "__main__":
168
  demo.launch(theme=gr.themes.Soft())