QSBench committed on
Commit
c906129
·
1 Parent(s): dcea369
Files changed (1) hide show
  1. app.py +67 -31
app.py CHANGED
@@ -1,61 +1,98 @@
1
  import gradio as gr
2
  import pandas as pd
 
3
  from datasets import load_dataset
4
  from sklearn.ensemble import RandomForestRegressor
5
  from sklearn.metrics import r2_score
6
  import matplotlib.pyplot as plt
7
 
8
- # Загружаем датасет (все данные в одном сплите 'train')
9
  print("Loading dataset...")
10
  ds_all = load_dataset("QSBench/QSBench-Core-v1.0.0-demo")
11
- # Берём только сплит 'train' (там все строки)
12
  df_all = pd.DataFrame(ds_all['train'])
13
 
14
- # Разделяем по колонке 'split'
15
  splits = {}
16
  for split_name in df_all['split'].unique():
17
  splits[split_name] = df_all[df_all['split'] == split_name].reset_index(drop=True)
18
 
19
  print("Available splits:", list(splits.keys()))
20
 
21
- # Функция для отображения таблицы
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def show_data(split):
23
  if split in splits:
24
  return splits[split].head(10)
25
  else:
26
  return f"Split '{split}' not found"
27
 
28
- # Функция для обучения модели и создания графика
29
  def train_model():
30
- # Проверяем, что есть нужные сплиты
31
  if 'train' not in splits or 'test' not in splits:
32
  return None, "Error: train or test split not found in dataset"
33
-
34
- feature_cols = ["total_gates", "gate_entropy", "meyer_wallach"]
35
- target_col = "ideal_expval_Z_global"
36
-
37
- # Проверяем наличие колонок
38
- if not all(col in splits['train'].columns for col in feature_cols + [target_col]):
39
- missing = [col for col in feature_cols + [target_col] if col not in splits['train'].columns]
40
- return None, f"Error: missing columns: {missing}"
41
-
42
- X_train = splits['train'][feature_cols]
43
  y_train = splits['train'][target_col]
44
- X_test = splits['test'][feature_cols]
45
  y_test = splits['test'][target_col]
46
-
47
  model = RandomForestRegressor(n_estimators=100, random_state=42)
48
  model.fit(X_train, y_train)
49
  y_pred = model.predict(X_test)
50
  r2 = r2_score(y_test, y_pred)
51
-
52
- fig, ax = plt.subplots()
53
- ax.scatter(y_test, y_pred, alpha=0.5)
54
- ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
55
- ax.set_xlabel("True value")
56
- ax.set_ylabel("Predicted")
57
- ax.set_title(f"Predictions vs. Truth (R² = {r2:.4f})")
58
- return fig, f"R² score: {r2:.4f}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  # Интерфейс
61
  with gr.Blocks(title="QSBench Demo Explorer") as demo:
@@ -67,7 +104,7 @@ with gr.Blocks(title="QSBench Demo Explorer") as demo:
67
  👉 **Full datasets (up to 200k samples, noisy versions, 10‑qubit transpilation packs) are available for purchase.**
68
  [Visit the QSBench website](https://qsbench.github.io/)
69
  """)
70
-
71
  with gr.Tabs():
72
  with gr.TabItem("Data Explorer"):
73
  split_selector = gr.Dropdown(
@@ -77,15 +114,14 @@ with gr.Blocks(title="QSBench Demo Explorer") as demo:
77
  )
78
  data_table = gr.Dataframe(label="First 10 rows", interactive=False)
79
  split_selector.change(fn=show_data, inputs=split_selector, outputs=data_table)
80
- # Загружаем данные по умолчанию
81
  demo.load(fn=lambda: show_data(list(splits.keys())[0]), outputs=data_table)
82
-
83
  with gr.TabItem("Model Demo"):
84
  train_button = gr.Button("Train Random Forest")
85
  plot_output = gr.Plot()
86
- text_output = gr.Textbox(label="Result", interactive=False)
87
  train_button.click(fn=train_model, outputs=[plot_output, text_output])
88
-
89
  gr.Markdown("---")
90
  gr.Markdown("""
91
  ### Get the full datasets
 
1
import gradio as gr
import pandas as pd
import numpy as np
from datasets import load_dataset
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt

# Load the demo dataset. All rows live in the single HF split 'train';
# each row carries its logical split name in the 'split' column.
print("Loading dataset...")
ds_all = load_dataset("QSBench/QSBench-Core-v1.0.0-demo")
df_all = pd.DataFrame(ds_all['train'])

# Partition rows by the 'split' column into a dict of DataFrames.
splits = {}
for split_name in df_all['split'].unique():
    splits[split_name] = df_all[df_all['split'] == split_name].reset_index(drop=True)

print("Available splits:", list(splits.keys()))

# Candidate features: numeric columns that are neither targets nor identifiers.
numeric_cols = df_all.select_dtypes(include=[np.number]).columns.tolist()

# Columns that must never be used as features: identifiers, the obvious
# target/expectation-value columns and their error/sign derivatives.
# Using a set gives O(1) membership tests in the comprehension below.
exclude = {
    'sample_id', 'sample_seed',
    'ideal_expval_Z_global', 'ideal_expval_X_global', 'ideal_expval_Y_global',
    'noisy_expval_Z_global', 'noisy_expval_X_global', 'noisy_expval_Y_global',
    'error_Z_global', 'error_X_global', 'error_Y_global',
    'sign_ideal_Z_global', 'sign_noisy_Z_global',
}
# Per-qubit expectation values (ideal/noisy x Z/X/Y x q0..q5) are generated
# instead of hand-enumerating all 36 column names.
exclude.update(
    f"{kind}_expval_{basis}_q{qubit}"
    for kind in ("ideal", "noisy")
    for basis in ("Z", "X", "Y")
    for qubit in range(6)
)
feature_cols = [col for col in numeric_cols
                if col not in exclude and not col.startswith('error_')]

# Regression target: the ideal global Z expectation value.
target_col = "ideal_expval_Z_global"
37
+
38
def show_data(split):
    """Return the first 10 rows of the requested split's DataFrame.

    Falls back to an error-message string when the split name is unknown.
    """
    try:
        return splits[split].head(10)
    except KeyError:
        return f"Split '{split}' not found"
43
 
 
44
def train_model():
    """Train a RandomForestRegressor on the 'train' split and evaluate on 'test'.

    Returns:
        (fig, markdown): a two-panel matplotlib figure (predictions vs. truth,
        top-10 feature importances) and a Markdown explanation of the R² score.
        On any precondition failure returns (None, error message) instead.
    """
    if 'train' not in splits or 'test' not in splits:
        return None, "Error: train or test split not found in dataset"

    # Keep only the candidate features actually present in the data.
    available_features = [col for col in feature_cols if col in splits['train'].columns]
    if not available_features:
        return None, f"Error: no numeric feature columns found (tried: {feature_cols})"
    # Guard the target too — indexing a missing column would raise KeyError
    # instead of honoring the (None, message) error contract.
    if target_col not in splits['train'].columns or target_col not in splits['test'].columns:
        return None, f"Error: target column '{target_col}' not found"

    X_train = splits['train'][available_features]
    y_train = splits['train'][target_col]
    X_test = splits['test'][available_features]
    y_test = splits['test'][target_col]

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: predictions vs. ground truth with the ideal y = x line.
    ax1.scatter(y_test, y_pred, alpha=0.5)
    ax1.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
    ax1.set_xlabel("True value")
    ax1.set_ylabel("Predicted")
    ax1.set_title(f"Predictions vs. Truth\nR² = {r2:.4f}")

    # Right panel: top-10 feature importances.
    importances = model.feature_importances_
    indices = np.argsort(importances)[-10:]
    ax2.barh(range(len(indices)), importances[indices])
    ax2.set_yticks(range(len(indices)))
    ax2.set_yticklabels([available_features[i] for i in indices])
    ax2.set_xlabel("Feature importance")
    ax2.set_title("Top 10 most important features")

    plt.tight_layout()

    # Interpret the score from its actual value — the previous text claimed
    # the score was negative no matter what the model achieved, and referred
    # to a hard-coded feature list this function no longer uses.
    if r2 < 0:
        verdict = (
            "The current score is negative, which indicates that the available "
            "circuit features are not strongly predictive of the ideal Z "
            "expectation value on this small dataset. This is expected: quantum "
            "expectation values depend on many subtle circuit details."
        )
    else:
        verdict = (
            "The current score is non-negative, so the selected circuit features "
            "carry some signal about the ideal Z expectation value, though there "
            "is plenty of room for improvement on this small dataset."
        )

    explanation = f"""
**R² score:** {r2:.4f}

**What does it mean?**
R² measures how well the model explains the variance in the target.
- 1.0 = perfect prediction
- 0.0 = model predicts the mean (no better than guessing)
- Negative values = model performs worse than guessing the mean.

{verdict}
Larger datasets with richer features would allow better models.

👉 **Our full datasets** contain up to 200,000 circuits, additional noise models, and more features – perfect for serious Quantum Machine Learning research.
"""
    return fig, explanation
96
 
97
  # Интерфейс
98
  with gr.Blocks(title="QSBench Demo Explorer") as demo:
 
104
  👉 **Full datasets (up to 200k samples, noisy versions, 10‑qubit transpilation packs) are available for purchase.**
105
  [Visit the QSBench website](https://qsbench.github.io/)
106
  """)
107
+
108
  with gr.Tabs():
109
  with gr.TabItem("Data Explorer"):
110
  split_selector = gr.Dropdown(
 
114
  )
115
  data_table = gr.Dataframe(label="First 10 rows", interactive=False)
116
  split_selector.change(fn=show_data, inputs=split_selector, outputs=data_table)
 
117
  demo.load(fn=lambda: show_data(list(splits.keys())[0]), outputs=data_table)
118
+
119
  with gr.TabItem("Model Demo"):
120
  train_button = gr.Button("Train Random Forest")
121
  plot_output = gr.Plot()
122
+ text_output = gr.Markdown()
123
  train_button.click(fn=train_model, outputs=[plot_output, text_output])
124
+
125
  gr.Markdown("---")
126
  gr.Markdown("""
127
  ### Get the full datasets