Spaces:
Running
Running
fix
Browse files
app.py
CHANGED
|
@@ -1,61 +1,98 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
|
|
|
| 3 |
from datasets import load_dataset
|
| 4 |
from sklearn.ensemble import RandomForestRegressor
|
| 5 |
from sklearn.metrics import r2_score
|
| 6 |
import matplotlib.pyplot as plt
|
| 7 |
|
| 8 |
-
# Загружаем датасет
|
| 9 |
print("Loading dataset...")
|
| 10 |
ds_all = load_dataset("QSBench/QSBench-Core-v1.0.0-demo")
|
| 11 |
-
# Берём только сплит 'train' (там все строки)
|
| 12 |
df_all = pd.DataFrame(ds_all['train'])
|
| 13 |
|
| 14 |
-
# Разделяем по колонке 'split'
|
| 15 |
splits = {}
|
| 16 |
for split_name in df_all['split'].unique():
|
| 17 |
splits[split_name] = df_all[df_all['split'] == split_name].reset_index(drop=True)
|
| 18 |
|
| 19 |
print("Available splits:", list(splits.keys()))
|
| 20 |
|
| 21 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
def show_data(split):
|
| 23 |
if split in splits:
|
| 24 |
return splits[split].head(10)
|
| 25 |
else:
|
| 26 |
return f"Split '{split}' not found"
|
| 27 |
|
| 28 |
-
# Функция для обучения модели и создания графика
|
| 29 |
def train_model():
|
| 30 |
-
# Проверяем, что есть нужные сплиты
|
| 31 |
if 'train' not in splits or 'test' not in splits:
|
| 32 |
return None, "Error: train or test split not found in dataset"
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
return None, f"Error: missing columns: {missing}"
|
| 41 |
-
|
| 42 |
-
X_train = splits['train'][feature_cols]
|
| 43 |
y_train = splits['train'][target_col]
|
| 44 |
-
X_test = splits['test'][
|
| 45 |
y_test = splits['test'][target_col]
|
| 46 |
-
|
| 47 |
model = RandomForestRegressor(n_estimators=100, random_state=42)
|
| 48 |
model.fit(X_train, y_train)
|
| 49 |
y_pred = model.predict(X_test)
|
| 50 |
r2 = r2_score(y_test, y_pred)
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
# Интерфейс
|
| 61 |
with gr.Blocks(title="QSBench Demo Explorer") as demo:
|
|
@@ -67,7 +104,7 @@ with gr.Blocks(title="QSBench Demo Explorer") as demo:
|
|
| 67 |
👉 **Full datasets (up to 200k samples, noisy versions, 10‑qubit transpilation packs) are available for purchase.**
|
| 68 |
[Visit the QSBench website](https://qsbench.github.io/)
|
| 69 |
""")
|
| 70 |
-
|
| 71 |
with gr.Tabs():
|
| 72 |
with gr.TabItem("Data Explorer"):
|
| 73 |
split_selector = gr.Dropdown(
|
|
@@ -77,15 +114,14 @@ with gr.Blocks(title="QSBench Demo Explorer") as demo:
|
|
| 77 |
)
|
| 78 |
data_table = gr.Dataframe(label="First 10 rows", interactive=False)
|
| 79 |
split_selector.change(fn=show_data, inputs=split_selector, outputs=data_table)
|
| 80 |
-
# Загружаем данные по умолчанию
|
| 81 |
demo.load(fn=lambda: show_data(list(splits.keys())[0]), outputs=data_table)
|
| 82 |
-
|
| 83 |
with gr.TabItem("Model Demo"):
|
| 84 |
train_button = gr.Button("Train Random Forest")
|
| 85 |
plot_output = gr.Plot()
|
| 86 |
-
text_output = gr.
|
| 87 |
train_button.click(fn=train_model, outputs=[plot_output, text_output])
|
| 88 |
-
|
| 89 |
gr.Markdown("---")
|
| 90 |
gr.Markdown("""
|
| 91 |
### Get the full datasets
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
from datasets import load_dataset
|
| 5 |
from sklearn.ensemble import RandomForestRegressor
|
| 6 |
from sklearn.metrics import r2_score
|
| 7 |
import matplotlib.pyplot as plt
|
| 8 |
|
| 9 |
+
# Загружаем датасет и разделяем по колонке 'split'
|
| 10 |
print("Loading dataset...")
|
| 11 |
ds_all = load_dataset("QSBench/QSBench-Core-v1.0.0-demo")
|
|
|
|
| 12 |
df_all = pd.DataFrame(ds_all['train'])
|
| 13 |
|
|
|
|
| 14 |
splits = {}
|
| 15 |
for split_name in df_all['split'].unique():
|
| 16 |
splits[split_name] = df_all[df_all['split'] == split_name].reset_index(drop=True)
|
| 17 |
|
| 18 |
print("Available splits:", list(splits.keys()))
|
| 19 |
|
| 20 |
+
# Candidate features: numeric columns that are neither targets, identifiers,
# nor leakage columns (noisy/error/sign measurements of the label).
numeric_cols = df_all.select_dtypes(include=[np.number]).columns.tolist()

# Build the exclusion set programmatically instead of spelling out every
# column: identifiers, the sign columns, the global error columns, and every
# ideal/noisy expectation value (global and per-qubit q0..q5) in the Z, X
# and Y bases.
exclude = {'sample_id', 'sample_seed', 'sign_ideal_Z_global', 'sign_noisy_Z_global'}
for _basis in ('Z', 'X', 'Y'):
    exclude.add(f'error_{_basis}_global')
    for _kind in ('ideal', 'noisy'):
        exclude.add(f'{_kind}_expval_{_basis}_global')
        for _q in range(6):
            exclude.add(f'{_kind}_expval_{_basis}_q{_q}')

feature_cols = [col for col in numeric_cols if col not in exclude and not col.startswith('error_')]

# Target variable: the ideal (noise-free) global Z expectation value.
target_col = "ideal_expval_Z_global"
|
| 37 |
+
|
| 38 |
def show_data(split):
    """Return the first 10 rows of the requested split for display.

    Falls back to an error string when the split name is unknown, so the
    Gradio Dataframe component always receives something renderable.
    """
    try:
        return splits[split].head(10)
    except KeyError:
        return f"Split '{split}' not found"
|
| 43 |
|
|
|
|
| 44 |
def train_model():
    """Train a RandomForestRegressor on the 'train' split and evaluate on 'test'.

    Returns:
        tuple: (matplotlib Figure or None, str). On success: a two-panel
        figure (predictions vs. truth, top-10 feature importances) and a
        Markdown explanation of the R² score. On failure (missing splits or
        columns): (None, error message) — matching what the Gradio click
        handler expects for [plot_output, text_output].
    """
    if 'train' not in splits or 'test' not in splits:
        return None, "Error: train or test split not found in dataset"

    # Keep only the candidate features actually present in this dataset.
    available_features = [col for col in feature_cols if col in splits['train'].columns]
    if not available_features:
        return None, f"Error: no numeric feature columns found (tried: {feature_cols})"
    # Guard against a missing target column — without this, the indexing
    # below raises a raw KeyError instead of returning a readable error.
    if target_col not in splits['train'].columns or target_col not in splits['test'].columns:
        return None, f"Error: target column '{target_col}' not found in dataset"

    X_train = splits['train'][available_features]
    y_train = splits['train'][target_col]
    X_test = splits['test'][available_features]
    y_test = splits['test'][target_col]

    # Fixed random_state keeps the demo reproducible between runs.
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)

    # Left panel: predicted vs. true values with the ideal y = x line.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
    ax1.scatter(y_test, y_pred, alpha=0.5)
    ax1.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
    ax1.set_xlabel("True value")
    ax1.set_ylabel("Predicted")
    ax1.set_title(f"Predictions vs. Truth\nR² = {r2:.4f}")

    # Right panel: top-10 most important features (argsort ascending, so the
    # last 10 indices are the largest importances).
    importances = model.feature_importances_
    indices = np.argsort(importances)[-10:]
    ax2.barh(range(len(indices)), importances[indices])
    ax2.set_yticks(range(len(indices)))
    ax2.set_yticklabels([available_features[i] for i in indices])
    ax2.set_xlabel("Feature importance")
    ax2.set_title("Top 10 most important features")

    plt.tight_layout()

    # Describe the score honestly: the previous text asserted "the current
    # score is negative" unconditionally, which is wrong whenever R² >= 0.
    if r2 < 0.5:
        score_comment = (
            f"The current score is low ({r2:.4f}), which indicates that the chosen features "
            "(`total_gates`, `gate_entropy`, `meyer_wallach`, and others) are not strongly "
            "predictive of the ideal Z expectation value on this small dataset.\n"
            "This is expected: quantum expectation values depend on many subtle circuit details. "
            "Larger datasets with richer features would allow better models."
        )
    else:
        score_comment = (
            f"The model reaches R² = {r2:.4f} with the available structural circuit features "
            "on this small demo dataset."
        )

    explanation = f"""
**R² score:** {r2:.4f}

**What does it mean?**
R² measures how well the model explains the variance in the target.
- 1.0 = perfect prediction
- 0.0 = model predicts the mean (no better than guessing)
- Negative values = model performs worse than guessing the mean.

{score_comment}

👉 **Our full datasets** contain up to 200,000 circuits, additional noise models, and more features – perfect for serious Quantum Machine Learning research.
"""
    return fig, explanation
|
| 96 |
|
| 97 |
# Интерфейс
|
| 98 |
with gr.Blocks(title="QSBench Demo Explorer") as demo:
|
|
|
|
| 104 |
👉 **Full datasets (up to 200k samples, noisy versions, 10‑qubit transpilation packs) are available for purchase.**
|
| 105 |
[Visit the QSBench website](https://qsbench.github.io/)
|
| 106 |
""")
|
| 107 |
+
|
| 108 |
with gr.Tabs():
|
| 109 |
with gr.TabItem("Data Explorer"):
|
| 110 |
split_selector = gr.Dropdown(
|
|
|
|
| 114 |
)
|
| 115 |
data_table = gr.Dataframe(label="First 10 rows", interactive=False)
|
| 116 |
split_selector.change(fn=show_data, inputs=split_selector, outputs=data_table)
|
|
|
|
| 117 |
demo.load(fn=lambda: show_data(list(splits.keys())[0]), outputs=data_table)
|
| 118 |
+
|
| 119 |
with gr.TabItem("Model Demo"):
|
| 120 |
train_button = gr.Button("Train Random Forest")
|
| 121 |
plot_output = gr.Plot()
|
| 122 |
+
text_output = gr.Markdown()
|
| 123 |
train_button.click(fn=train_model, outputs=[plot_output, text_output])
|
| 124 |
+
|
| 125 |
gr.Markdown("---")
|
| 126 |
gr.Markdown("""
|
| 127 |
### Get the full datasets
|