Spaces:

QSBench
/

Multi-Target_Regression

Running

App Files Community

QSBench committed on 4 days ago

Commit

dcea369

1 Parent(s): c7ba23d

fix

Browse files

Files changed (1) hide show

app.py +39 -36

app.py CHANGED Viewed

@@ -4,54 +4,58 @@ from datasets import load_dataset
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.metrics import r2_score
 import matplotlib.pyplot as plt
-import sys
-# Загружаем датасет
 print("Loading dataset...")
-ds = load_dataset("QSBench/QSBench-Core-v1.0.0-demo")
-print("Available splits:", list(ds.keys()))  # выводим в логи
 # Функция для отображения таблицы
 def show_data(split):
-    try:
-        df = pd.DataFrame(ds[split])
-        return df.head(10)
-    except Exception as e:
-        return f"Error loading data: {e}"
 # Функция для обучения модели и создания графика
 def train_model():
-    # Определяем доступные сплиты
-    splits = list(ds.keys())
-    if "test" not in splits:
-        return "Error: 'test' split not found in dataset", None
     feature_cols = ["total_gates", "gate_entropy", "meyer_wallach"]
     target_col = "ideal_expval_Z_global"
     # Проверяем наличие колонок
-    sample_df = pd.DataFrame(ds[splits[0]])
-    if not all(col in sample_df.columns for col in feature_cols + [target_col]):
-        missing = [col for col in feature_cols + [target_col] if col not in sample_df.columns]
-        return f"Error: missing columns: {missing}", None
-    X_train = pd.DataFrame(ds["train"])[feature_cols]
-    y_train = pd.DataFrame(ds["train"])[target_col]
-    X_test = pd.DataFrame(ds["test"])[feature_cols]
-    y_test = pd.DataFrame(ds["test"])[target_col]
     model = RandomForestRegressor(n_estimators=100, random_state=42)
     model.fit(X_train, y_train)
     y_pred = model.predict(X_test)
     r2 = r2_score(y_test, y_pred)
     fig, ax = plt.subplots()
     ax.scatter(y_test, y_pred, alpha=0.5)
     ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
     ax.set_xlabel("True value")
     ax.set_ylabel("Predicted")
     ax.set_title(f"Predictions vs. Truth (R² = {r2:.4f})")
-    return fig
 # Интерфейс
 with gr.Blocks(title="QSBench Demo Explorer") as demo:
@@ -63,26 +67,25 @@ with gr.Blocks(title="QSBench Demo Explorer") as demo:
     👉 **Full datasets (up to 200k samples, noisy versions, 10‑qubit transpilation packs) are available for purchase.**
     [Visit the QSBench website](https://qsbench.github.io/)
     """)
     with gr.Tabs():
         with gr.TabItem("Data Explorer"):
-            # Определяем доступные сплиты для выпадающего списка
-            available_splits = list(ds.keys())
             split_selector = gr.Dropdown(
-                choices=available_splits,
                 label="Choose a split",
-                value=available_splits[0] if available_splits else None
             )
             data_table = gr.Dataframe(label="First 10 rows", interactive=False)
             split_selector.change(fn=show_data, inputs=split_selector, outputs=data_table)
             # Загружаем данные по умолчанию
-            demo.load(fn=lambda: show_data(available_splits[0]), outputs=data_table)
         with gr.TabItem("Model Demo"):
             train_button = gr.Button("Train Random Forest")
             plot_output = gr.Plot()
-            train_button.click(fn=train_model, outputs=plot_output)
     gr.Markdown("---")
     gr.Markdown("""
     ### Get the full datasets

 from sklearn.ensemble import RandomForestRegressor
 from sklearn.metrics import r2_score
 import matplotlib.pyplot as plt
+# Загружаем датасет (все данные в одном сплите 'train')
 print("Loading dataset...")
+ds_all = load_dataset("QSBench/QSBench-Core-v1.0.0-demo")
+# Берём только сплит 'train' (там все строки)
+df_all = pd.DataFrame(ds_all['train'])
+# Разделяем по колонке 'split'
+splits = {}
+for split_name in df_all['split'].unique():
+    splits[split_name] = df_all[df_all['split'] == split_name].reset_index(drop=True)
+print("Available splits:", list(splits.keys()))
 # Функция для отображения таблицы
 def show_data(split):
+    if split in splits:
+        return splits[split].head(10)
+    else:
+        return f"Split '{split}' not found"
 # Функция для обучения модели и создания графика
 def train_model():
+    # Проверяем, что есть нужные сплиты
+    if 'train' not in splits or 'test' not in splits:
+        return None, "Error: train or test split not found in dataset"
     feature_cols = ["total_gates", "gate_entropy", "meyer_wallach"]
     target_col = "ideal_expval_Z_global"
     # Проверяем наличие колонок
+    if not all(col in splits['train'].columns for col in feature_cols + [target_col]):
+        missing = [col for col in feature_cols + [target_col] if col not in splits['train'].columns]
+        return None, f"Error: missing columns: {missing}"
+    X_train = splits['train'][feature_cols]
+    y_train = splits['train'][target_col]
+    X_test = splits['test'][feature_cols]
+    y_test = splits['test'][target_col]
     model = RandomForestRegressor(n_estimators=100, random_state=42)
     model.fit(X_train, y_train)
     y_pred = model.predict(X_test)
     r2 = r2_score(y_test, y_pred)
     fig, ax = plt.subplots()
     ax.scatter(y_test, y_pred, alpha=0.5)
     ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
     ax.set_xlabel("True value")
     ax.set_ylabel("Predicted")
     ax.set_title(f"Predictions vs. Truth (R² = {r2:.4f})")
+    return fig, f"R² score: {r2:.4f}"
 # Интерфейс
 with gr.Blocks(title="QSBench Demo Explorer") as demo:
     👉 **Full datasets (up to 200k samples, noisy versions, 10‑qubit transpilation packs) are available for purchase.**
     [Visit the QSBench website](https://qsbench.github.io/)
     """)
     with gr.Tabs():
         with gr.TabItem("Data Explorer"):
             split_selector = gr.Dropdown(
+                choices=list(splits.keys()),
                 label="Choose a split",
+                value=list(splits.keys())[0] if splits else None
             )
             data_table = gr.Dataframe(label="First 10 rows", interactive=False)
             split_selector.change(fn=show_data, inputs=split_selector, outputs=data_table)
             # Загружаем данные по умолчанию
+            demo.load(fn=lambda: show_data(list(splits.keys())[0]), outputs=data_table)
         with gr.TabItem("Model Demo"):
             train_button = gr.Button("Train Random Forest")
             plot_output = gr.Plot()
+            text_output = gr.Textbox(label="Result", interactive=False)
+            train_button.click(fn=train_model, outputs=[plot_output, text_output])
     gr.Markdown("---")
     gr.Markdown("""
     ### Get the full datasets