Spaces:

QSBench
/

Multi-Target_Regression

Running

App Files Community

QSBench committed 4 days ago

Commit

c7ba23d

1 Parent(s): 9c18e8c

fix

Browse files

Files changed (1) hide show

app.py +38 -36

app.py CHANGED Viewed

@@ -4,52 +4,54 @@ from datasets import load_dataset
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.metrics import r2_score
 import matplotlib.pyplot as plt
 # Загружаем датасет
 ds = load_dataset("QSBench/QSBench-Core-v1.0.0-demo")
-# Определяем доступные сплиты (предполагаем train, val, test)
-available_splits = list(ds.keys())
-print("Available splits:", available_splits)  # для отладки в логах
-# Функция для отображения данных выбранного сплита
 def show_data(split):
     try:
         df = pd.DataFrame(ds[split])
         return df.head(10)
     except Exception as e:
-        # Возвращаем пустой DataFrame с сообщением об ошибке
-        return pd.DataFrame({"Error": [str(e)]})
 # Функция для обучения модели и создания графика
 def train_model():
-    try:
-        # Проверяем, есть ли нужные сплиты
-        if "train" not in ds or "test" not in ds:
-            return None  # или вернуть сообщение
-        feature_cols = ["total_gates", "gate_entropy", "meyer_wallach"]
-        target_col = "ideal_expval_Z_global"
-        X_train = pd.DataFrame(ds["train"])[feature_cols]
-        y_train = pd.DataFrame(ds["train"])[target_col]
-        X_test = pd.DataFrame(ds["test"])[feature_cols]
-        y_test = pd.DataFrame(ds["test"])[target_col]
-        model = RandomForestRegressor(n_estimators=100, random_state=42)
-        model.fit(X_train, y_train)
-        y_pred = model.predict(X_test)
-        r2 = r2_score(y_test, y_pred)
-        fig, ax = plt.subplots()
-        ax.scatter(y_test, y_pred, alpha=0.5)
-        ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
-        ax.set_xlabel("True value")
-        ax.set_ylabel("Predicted")
-        ax.set_title(f"Predictions vs. Truth (R² = {r2:.4f})")
-        return fig
-    except Exception as e:
-        # В случае ошибки возвращаем сообщение как график? проще вернуть None и показать текст
-        raise e
 # Интерфейс
 with gr.Blocks(title="QSBench Demo Explorer") as demo:
@@ -64,17 +66,17 @@ with gr.Blocks(title="QSBench Demo Explorer") as demo:
     with gr.Tabs():
         with gr.TabItem("Data Explorer"):
-            # Определяем список сплитов из загруженного датасета
-            split_choices = available_splits if available_splits else ["train", "val", "test"]
             split_selector = gr.Dropdown(
-                choices=split_choices,
                 label="Choose a split",
-                value=split_choices[0] if split_choices else "train"
             )
             data_table = gr.Dataframe(label="First 10 rows", interactive=False)
             split_selector.change(fn=show_data, inputs=split_selector, outputs=data_table)
             # Загружаем данные по умолчанию
-            demo.load(fn=lambda: show_data(split_choices[0] if split_choices else "train"), outputs=data_table)
         with gr.TabItem("Model Demo"):
             train_button = gr.Button("Train Random Forest")

 from sklearn.ensemble import RandomForestRegressor
 from sklearn.metrics import r2_score
 import matplotlib.pyplot as plt
+import sys
 # Загружаем датасет
+print("Loading dataset...")
 ds = load_dataset("QSBench/QSBench-Core-v1.0.0-demo")
+print("Available splits:", list(ds.keys()))  # выводим в логи
+# Функция для отображения таблицы
 def show_data(split):
     try:
         df = pd.DataFrame(ds[split])
         return df.head(10)
     except Exception as e:
+        return f"Error loading data: {e}"
 # Функция для обучения модели и создания графика
 def train_model():
+    # Определяем доступные сплиты
+    splits = list(ds.keys())
+    if "test" not in splits:
+        return "Error: 'test' split not found in dataset", None
+    feature_cols = ["total_gates", "gate_entropy", "meyer_wallach"]
+    target_col = "ideal_expval_Z_global"
+    # Проверяем наличие колонок
+    sample_df = pd.DataFrame(ds[splits[0]])
+    if not all(col in sample_df.columns for col in feature_cols + [target_col]):
+        missing = [col for col in feature_cols + [target_col] if col not in sample_df.columns]
+        return f"Error: missing columns: {missing}", None
+    X_train = pd.DataFrame(ds["train"])[feature_cols]
+    y_train = pd.DataFrame(ds["train"])[target_col]
+    X_test = pd.DataFrame(ds["test"])[feature_cols]
+    y_test = pd.DataFrame(ds["test"])[target_col]
+    model = RandomForestRegressor(n_estimators=100, random_state=42)
+    model.fit(X_train, y_train)
+    y_pred = model.predict(X_test)
+    r2 = r2_score(y_test, y_pred)
+    fig, ax = plt.subplots()
+    ax.scatter(y_test, y_pred, alpha=0.5)
+    ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
+    ax.set_xlabel("True value")
+    ax.set_ylabel("Predicted")
+    ax.set_title(f"Predictions vs. Truth (R² = {r2:.4f})")
+    return fig
 # Интерфейс
 with gr.Blocks(title="QSBench Demo Explorer") as demo:
     with gr.Tabs():
         with gr.TabItem("Data Explorer"):
+            # Определяем доступные сплиты для выпадающего списка
+            available_splits = list(ds.keys())
             split_selector = gr.Dropdown(
+                choices=available_splits,
                 label="Choose a split",
+                value=available_splits[0] if available_splits else None
             )
             data_table = gr.Dataframe(label="First 10 rows", interactive=False)
             split_selector.change(fn=show_data, inputs=split_selector, outputs=data_table)
             # Загружаем данные по умолчанию
+            demo.load(fn=lambda: show_data(available_splits[0]), outputs=data_table)
         with gr.TabItem("Model Demo"):
             train_button = gr.Button("Train Random Forest")