Spaces:
Running
Running
Abid Ali Awan committed on
Commit ·
78ee5c9
1
Parent(s): 29b0e11
feat: Limit training dataset to 10,000 rows for improved processing speed and update success message with dataset information
Browse files - modal_backend.py +15 -1
modal_backend.py
CHANGED
|
@@ -38,6 +38,14 @@ def train_model(csv_content: str, target_col: str, task_type: str = "classificat
|
|
| 38 |
"""
|
| 39 |
df = pd.read_csv(io.StringIO(csv_content))
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
if target_col not in df.columns:
|
| 42 |
raise ValueError(f"Target column '{target_col}' not found in dataset.")
|
| 43 |
|
|
@@ -100,10 +108,16 @@ def train_model(csv_content: str, target_col: str, task_type: str = "classificat
|
|
| 100 |
json.dump({"columns": list(X.columns), "task_type": task_type}, f)
|
| 101 |
volume.commit()
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
return {
|
| 104 |
"model_id": model_id,
|
| 105 |
"metrics": metrics,
|
| 106 |
-
"message":
|
| 107 |
}
|
| 108 |
|
| 109 |
|
|
|
|
| 38 |
"""
|
| 39 |
df = pd.read_csv(io.StringIO(csv_content))
|
| 40 |
|
| 41 |
+
# Limit training to 10,000 rows for faster processing
|
| 42 |
+
original_rows = len(df)
|
| 43 |
+
if original_rows > 10000:
|
| 44 |
+
df = df.head(10000)
|
| 45 |
+
print(f"Dataset contains {original_rows:,} rows. Using first 10,000 rows for training to speed up the process.")
|
| 46 |
+
else:
|
| 47 |
+
print(f"Dataset contains {original_rows:,} rows. Using all available data for training.")
|
| 48 |
+
|
| 49 |
if target_col not in df.columns:
|
| 50 |
raise ValueError(f"Target column '{target_col}' not found in dataset.")
|
| 51 |
|
|
|
|
| 108 |
json.dump({"columns": list(X.columns), "task_type": task_type}, f)
|
| 109 |
volume.commit()
|
| 110 |
|
| 111 |
+
# Create training message with dataset info
|
| 112 |
+
if original_rows > 10000:
|
| 113 |
+
message = f"{task_type.capitalize()} model trained successfully on 10,000 rows (original dataset had {original_rows:,} rows)."
|
| 114 |
+
else:
|
| 115 |
+
message = f"{task_type.capitalize()} model trained successfully on {original_rows:,} rows."
|
| 116 |
+
|
| 117 |
return {
|
| 118 |
"model_id": model_id,
|
| 119 |
"metrics": metrics,
|
| 120 |
+
"message": message,
|
| 121 |
}
|
| 122 |
|
| 123 |
|