Update app.py
Browse files
app.py
CHANGED
|
@@ -38,13 +38,14 @@ label_encoder = LabelEncoder()
|
|
| 38 |
df['label_encoded'] = label_encoder.fit_transform(df['label'])
|
| 39 |
num_classes = len(label_encoder.classes_)
|
| 40 |
|
| 41 |
-
# Split the data
|
| 42 |
X_train, X_test, y_train, y_test = train_test_split(
|
| 43 |
df['text'],
|
| 44 |
df['label_encoded'],
|
| 45 |
test_size=0.2,
|
| 46 |
-
random_state=42,
|
| 47 |
-
|
|
|
|
| 48 |
)
|
| 49 |
|
| 50 |
# Feature extraction using TF-IDF
|
|
|
|
| 38 |
df['label_encoded'] = label_encoder.fit_transform(df['label'])
|
| 39 |
num_classes = len(label_encoder.classes_)
|
| 40 |
|
| 41 |
+
# Split the data - fix: remove stratify for classes with few samples
|
| 42 |
X_train, X_test, y_train, y_test = train_test_split(
|
| 43 |
df['text'],
|
| 44 |
df['label_encoded'],
|
| 45 |
test_size=0.2,
|
| 46 |
+
random_state=42,
|
| 47 |
+
# Stratification is disabled unconditionally: some classes have too few samples to stratify
|
| 48 |
+
stratify=None # Removed stratification to fix the error
|
| 49 |
)
|
| 50 |
|
| 51 |
# Feature extraction using TF-IDF
|