Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,8 +21,6 @@ from sklearn.naive_bayes import MultinomialNB
|
|
| 21 |
from sklearn.tree import DecisionTreeClassifier
|
| 22 |
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
|
| 23 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 24 |
-
from xgboost import XGBClassifier
|
| 25 |
-
from lightgbm import LGBMClassifier
|
| 26 |
from sklearn.svm import SVC
|
| 27 |
# Ignore FutureWarning messages
|
| 28 |
warnings.simplefilter(action='ignore', category=FutureWarning)
|
|
@@ -95,8 +93,6 @@ print('Data source import complete.')
|
|
| 95 |
import numpy as np # linear algebra
|
| 96 |
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
|
| 97 |
|
| 98 |
-
# Input data files are available in the read-only "../input/" directory
|
| 99 |
-
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
|
| 100 |
|
| 101 |
import os
|
| 102 |
for dirname, _, filenames in os.walk('/kaggle/input'):
|
|
@@ -158,136 +154,6 @@ vectorization = TfidfVectorizer()
|
|
| 158 |
XV_train = vectorization.fit_transform(X_train)
|
| 159 |
XV_test = vectorization.transform(X_test)
|
| 160 |
|
| 161 |
-
"""## Random forest and boosting methods
|
| 162 |
-
|
| 163 |
-
### Random forest
|
| 164 |
-
"""
|
| 165 |
-
|
| 166 |
-
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
|
| 167 |
-
rf_classifier.fit(XV_train, y_train)
|
| 168 |
-
|
| 169 |
-
rf_pred = rf_classifier.predict(XV_test)
|
| 170 |
-
|
| 171 |
-
print("Random Forest Accuracy:", accuracy_score(y_test, rf_pred))
|
| 172 |
-
|
| 173 |
-
print("\nRandom Forest Classification Report:")
|
| 174 |
-
print(classification_report(y_test, rf_pred))
|
| 175 |
-
|
| 176 |
-
ConfusionMatrixDisplay.from_predictions(y_test, rf_pred);
|
| 177 |
-
|
| 178 |
-
"""### Adaboost boosting method"""
|
| 179 |
-
|
| 180 |
-
ada_classifier = AdaBoostClassifier()
|
| 181 |
-
ada_classifier.fit(XV_train, y_train)
|
| 182 |
-
|
| 183 |
-
ada_pred = ada_classifier.predict(XV_test)
|
| 184 |
-
|
| 185 |
-
print("AdaBoost Accuracy:", accuracy_score(y_test, ada_pred))
|
| 186 |
-
|
| 187 |
-
print("\nAdaBoost Classification Report:")
|
| 188 |
-
print(classification_report(y_test, ada_pred))
|
| 189 |
-
|
| 190 |
-
ConfusionMatrixDisplay.from_predictions(y_test, ada_pred);
|
| 191 |
-
|
| 192 |
-
"""### Gradient Boosting"""
|
| 193 |
-
|
| 194 |
-
from sklearn.ensemble import GradientBoostingClassifier
|
| 195 |
-
# Gradient Boosting Machine (GBM)
|
| 196 |
-
gbm_classifier = GradientBoostingClassifier()
|
| 197 |
-
gbm_classifier.fit(XV_train, y_train)
|
| 198 |
-
y_pred_gbm = gbm_classifier.predict(XV_test)
|
| 199 |
-
accuracy_gbm = accuracy_score(y_test, y_pred_gbm)
|
| 200 |
-
print("\nGradient Boosting Machine (GBM) Model:")
|
| 201 |
-
print("Accuracy:", accuracy_gbm)
|
| 202 |
-
report_gbm = classification_report(y_test, y_pred_gbm)
|
| 203 |
-
print("Gradient Boosting Machine (GBM) Classification Report:")
|
| 204 |
-
print(report_gbm)
|
| 205 |
-
# If you want to display confusion matrix for GBM, you can use:
|
| 206 |
-
ConfusionMatrixDisplay.from_predictions(y_test, y_pred_gbm)
|
| 207 |
-
|
| 208 |
-
"""### LightGBM"""
|
| 209 |
-
|
| 210 |
-
import lightgbm as lgb
|
| 211 |
-
from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay
|
| 212 |
-
|
| 213 |
-
# LightGBM
|
| 214 |
-
lgb_classifier = lgb.LGBMClassifier()
|
| 215 |
-
lgb_classifier.fit(XV_train, y_train)
|
| 216 |
-
y_pred_lgb = lgb_classifier.predict(XV_test)
|
| 217 |
-
accuracy_lgb = accuracy_score(y_test, y_pred_lgb)
|
| 218 |
-
print("\nLightGBM Model:")
|
| 219 |
-
print("Accuracy:", accuracy_lgb)
|
| 220 |
-
report_lgb = classification_report(y_test, y_pred_lgb)
|
| 221 |
-
print("LightGBM Classification Report:")
|
| 222 |
-
print(report_lgb)
|
| 223 |
-
# If you want to display confusion matrix for LightGBM, you can use:
|
| 224 |
-
ConfusionMatrixDisplay.from_predictions(y_test, y_pred_lgb)
|
| 225 |
-
|
| 226 |
-
"""## SVM(Support Vector Machine)
|
| 227 |
-
|
| 228 |
-
### Kernel ---> 'Linear'
|
| 229 |
-
"""
|
| 230 |
-
|
| 231 |
-
svm_classifier = SVC(kernel='linear')
|
| 232 |
-
svm_classifier.fit(XV_train, y_train)
|
| 233 |
-
|
| 234 |
-
svm_pred = svm_classifier.predict(XV_test)
|
| 235 |
-
|
| 236 |
-
svm_accuracy = accuracy_score(y_test, svm_pred)
|
| 237 |
-
print(f"SVM with linear kernel Accuracy:", svm_accuracy)
|
| 238 |
-
|
| 239 |
-
print("\nSVM ( Kernel='linear' ) Classification Report:")
|
| 240 |
-
print(classification_report(y_test, svm_pred))
|
| 241 |
-
|
| 242 |
-
ConfusionMatrixDisplay.from_predictions(y_test,svm_pred);
|
| 243 |
-
|
| 244 |
-
"""### Kernel--->'Poly'"""
|
| 245 |
-
|
| 246 |
-
svm_classifier = SVC(kernel='poly')
|
| 247 |
-
svm_classifier.fit(XV_train, y_train)
|
| 248 |
-
|
| 249 |
-
svm_pred = svm_classifier.predict(XV_test)
|
| 250 |
-
|
| 251 |
-
svm_accuracy = accuracy_score(y_test, svm_pred)
|
| 252 |
-
print(f"SVM with poly kernel Accuracy:", svm_accuracy)
|
| 253 |
-
|
| 254 |
-
print("\nSVM ( Kernel='Poly' ) Classification Report:")
|
| 255 |
-
print(classification_report(y_test, svm_pred))
|
| 256 |
-
|
| 257 |
-
ConfusionMatrixDisplay.from_predictions(y_test,svm_pred);
|
| 258 |
-
|
| 259 |
-
"""### Kernel--->'RBF'"""
|
| 260 |
-
|
| 261 |
-
svm_classifier = SVC(kernel='rbf')
|
| 262 |
-
svm_classifier.fit(XV_train, y_train)
|
| 263 |
-
|
| 264 |
-
svm_pred = svm_classifier.predict(XV_test)
|
| 265 |
-
|
| 266 |
-
svm_accuracy = accuracy_score(y_test, svm_pred)
|
| 267 |
-
print(f"SVM with rbf kernel Accuracy:", svm_accuracy)
|
| 268 |
-
|
| 269 |
-
print("\nSVM ( Kernel='RBF' ) Classification Report:")
|
| 270 |
-
print(classification_report(y_test, svm_pred))
|
| 271 |
-
|
| 272 |
-
ConfusionMatrixDisplay.from_predictions(y_test,svm_pred);
|
| 273 |
-
|
| 274 |
-
"""# Decision Tree"""
|
| 275 |
-
|
| 276 |
-
from sklearn.tree import DecisionTreeClassifier, plot_tree
|
| 277 |
-
decision_tree=DecisionTreeClassifier(max_depth=20)
|
| 278 |
-
|
| 279 |
-
decision_tree.fit(XV_train,y_train)
|
| 280 |
-
|
| 281 |
-
dt_pred=decision_tree.predict(XV_test)
|
| 282 |
-
|
| 283 |
-
dt_accuracy=accuracy_score(y_test,dt_pred)
|
| 284 |
-
print(f"Decision Tree Accuracy with depth=20:", dt_accuracy)
|
| 285 |
-
|
| 286 |
-
print("\nDecision Tree Classification Report:")
|
| 287 |
-
print(classification_report(y_test, dt_pred))
|
| 288 |
-
|
| 289 |
-
ConfusionMatrixDisplay.from_predictions(y_test,dt_pred);
|
| 290 |
-
|
| 291 |
"""# Logistic Regression"""
|
| 292 |
|
| 293 |
logistic_model = LogisticRegression(max_iter=100)
|
|
@@ -306,66 +172,6 @@ print(report_logistic)
|
|
| 306 |
|
| 307 |
ConfusionMatrixDisplay.from_predictions(y_test,y_pred_logistic);
|
| 308 |
|
| 309 |
-
"""# Naive Bayes"""
|
| 310 |
-
|
| 311 |
-
# Train a multinomial Naive Bayes classifier on the TF-IDF training matrix
# and evaluate it on the held-out test matrix.
nb_classifier = MultinomialNB()
nb_classifier.fit(XV_train, y_train)

y_pred = nb_classifier.predict(XV_test)

accuracy = accuracy_score(y_test, y_pred)
print("Naive Bayes Model:")
print("Accuracy:", accuracy)

report_naive_bayes = classification_report(y_test, y_pred)
print("Naive Bayes Classification Report:")
print(report_naive_bayes)

# Plot the confusion matrix from this model's own predictions (y_pred);
# passing another model's predictions here would show the wrong matrix.
ConfusionMatrixDisplay.from_predictions(y_test, y_pred)
|
| 326 |
-
|
| 327 |
-
"""# K Nearest Neighbors (KNN)"""
|
| 328 |
-
|
| 329 |
-
from sklearn.neighbors import KNeighborsClassifier
|
| 330 |
-
from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay
|
| 331 |
-
|
| 332 |
-
# K-Nearest Neighbors (KNN)
|
| 333 |
-
knn_classifier = KNeighborsClassifier()
|
| 334 |
-
knn_classifier.fit(XV_train, y_train)
|
| 335 |
-
y_pred_knn = knn_classifier.predict(XV_test)
|
| 336 |
-
accuracy_knn = accuracy_score(y_test, y_pred_knn)
|
| 337 |
-
print("K-Nearest Neighbors (KNN) Model:")
|
| 338 |
-
print("Accuracy:", accuracy_knn)
|
| 339 |
-
report_knn = classification_report(y_test, y_pred_knn)
|
| 340 |
-
print("K-Nearest Neighbors (KNN) Classification Report:")
|
| 341 |
-
print(report_knn)
|
| 342 |
-
# If you want to display confusion matrix for KNN, you can use:
|
| 343 |
-
ConfusionMatrixDisplay.from_predictions(y_test, y_pred_knn)
|
| 344 |
-
|
| 345 |
-
"""# Test"""
|
| 346 |
-
|
| 347 |
-
def output_lable(n):
    """Map a numeric class label to its sentiment message.

    Args:
        n: Predicted class label. 0, 1 and 2 are recognised.

    Returns:
        The message string for the label, or None when ``n`` is not
        equal to 0, 1 or 2.
    """
    # NOTE(review): the message spelling ("Sentement") is kept exactly as it
    # was, because this text is printed to the user and may be matched
    # elsewhere -- confirm before correcting it.
    if n == 0:
        return "The Text Sentement is Negative"
    if n == 1:
        return "The Text Sentement is Neutral"
    if n == 2:
        return "The Text Sentement is Positive"
    # Unrecognised label: return None explicitly instead of falling off the
    # end of the function.
    return None
|
| 354 |
-
|
| 355 |
-
def manual_testing(news):
    """Classify one piece of raw text and print the resulting message.

    The text is passed through ``wp``, vectorized with the module-level
    ``vectorization`` object (fitted on the training data earlier in the
    script), and classified with ``logistic_model``. The predicted label is
    converted to a message by ``output_lable`` and printed.

    Args:
        news: The raw text to classify.

    Returns:
        None. The result is only printed.
    """
    processed_text = wp(news)
    # transform() takes an iterable of documents, so wrap the single string.
    features = vectorization.transform([processed_text])
    pred_lr = logistic_model.predict(features)
    # Only the logistic-regression prediction is reported; the SVM prediction
    # that used to be computed here was never used.
    print(output_lable(pred_lr[0]))
|
| 365 |
-
|
| 366 |
-
text = input("Enter Text to Classify ")
|
| 367 |
-
manual_testing(text)
|
| 368 |
-
|
| 369 |
# NOTE: `pip install gradio` is a shell/notebook command, not Python, and is a
# SyntaxError inside a .py module. Install gradio in the environment instead
# (e.g. list it in requirements.txt).
|
| 370 |
|
| 371 |
import gradio as gr
|
|
|
|
| 21 |
from sklearn.tree import DecisionTreeClassifier
|
| 22 |
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
|
| 23 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
|
|
|
|
|
| 24 |
from sklearn.svm import SVC
|
| 25 |
# Ignore FutureWarning messages
|
| 26 |
warnings.simplefilter(action='ignore', category=FutureWarning)
|
|
|
|
| 93 |
import numpy as np # linear algebra
|
| 94 |
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
|
| 95 |
|
|
|
|
|
|
|
| 96 |
|
| 97 |
import os
|
| 98 |
for dirname, _, filenames in os.walk('/kaggle/input'):
|
|
|
|
| 154 |
XV_train = vectorization.fit_transform(X_train)
|
| 155 |
XV_test = vectorization.transform(X_test)
|
| 156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
"""# Logistic Regression"""
|
| 158 |
|
| 159 |
logistic_model = LogisticRegression(max_iter=100)
|
|
|
|
| 172 |
|
| 173 |
ConfusionMatrixDisplay.from_predictions(y_test,y_pred_logistic);
|
| 174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
# NOTE: `pip install gradio` is a shell/notebook command, not Python, and is a
# SyntaxError inside a .py module. Install gradio in the environment instead
# (e.g. list it in requirements.txt).
|
| 176 |
|
| 177 |
import gradio as gr
|