# Page-status residue captured along with this script: "Spaces: Sleeping, Sleeping"
# Train a TF-IDF + XGBoost text classifier on a CSV dataset, report accuracy
# on a held-out split, and persist the fitted model and vectorizer to disk.

import joblib
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

# Load dataset; the script reads the "text" and "label" columns from it.
# NOTE(review): rows with a missing "text" value would presumably make the
# vectorizer fail -- confirm the CSV has no empty cells in that column.
df = pd.read_csv("dummy_sentiment_dataset.csv")

# Split: hold out 20% of the rows for evaluation. The fixed random_state
# makes the split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    df["text"],
    df["label"],
    test_size=0.2,
    random_state=42,
)

# TF-IDF: fit the vocabulary on the training texts only, then reuse that
# fitted vocabulary for the test texts so no test information leaks in.
tfidf = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Model
# NOTE(review): recent XGBoost releases appear to require class labels that
# are already encoded as integers 0..n_classes-1 -- confirm the "label"
# column meets that, or encode it before fitting.
model = XGBClassifier(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.1,
    eval_metric="logloss",
)
model.fit(X_train_tfidf, y_train)

# Evaluate on the held-out split.
y_pred = model.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Save model + vectorizer. Both are needed at inference time: new text has to
# go through the same fitted vectorizer before it reaches the model.
joblib.dump(model, "model.joblib")
joblib.dump(tfidf, "tfidf_vectorizer.joblib")
print("✅ Model and vectorizer saved!")