Fizza03 committed on
Commit
b764b4e
·
verified ·
1 Parent(s): c1dfa2e

Add test_team.py

Browse files
Files changed (1) hide show
  1. test_team.py +11 -64
test_team.py CHANGED
@@ -1,70 +1,17 @@
1
- # train.py
2
 
3
- import pandas as pd
4
  import joblib
5
  import numpy as np
6
- from sklearn.linear_model import LogisticRegression
7
- from sklearn.feature_extraction.text import TfidfVectorizer
8
- from sklearn.multiclass import OneVsRestClassifier
9
- from sklearn.model_selection import train_test_split
10
- from sklearn.metrics import f1_score
11
 
12
- LABELS = ['admiration','anger','disgust','fear','hope','joy','love','pride','sadness']
 
13
 
14
- def to_binary(label_string):
15
- present = [e.strip() for e in str(label_string).split(',')]
16
- return [1 if label in present else 0 for label in LABELS]
 
 
17
 
18
- # ── load ──────────────────────────────────────────────────────────
19
- print("Loading dataset...")
20
- df = pd.read_excel("Multi-Labeled_Emotions_Modified.xlsx")
21
- df = df[['Tweets (text)', 'Emotions (Multi-labeled)']].dropna()
22
- print(f"Total rows: {len(df)}")
23
-
24
- X = df['Tweets (text)'].tolist()
25
- y = [to_binary(row) for row in df['Emotions (Multi-labeled)']]
26
-
27
- # ── split ─────────────────────────────────────────────────────────
28
- X_train, X_test, y_train, y_test = train_test_split(
29
- X, y, test_size=0.2, random_state=42
30
- )
31
- print(f"Train: {len(X_train)} rows")
32
- print(f"Test: {len(X_test)} rows")
33
-
34
- # ── save test set as hidden test data ─────────────────────────────
35
- test_emotions = [
36
- ', '.join([LABELS[i] for i, val in enumerate(row) if val == 1])
37
- for row in y_test
38
- ]
39
- test_df = pd.DataFrame({
40
- 'Tweets (text)': X_test,
41
- 'Emotions (Multi-labeled)': test_emotions
42
- })
43
- test_df.to_excel("test_set.xlsx", index=False)
44
- print("Saved test_set.xlsx")
45
-
46
- # ── train ─────────────────────────────────────────────────────────
47
- print("Training...")
48
- vectorizer = TfidfVectorizer(max_features=10000, ngram_range=(1,2))
49
- X_train_tfidf = vectorizer.fit_transform(X_train)
50
- X_test_tfidf = vectorizer.transform(X_test)
51
-
52
- classifier = OneVsRestClassifier(
53
- LogisticRegression(max_iter=1000, C=1.0)
54
- )
55
- classifier.fit(X_train_tfidf, np.array(y_train))
56
- print("Training done.")
57
-
58
- # ── quick check ───────────────────────────────────────────────────
59
- y_pred = classifier.predict(X_test_tfidf)
60
- f1 = f1_score(np.array(y_test), y_pred, average='macro', zero_division=0)
61
- print(f"F1 score: {f1:.4f}")
62
-
63
- # ── save ──────────────────────────────────────────────────────────
64
- model_bundle = {
65
- "vectorizer": vectorizer,
66
- "classifier": classifier,
67
- "labels": LABELS
68
- }
69
- joblib.dump(model_bundle, "model.pkl")
70
- print("Saved model.pkl")
 
1
+ # model_wrapper.py
2
 
 
3
  import joblib
4
  import numpy as np
 
 
 
 
 
5
 
6
+ LABELS = ['admiration','anger','disgust','fear','hope',
7
+ 'joy','love','pride','sadness']
8
 
9
class MyModel:
    """Inference wrapper around a model bundle stored with joblib.

    The bundle is read as a mapping with a ``"vectorizer"`` entry (an object
    exposing ``transform``) and a ``"classifier"`` entry (an object exposing
    ``predict``). If the bundle also carries a ``"labels"`` entry it is kept
    on ``self.labels``; otherwise the module-level ``LABELS`` is used.
    """

    def __init__(self, model_path="model.pkl"):
        """Load the fitted vectorizer and classifier from *model_path*.

        Args:
            model_path: Location of the joblib bundle. Defaults to
                ``"model.pkl"`` relative to the current working directory,
                so a zero-argument ``MyModel()`` loads the same file as
                before.
        """
        # NOTE(security): joblib.load unpickles arbitrary objects, so only
        # load bundles that come from a trusted source.
        bundle = joblib.load(model_path)
        self.vectorizer = bundle["vectorizer"]
        self.classifier = bundle["classifier"]
        # Prefer the label order stored with the model so the prediction
        # columns can be named consistently with how it was trained.
        self.labels = list(bundle.get("labels", LABELS))

    def predict(self, texts):
        """Return the classifier's predictions for *texts*.

        Args:
            texts: An iterable of raw text documents, or a single ``str``,
                which is treated as one document.

        Returns:
            Whatever ``self.classifier.predict`` returns for the vectorized
            input (presumably one row of 0/1 label indicators per document;
            confirm against the training script).
        """
        # A bare string is itself iterable; wrap it so it is scored as one
        # document instead of being handed to the vectorizer as-is.
        if isinstance(texts, str):
            texts = [texts]
        X = self.vectorizer.transform(texts)
        return self.classifier.predict(X)