Daizzyy committed on
Commit
5de0c48
·
verified ·
1 Parent(s): d1d0bea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -55
app.py CHANGED
@@ -1,53 +1,59 @@
1
  import gradio as gr
2
  import joblib
 
3
  import os
4
- import numpy as np
5
-
6
-
7
 
8
  def load_model():
9
- """Load joblib model and components"""
10
  try:
11
- print("Loading joblib model...")
12
 
13
- # Load model (try different possible names)
14
- model = None
15
- model_file = None
 
16
 
17
- if os.path.exists("model.safetensors"):
18
- print("Found model.safetensors")
19
- model = joblib.load("model.safetensors")
20
- model_file = "model.safetensors"
21
- elif os.path.exists("model.jobilib"):
22
- print("Found model.jobilib")
23
- model = joblib.load("model.jobilib")
24
- model_file = "model.jobilib"
25
- elif os.path.exists("tfidf_logreg_best.jobilib"):
26
- print("Found tfidf_logreg_best.jobilib")
27
- model = joblib.load("tfidf_logreg_best.jobilib")
28
- model_file = "tfidf_logreg_best.jobilib"
29
- else:
30
- # List available files
31
- files = os.listdir(".")
32
- print(f"Available files: {files}")
33
- raise FileNotFoundError("No model file found")
 
 
 
 
 
 
 
 
 
 
34
 
35
- # Load vectorizer/tokenizer
36
- vectorizer = None
37
- if os.path.exists("vocab"):
38
- print("Found vocab file")
39
- vectorizer = joblib.load("vocab")
40
- elif os.path.exists("tokenizer"):
41
- print("Found tokenizer file")
42
- vectorizer = joblib.load("tokenizer")
43
 
44
- # Load label encoder
45
- label_encoder = None
46
- if os.path.exists("label_encoder.jobilib"):
47
- print("Found label_encoder.jobilib")
48
- label_encoder = joblib.load("label_encoder.jobilib")
 
 
 
 
 
 
49
 
50
- print(f"✅ Model loaded successfully from {model_file}")
51
  return {
52
  "model": model,
53
  "vectorizer": vectorizer,
@@ -55,42 +61,51 @@ def load_model():
55
  }
56
 
57
  except Exception as e:
58
- print(f"❌ Error loading joblib model: {str(e)}")
 
 
59
  return None
60
 
61
- # Load model
 
62
  model_components = load_model()
63
 
 
 
 
 
64
 
65
 
66
  def predict(text):
67
- """Predict cyberbullying category using joblib model"""
68
  if not text.strip():
69
  return "<div class='warn'>⚠️ Please enter some text.</div>"
70
 
71
  try:
 
72
  if model_components is None:
73
- return "<div class='warn'>❌ Model not loaded properly</div>"
74
 
75
  model = model_components["model"]
76
  vectorizer = model_components["vectorizer"]
77
  label_encoder = model_components["label_encoder"]
78
 
79
- # Vectorize the text
80
- if vectorizer is not None:
81
- text_vector = vectorizer.transform([text])
82
- else:
83
- return "<div class='warn'>❌ Vectorizer not found</div>"
 
84
 
85
  # Get prediction
86
  prediction = model.predict(text_vector)[0]
87
 
88
- # Get probability if available
89
  try:
90
  probabilities = model.predict_proba(text_vector)[0]
91
  score = max(probabilities)
92
  except:
93
- score = 0.8 # Default score
94
 
95
  # Decode label if encoder exists
96
  if label_encoder is not None:
@@ -101,9 +116,9 @@ def predict(text):
101
  else:
102
  label = str(prediction)
103
 
104
- print(f"Prediction: {label}, Score: {score}")
105
 
106
- # Category definitions
107
  cyberbullying_types = {
108
  "age": {"emoji": "👶", "color": "#ff6b6b", "text": "Age-Based Cyberbullying"},
109
  "gender": {"emoji": "⚥️", "color": "#ff8c42", "text": "Gender-Based Cyberbullying"},
@@ -113,9 +128,12 @@ def predict(text):
113
  "not_cyberbullying": {"emoji": "✅", "color": "#00ff64", "text": "Safe Message"}
114
  }
115
 
116
- # Get category (handle case variations)
117
  label_lower = str(label).lower().strip()
118
- category = cyberbullying_types.get(label_lower, cyberbullying_types.get(label, cyberbullying_types["not_cyberbullying"]))
 
 
 
119
 
120
  # Safe message
121
  if label_lower == "not_cyberbullying":
@@ -146,7 +164,7 @@ def predict(text):
146
  except Exception as e:
147
  import traceback
148
  error_msg = traceback.format_exc()
149
- print(f"ERROR: {str(e)}")
150
  print(error_msg)
151
  return f"<div class='warn'>❌ Error: {str(e)}</div>"
152
 
 
1
  import gradio as gr
2
  import joblib
3
+ from huggingface_hub import hf_hub_download
4
  import os
 
 
 
5
 
6
  def load_model():
7
+ """Download and load model from HF Hub"""
8
  try:
9
+ print("Loading model from Hugging Face Hub...")
10
 
11
+
12
+ HF_USERNAME = "Daizzyy"
13
+ HF_MODEL = "cyberbullying-model"
14
+ repo_id = f"{HF_USERNAME}/{HF_MODEL}"
15
 
16
+ print(f"Downloading from: {repo_id}")
17
+
18
+ # Download and load model
19
+ try:
20
+ model_file = hf_hub_download(
21
+ repo_id=repo_id,
22
+ filename="tfidf_logreg_best.jobilib",
23
+ cache_dir=".cache"
24
+ )
25
+ model = joblib.load(model_file)
26
+ print(f"✅ Model loaded successfully")
27
+ except Exception as e:
28
+ print(f"❌ Error loading model: {e}")
29
+ return None
30
+
31
+ # Download and load vectorizer
32
+ try:
33
+ vectorizer_file = hf_hub_download(
34
+ repo_id=repo_id,
35
+ filename="vocab",
36
+ cache_dir=".cache"
37
+ )
38
+ vectorizer = joblib.load(vectorizer_file)
39
+ print(f"✅ Vectorizer loaded successfully")
40
+ except Exception as e:
41
+ print(f"❌ Error loading vectorizer: {e}")
42
+ vectorizer = None
43
 
 
 
 
 
 
 
 
 
44
 
45
+ try:
46
+ label_encoder_file = hf_hub_download(
47
+ repo_id=repo_id,
48
+ filename="label_encoder.jobilib",
49
+ cache_dir=".cache"
50
+ )
51
+ label_encoder = joblib.load(label_encoder_file)
52
+ print(f"✅ Label encoder loaded successfully")
53
+ except Exception as e:
54
+ print(f"⚠️ Label encoder not found (optional): {e}")
55
+ label_encoder = None
56
 
 
57
  return {
58
  "model": model,
59
  "vectorizer": vectorizer,
 
61
  }
62
 
63
  except Exception as e:
64
+ print(f"❌ Critical error loading model: {str(e)}")
65
+ import traceback
66
+ print(traceback.format_exc())
67
  return None
68
 
69
+ # Load model on startup
70
+ print("Starting model loading...")
71
  model_components = load_model()
72
 
73
+ if model_components is None:
74
+ print("⚠️ Model loading failed!")
75
+ else:
76
+ print("✅ All models loaded successfully!")
77
 
78
 
79
  def predict(text):
80
+ """Predict cyberbullying category"""
81
  if not text.strip():
82
  return "<div class='warn'>⚠️ Please enter some text.</div>"
83
 
84
  try:
85
+ # Check if models are loaded
86
  if model_components is None:
87
+ return "<div class='warn'>❌ Model not loaded. Please check server logs.</div>"
88
 
89
  model = model_components["model"]
90
  vectorizer = model_components["vectorizer"]
91
  label_encoder = model_components["label_encoder"]
92
 
93
+ # Check if vectorizer exists
94
+ if vectorizer is None:
95
+ return "<div class='warn'>❌ Vectorizer not available</div>"
96
+
97
+ # Vectorize the input text
98
+ text_vector = vectorizer.transform([text])
99
 
100
  # Get prediction
101
  prediction = model.predict(text_vector)[0]
102
 
103
+ # Get confidence score
104
  try:
105
  probabilities = model.predict_proba(text_vector)[0]
106
  score = max(probabilities)
107
  except:
108
+ score = 0.8
109
 
110
  # Decode label if encoder exists
111
  if label_encoder is not None:
 
116
  else:
117
  label = str(prediction)
118
 
119
+ print(f"Prediction: {label}, Score: {score:.4f}")
120
 
121
+ # Category definitions with colors and emojis
122
  cyberbullying_types = {
123
  "age": {"emoji": "👶", "color": "#ff6b6b", "text": "Age-Based Cyberbullying"},
124
  "gender": {"emoji": "⚥️", "color": "#ff8c42", "text": "Gender-Based Cyberbullying"},
 
128
  "not_cyberbullying": {"emoji": "✅", "color": "#00ff64", "text": "Safe Message"}
129
  }
130
 
131
+ # Get category info (handle case variations)
132
  label_lower = str(label).lower().strip()
133
+ category = cyberbullying_types.get(
134
+ label_lower,
135
+ cyberbullying_types.get(label, cyberbullying_types["not_cyberbullying"])
136
+ )
137
 
138
  # Safe message
139
  if label_lower == "not_cyberbullying":
 
164
  except Exception as e:
165
  import traceback
166
  error_msg = traceback.format_exc()
167
+ print(f"ERROR in prediction: {str(e)}")
168
  print(error_msg)
169
  return f"<div class='warn'>❌ Error: {str(e)}</div>"
170