ShanukaB committed on
Commit
70521f4
·
verified ·
1 Parent(s): dc1b195

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -34
app.py CHANGED
@@ -6,7 +6,7 @@ import json
6
  import shutil
7
  from pathlib import Path
8
  from huggingface_hub import hf_hub_download
9
- from transformers import pipeline
10
 
11
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
12
 
@@ -23,49 +23,52 @@ def load_models():
23
  logging.info("📥 Loading models...")
24
 
25
  try:
26
- # ====================== English Model ======================
27
  en_repo = "E-motionAssistant/English_LR_Model_New"
28
  en_vectorizer = joblib.load(hf_hub_download(en_repo, "tfidf_vectorizer.joblib"))
29
  en_classifier = joblib.load(hf_hub_download(en_repo, "logreg_model.joblib"))
30
  en_label_encoder = joblib.load(hf_hub_download(en_repo, "label_encoder.joblib"))
31
 
32
- # Load emotion_map.json
33
  try:
34
  map_path = hf_hub_download(en_repo, "emotion_map.json")
35
  with open(map_path, "r", encoding="utf-8") as f:
36
  en_emotion_map = json.load(f)
37
- logging.info("✅ emotion_map.json loaded for English")
38
  except:
39
- logging.warning("Could not load emotion_map.json")
40
  en_emotion_map = None
41
 
42
- # ====================== Sinhala Model ======================
43
  si_vectorizer = joblib.load(hf_hub_download("E-motionAssistant/Sinhala_Text_Emotion_Model_LR", "tfidf_vectorizer.joblib"))
44
  si_classifier = joblib.load(hf_hub_download("E-motionAssistant/Sinhala_Text_Emotion_Model_LR", "logreg_model.joblib"))
45
  si_label_encoder = joblib.load(hf_hub_download("E-motionAssistant/Sinhala_Text_Emotion_Model_LR", "label_encoder.joblib"))
46
 
47
- # ====================== TAMIL - FIXED VERSION ======================
48
- logging.info("📥 Loading Tamil model...")
49
 
50
- # Clean old cache (helps fix "always joy" issue)
51
  try:
52
  cache_dir = Path.home() / ".cache" / "huggingface" / "hub"
53
  model_cache = cache_dir / "models--E-motionAssistant--Tamil_Emotion_Recognition_Model"
54
  if model_cache.exists():
55
  shutil.rmtree(model_cache)
56
- logging.info("🧹 Cleaned old Tamil model cache")
57
  except:
58
  pass
59
 
 
 
 
 
 
60
  tamil_pipe = pipeline(
61
  "text-classification",
62
- model="E-motionAssistant/Tamil_Emotion_Recognition_Model",
63
- tokenizer="E-motionAssistant/Tamil_Emotion_Recognition_Model", # Explicit tokenizer
64
  device=-1,
65
  truncation=True,
66
- max_length=512
 
67
  )
68
- logging.info("✅ Tamil model + tokenizer loaded successfully")
 
69
 
70
  models = (en_vectorizer, en_classifier, en_label_encoder,
71
  si_vectorizer, si_classifier, si_label_encoder, tamil_pipe)
@@ -90,7 +93,7 @@ class PredictRequest(BaseModel):
90
 
91
  @app.get("/")
92
  def root():
93
- return {"status": "ok", "message": "Emotion Detector API is running"}
94
 
95
 
96
  @app.post("/predict")
@@ -98,9 +101,7 @@ def predict(req: PredictRequest):
98
  if not req.text or not req.text.strip():
99
  return {"error": "Text cannot be empty"}
100
 
101
- # Safety check
102
  if models is None:
103
- logging.warning("Models not loaded. Loading now...")
104
  load_models()
105
 
106
  en_vec, en_clf, en_le, si_vec, si_clf, si_le, tamil_pipe = models
@@ -121,22 +122,16 @@ def predict(req: PredictRequest):
121
  return {"emotion": str(emotion), "language": "Sinhala"}
122
 
123
  elif lang == "tamil":
124
- logging.info(f"Tamil input: '{req.text[:150]}...'")
125
 
126
- result = tamil_pipe(req.text, truncation=True, max_length=512)
127
 
128
- logging.info(f"Tamil raw output: {result}")
129
 
130
- # Get top prediction
131
- if isinstance(result, list) and len(result) > 0:
132
- top = result[0] if isinstance(result[0], dict) else result[0][0]
133
- emotion = top["label"]
134
- score = round(float(top["score"]), 4)
135
- else:
136
- emotion = "joy"
137
- score = 0.0
138
 
139
- logging.info(f"Tamil Final Prediction → {emotion} (Confidence: {score})")
140
 
141
  return {
142
  "emotion": emotion,
@@ -144,9 +139,6 @@ def predict(req: PredictRequest):
144
  "language": "Tamil"
145
  }
146
 
147
- else:
148
- return {"error": f"Unsupported language: {req.language}"}
149
-
150
  except Exception as e:
151
- logging.error(f"Prediction error: {e}")
152
- return {"error": "Prediction failed. Please try again."}
 
6
  import shutil
7
  from pathlib import Path
8
  from huggingface_hub import hf_hub_download
9
+ from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
10
 
11
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
12
 
 
23
  logging.info("📥 Loading models...")
24
 
25
  try:
26
+ # English & Sinhala (unchanged)
27
  en_repo = "E-motionAssistant/English_LR_Model_New"
28
  en_vectorizer = joblib.load(hf_hub_download(en_repo, "tfidf_vectorizer.joblib"))
29
  en_classifier = joblib.load(hf_hub_download(en_repo, "logreg_model.joblib"))
30
  en_label_encoder = joblib.load(hf_hub_download(en_repo, "label_encoder.joblib"))
31
 
 
32
  try:
33
  map_path = hf_hub_download(en_repo, "emotion_map.json")
34
  with open(map_path, "r", encoding="utf-8") as f:
35
  en_emotion_map = json.load(f)
 
36
  except:
 
37
  en_emotion_map = None
38
 
 
39
  si_vectorizer = joblib.load(hf_hub_download("E-motionAssistant/Sinhala_Text_Emotion_Model_LR", "tfidf_vectorizer.joblib"))
40
  si_classifier = joblib.load(hf_hub_download("E-motionAssistant/Sinhala_Text_Emotion_Model_LR", "logreg_model.joblib"))
41
  si_label_encoder = joblib.load(hf_hub_download("E-motionAssistant/Sinhala_Text_Emotion_Model_LR", "label_encoder.joblib"))
42
 
43
+ # ====================== TAMIL - STRONG FIX ======================
44
+ logging.info("📥 Loading Tamil model with manual components...")
45
 
46
+ # Clean cache
47
  try:
48
  cache_dir = Path.home() / ".cache" / "huggingface" / "hub"
49
  model_cache = cache_dir / "models--E-motionAssistant--Tamil_Emotion_Recognition_Model"
50
  if model_cache.exists():
51
  shutil.rmtree(model_cache)
52
+ logging.info("🧹 Cleaned Tamil cache")
53
  except:
54
  pass
55
 
56
+ # Load the tokenizer and model explicitly and pass them to pipeline() (sometimes more reliable than letting pipeline() resolve them by name)
57
+ model_name = "E-motionAssistant/Tamil_Emotion_Recognition_Model"
58
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
59
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
60
+
61
  tamil_pipe = pipeline(
62
  "text-classification",
63
+ model=model,
64
+ tokenizer=tokenizer,
65
  device=-1,
66
  truncation=True,
67
+ max_length=512,
68
+ top_k=1
69
  )
70
+
71
+ logging.info("✅ Tamil model loaded with manual tokenizer & model")
72
 
73
  models = (en_vectorizer, en_classifier, en_label_encoder,
74
  si_vectorizer, si_classifier, si_label_encoder, tamil_pipe)
 
93
 
94
  @app.get("/")
95
  def root():
96
+ return {"status": "ok"}
97
 
98
 
99
  @app.post("/predict")
 
101
  if not req.text or not req.text.strip():
102
  return {"error": "Text cannot be empty"}
103
 
 
104
  if models is None:
 
105
  load_models()
106
 
107
  en_vec, en_clf, en_le, si_vec, si_clf, si_le, tamil_pipe = models
 
122
  return {"emotion": str(emotion), "language": "Sinhala"}
123
 
124
  elif lang == "tamil":
125
+ logging.info(f"Tamil input: {req.text[:200]}...")
126
 
127
+ result = tamil_pipe(req.text)
128
 
129
+ logging.info(f"Tamil raw result: {result}")
130
 
131
+ emotion = result[0]['label']
132
+ score = round(float(result[0]['score']), 4)
 
 
 
 
 
 
133
 
134
+ logging.info(f"Tamil Final → {emotion} ({score})")
135
 
136
  return {
137
  "emotion": emotion,
 
139
  "language": "Tamil"
140
  }
141
 
 
 
 
142
  except Exception as e:
143
+ logging.error(f"Error: {e}")
144
+ return {"error": str(e)}