entropy25 committed on
Commit
3ae0d14
·
verified ·
1 Parent(s): 3644b14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -18
app.py CHANGED
@@ -217,29 +217,37 @@ class SentimentEngine:
217
  def extract_key_words_shap(self, text: str, top_k: int = 10) -> List[Tuple[str, float]]:
218
  """Advanced keyword extraction using SHAP"""
219
  try:
220
- # Initialize SHAP explainer if not already done
221
- if self.shap_explainer is None:
222
- self.shap_explainer = shap.Explainer(self.predict_proba, self.model_manager.tokenizer)
223
 
224
- # Get SHAP values
225
- shap_values = self.shap_explainer([text])
226
 
227
- # Extract word importance
228
- words = text.split()
229
- if len(shap_values.values) > 0 and len(shap_values.values[0]) > 0:
230
- # Get positive class SHAP values
231
- pos_shap_values = shap_values.values[0][:, 1] if len(shap_values.values[0].shape) > 1 else shap_values.values[0]
232
 
233
- word_scores = []
234
- for i, word in enumerate(words[:len(pos_shap_values)]):
 
 
235
  clean_word = re.sub(r'[^\w]', '', word.lower())
236
  if len(clean_word) >= config.MIN_WORD_LENGTH:
237
- word_scores.append((clean_word, abs(float(pos_shap_values[i]))))
238
-
239
- word_scores.sort(key=lambda x: x[1], reverse=True)
240
- return word_scores[:top_k]
241
-
242
- return []
 
 
 
 
 
 
243
 
244
  except Exception as e:
245
  logger.error(f"SHAP extraction failed: {e}")
 
217
  def extract_key_words_shap(self, text: str, top_k: int = 10) -> List[Tuple[str, float]]:
218
  """Advanced keyword extraction using SHAP"""
219
  try:
220
+ # Simple SHAP implementation using model predictions
221
+ words = text.split()
222
+ word_scores = []
223
 
224
+ # Get baseline prediction
225
+ baseline_prob = self.predict_proba([text])[0][1] # Positive probability
226
 
227
+ # Calculate importance by removing each word
228
+ for i, word in enumerate(words):
229
+ # Create text without this word
230
+ modified_words = words[:i] + words[i+1:]
231
+ modified_text = ' '.join(modified_words)
232
 
233
+ if modified_text.strip():
234
+ modified_prob = self.predict_proba([modified_text])[0][1]
235
+ importance = abs(baseline_prob - modified_prob)
236
+
237
  clean_word = re.sub(r'[^\w]', '', word.lower())
238
  if len(clean_word) >= config.MIN_WORD_LENGTH:
239
+ word_scores.append((clean_word, importance))
240
+
241
+ # Remove duplicates and sort
242
+ unique_scores = {}
243
+ for word, score in word_scores:
244
+ if word in unique_scores:
245
+ unique_scores[word] = max(unique_scores[word], score)
246
+ else:
247
+ unique_scores[word] = score
248
+
249
+ sorted_scores = sorted(unique_scores.items(), key=lambda x: x[1], reverse=True)
250
+ return sorted_scores[:top_k]
251
 
252
  except Exception as e:
253
  logger.error(f"SHAP extraction failed: {e}")