Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -217,29 +217,37 @@ class SentimentEngine:
|
|
| 217 |
def extract_key_words_shap(self, text: str, top_k: int = 10) -> List[Tuple[str, float]]:
|
| 218 |
"""Advanced keyword extraction using SHAP"""
|
| 219 |
try:
|
| 220 |
-
#
|
| 221 |
-
|
| 222 |
-
|
| 223 |
|
| 224 |
-
# Get
|
| 225 |
-
|
| 226 |
|
| 227 |
-
#
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
|
| 233 |
-
|
| 234 |
-
|
|
|
|
|
|
|
| 235 |
clean_word = re.sub(r'[^\w]', '', word.lower())
|
| 236 |
if len(clean_word) >= config.MIN_WORD_LENGTH:
|
| 237 |
-
word_scores.append((clean_word,
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
|
| 244 |
except Exception as e:
|
| 245 |
logger.error(f"SHAP extraction failed: {e}")
|
|
|
|
| 217 |
def extract_key_words_shap(self, text: str, top_k: int = 10) -> List[Tuple[str, float]]:
|
| 218 |
"""Advanced keyword extraction using SHAP"""
|
| 219 |
try:
|
| 220 |
+
# Leave-one-out approximation (not true SHAP values): score each word by how much removing it changes the model's predicted probability
|
| 221 |
+
words = text.split()
|
| 222 |
+
word_scores = []
|
| 223 |
|
| 224 |
+
# Get baseline prediction
|
| 225 |
+
baseline_prob = self.predict_proba([text])[0][1] # Positive probability
|
| 226 |
|
| 227 |
+
# Calculate importance by removing each word
|
| 228 |
+
for i, word in enumerate(words):
|
| 229 |
+
# Create text without this word
|
| 230 |
+
modified_words = words[:i] + words[i+1:]
|
| 231 |
+
modified_text = ' '.join(modified_words)
|
| 232 |
|
| 233 |
+
if modified_text.strip():
|
| 234 |
+
modified_prob = self.predict_proba([modified_text])[0][1]
|
| 235 |
+
importance = abs(baseline_prob - modified_prob)
|
| 236 |
+
|
| 237 |
clean_word = re.sub(r'[^\w]', '', word.lower())
|
| 238 |
if len(clean_word) >= config.MIN_WORD_LENGTH:
|
| 239 |
+
word_scores.append((clean_word, importance))
|
| 240 |
+
|
| 241 |
+
# Remove duplicates and sort
|
| 242 |
+
unique_scores = {}
|
| 243 |
+
for word, score in word_scores:
|
| 244 |
+
if word in unique_scores:
|
| 245 |
+
unique_scores[word] = max(unique_scores[word], score)
|
| 246 |
+
else:
|
| 247 |
+
unique_scores[word] = score
|
| 248 |
+
|
| 249 |
+
sorted_scores = sorted(unique_scores.items(), key=lambda x: x[1], reverse=True)
|
| 250 |
+
return sorted_scores[:top_k]
|
| 251 |
|
| 252 |
except Exception as e:
|
| 253 |
logger.error(f"SHAP extraction failed: {e}")
|