Spaces:
Running
Running
aviseth committed on
Commit ·
69e2ee8
1
Parent(s): f2161c4
fix: f-string backslash syntax error in generate_explanation_text
Browse files- src/models/inference.py +13 -7
src/models/inference.py
CHANGED
|
@@ -153,7 +153,7 @@ class FakeNewsClassifier:
|
|
| 153 |
tokens = self.tokenizer.convert_ids_to_tokens(
|
| 154 |
input_ids[0].cpu().tolist())
|
| 155 |
special = {"[CLS]", "[SEP]", "[PAD]", "<s>",
|
| 156 |
-
|
| 157 |
pairs = [
|
| 158 |
(t.replace("##", "").replace("▁", "").replace("Ġ", ""), float(s))
|
| 159 |
for t, s in zip(tokens, importance)
|
|
@@ -190,7 +190,7 @@ class FakeNewsClassifier:
|
|
| 190 |
tokens = self.tokenizer.convert_ids_to_tokens(
|
| 191 |
input_ids[0].cpu().tolist())
|
| 192 |
SPECIAL = {"[CLS]", "[SEP]", "[PAD]", "<s>",
|
| 193 |
-
|
| 194 |
|
| 195 |
words, current_word, current_score = [], "", 0.0
|
| 196 |
for tok, score in zip(tokens, importance):
|
|
@@ -300,7 +300,7 @@ def generate_explanation_text(shap_tokens: List[Dict], label: str, confidence: f
|
|
| 300 |
|
| 301 |
conf_pct = round(confidence * 100)
|
| 302 |
model_display = {"distilbert": "DistilBERT", "roberta": "RoBERTa",
|
| 303 |
-
|
| 304 |
conf_phrase = (
|
| 305 |
"with very high confidence" if conf_pct >= 90 else
|
| 306 |
"with high confidence" if conf_pct >= 75 else
|
|
@@ -318,14 +318,20 @@ def generate_explanation_text(shap_tokens: List[Dict], label: str, confidence: f
|
|
| 318 |
f"{model_display} classified this article as {label} ({label_descriptions.get(label, label)}) {conf_phrase} ({conf_pct}%)."]
|
| 319 |
|
| 320 |
if positive:
|
| 321 |
-
|
|
|
|
|
|
|
| 322 |
|
| 323 |
if negative:
|
| 324 |
-
|
|
|
|
|
|
|
| 325 |
else:
|
| 326 |
-
parts.append(
|
|
|
|
| 327 |
|
| 328 |
if conf_pct < 65:
|
| 329 |
-
parts.append(
|
|
|
|
| 330 |
|
| 331 |
return " ".join(parts)
|
|
|
|
| 153 |
tokens = self.tokenizer.convert_ids_to_tokens(
|
| 154 |
input_ids[0].cpu().tolist())
|
| 155 |
special = {"[CLS]", "[SEP]", "[PAD]", "<s>",
|
| 156 |
+
"</s>", "<pad>", "<cls>", "<sep>", "▁", "Ġ"}
|
| 157 |
pairs = [
|
| 158 |
(t.replace("##", "").replace("▁", "").replace("Ġ", ""), float(s))
|
| 159 |
for t, s in zip(tokens, importance)
|
|
|
|
| 190 |
tokens = self.tokenizer.convert_ids_to_tokens(
|
| 191 |
input_ids[0].cpu().tolist())
|
| 192 |
SPECIAL = {"[CLS]", "[SEP]", "[PAD]", "<s>",
|
| 193 |
+
"</s>", "<pad>", "<cls>", "<sep>", "<unk>"}
|
| 194 |
|
| 195 |
words, current_word, current_score = [], "", 0.0
|
| 196 |
for tok, score in zip(tokens, importance):
|
|
|
|
| 300 |
|
| 301 |
conf_pct = round(confidence * 100)
|
| 302 |
model_display = {"distilbert": "DistilBERT", "roberta": "RoBERTa",
|
| 303 |
+
"xlnet": "XLNet"}.get(model_key, model_key)
|
| 304 |
conf_phrase = (
|
| 305 |
"with very high confidence" if conf_pct >= 90 else
|
| 306 |
"with high confidence" if conf_pct >= 75 else
|
|
|
|
| 318 |
f"{model_display} classified this article as {label} ({label_descriptions.get(label, label)}) {conf_phrase} ({conf_pct}%)."]
|
| 319 |
|
| 320 |
if positive:
|
| 321 |
+
pos_words = ', '.join(f'"{t["word"]}"' for t in positive)
|
| 322 |
+
parts.append(
|
| 323 |
+
f"The words most strongly associated with this classification were {pos_words}, which the model weighted heavily toward a {label} prediction.")
|
| 324 |
|
| 325 |
if negative:
|
| 326 |
+
neg_words = ', '.join(f'"{t["word"]}"' for t in negative)
|
| 327 |
+
parts.append(
|
| 328 |
+
f"On the other hand, terms like {neg_words} pulled against this classification.")
|
| 329 |
else:
|
| 330 |
+
parts.append(
|
| 331 |
+
"The model found little linguistic evidence contradicting this classification.")
|
| 332 |
|
| 333 |
if conf_pct < 65:
|
| 334 |
+
parts.append(
|
| 335 |
+
"The relatively lower confidence suggests the article contains mixed signals and the prediction should be interpreted with caution.")
|
| 336 |
|
| 337 |
return " ".join(parts)
|