Spaces:
Running
Running
Erik Sarriegui committed on
Commit ·
c671692
1
Parent(s): 6d5f43c
New 4 features
Browse files- .gitignore +1 -0
- frontend/src/components/ClusterCard.jsx +18 -4
- main.py +23 -11
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
tests.ipynb
|
frontend/src/components/ClusterCard.jsx
CHANGED
|
@@ -57,10 +57,24 @@ export default function ClusterCard({ cluster }) {
|
|
| 57 |
className="block p-3 rounded-lg bg-slate-800/40 hover:bg-slate-800 transition-colors group"
|
| 58 |
>
|
| 59 |
<div className="flex justify-between items-start gap-3">
|
| 60 |
-
<
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
</div>
|
| 65 |
<div className="mt-2 flex items-center gap-2 text-xs text-slate-500">
|
| 66 |
<span className="font-semibold text-slate-400">{article.newspaper}</span>
|
|
|
|
| 57 |
className="block p-3 rounded-lg bg-slate-800/40 hover:bg-slate-800 transition-colors group"
|
| 58 |
>
|
| 59 |
<div className="flex justify-between items-start gap-3">
|
| 60 |
+
<div className="flex-1">
|
| 61 |
+
<h4 className="text-sm text-slate-300 font-medium line-clamp-2 group-hover:text-blue-300 mb-1">
|
| 62 |
+
{article.title}
|
| 63 |
+
</h4>
|
| 64 |
+
<div className="flex flex-wrap gap-2">
|
| 65 |
+
{article.is_clickbait && (
|
| 66 |
+
<span className="text-[10px] uppercase font-bold text-red-400 bg-red-900/30 border border-red-900/50 px-1.5 py-0.5 rounded">
|
| 67 |
+
Clickbait
|
| 68 |
+
</span>
|
| 69 |
+
)}
|
| 70 |
+
{article.is_sensationalist && (
|
| 71 |
+
<span className="text-[10px] uppercase font-bold text-amber-400 bg-amber-900/30 border border-amber-900/50 px-1.5 py-0.5 rounded">
|
| 72 |
+
Sensationalist
|
| 73 |
+
</span>
|
| 74 |
+
)}
|
| 75 |
+
</div>
|
| 76 |
+
</div>
|
| 77 |
+
<ExternalLink className="w-4 h-4 text-slate-500 shrink-0 group-hover:text-blue-400 mt-1" />
|
| 78 |
</div>
|
| 79 |
<div className="mt-2 flex items-center gap-2 text-xs text-slate-500">
|
| 80 |
<span className="font-semibold text-slate-400">{article.newspaper}</span>
|
main.py
CHANGED
|
@@ -149,7 +149,7 @@ def get_clusters():
|
|
| 149 |
# Usually newest first is better for news.
|
| 150 |
# The user asked for "3 days consecutive". Just the set.
|
| 151 |
# Let's order them chronologically descending.
|
| 152 |
-
target_dates = sorted(target_dates, reverse=
|
| 153 |
|
| 154 |
for day_dt in target_dates:
|
| 155 |
# Filter by datetime
|
|
@@ -171,7 +171,9 @@ def get_clusters():
|
|
| 171 |
"title": row.get('title', 'No Title'),
|
| 172 |
"newspaper": row.get('newspaper', 'Unknown'),
|
| 173 |
"url": row.get('article_url', '#'),
|
| 174 |
-
"newspaper_url": row.get('newspaper_url', '#')
|
|
|
|
|
|
|
| 175 |
})
|
| 176 |
|
| 177 |
# Size calc
|
|
@@ -219,12 +221,15 @@ def analyze_article(request: AnalyzeRequest):
|
|
| 219 |
with torch.no_grad():
|
| 220 |
cb_outputs = models["cb_model"](**cb_inputs)
|
| 221 |
cb_probs = torch.softmax(cb_outputs.logits, dim=1)
|
| 222 |
-
|
| 223 |
-
#
|
| 224 |
-
#
|
| 225 |
-
#
|
| 226 |
-
|
| 227 |
-
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
# Sensationalism: using title + text
|
| 230 |
sens_input_text = f"TITULAR: {title}\nCUERPO: {text}"
|
|
@@ -232,15 +237,22 @@ def analyze_article(request: AnalyzeRequest):
|
|
| 232 |
with torch.no_grad():
|
| 233 |
sens_outputs = models["sens_model"](**sens_inputs)
|
| 234 |
sens_probs = torch.softmax(sens_outputs.logits, dim=1)
|
| 235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
|
| 237 |
return {
|
| 238 |
"title": title,
|
| 239 |
"text_snippet": text[:200] + "..." if len(text) > 200 else text,
|
| 240 |
"is_clickbait": is_clickbait,
|
| 241 |
"is_sensationalist": is_sensationalist,
|
| 242 |
-
"clickbait_conf": float(
|
| 243 |
-
"sensationalist_conf": float(
|
| 244 |
}
|
| 245 |
|
| 246 |
except Exception as e:
|
|
|
|
| 149 |
# Usually newest first is better for news.
|
| 150 |
# The user asked for "3 days consecutive". Just the set.
|
| 151 |
# Let's order them chronologically descending.
|
| 152 |
+
target_dates = sorted(target_dates, reverse=False)
|
| 153 |
|
| 154 |
for day_dt in target_dates:
|
| 155 |
# Filter by datetime
|
|
|
|
| 171 |
"title": row.get('title', 'No Title'),
|
| 172 |
"newspaper": row.get('newspaper', 'Unknown'),
|
| 173 |
"url": row.get('article_url', '#'),
|
| 174 |
+
"newspaper_url": row.get('newspaper_url', '#'),
|
| 175 |
+
"is_clickbait": bool(row.get('is_clickbait', False)) if pd.notna(row.get('is_clickbait')) else None,
|
| 176 |
+
"is_sensationalist": bool(row.get('is_sensationalist', False)) if pd.notna(row.get('is_sensationalist')) else None
|
| 177 |
})
|
| 178 |
|
| 179 |
# Size calc
|
|
|
|
| 221 |
with torch.no_grad():
|
| 222 |
cb_outputs = models["cb_model"](**cb_inputs)
|
| 223 |
cb_probs = torch.softmax(cb_outputs.logits, dim=1)
|
| 224 |
+
|
| 225 |
+
# Feature 3 & 4: Inverted Clickbait Logic & Thresholds
|
| 226 |
+
# Clickbait Model: Label 0 is CLICKBAIT (based on user info)
|
| 227 |
+
# Check for decision_threshold in config, default to 0.5
|
| 228 |
+
cb_threshold = getattr(models["cb_model"].config, 'decision_threshold', 0.5)
|
| 229 |
+
|
| 230 |
+
# Prob of class 0 (Clickbait)
|
| 231 |
+
prob_clickbait = cb_probs[0][0].item()
|
| 232 |
+
is_clickbait = prob_clickbait >= cb_threshold
|
| 233 |
|
| 234 |
# Sensationalism: using title + text
|
| 235 |
sens_input_text = f"TITULAR: {title}\nCUERPO: {text}"
|
|
|
|
| 237 |
with torch.no_grad():
|
| 238 |
sens_outputs = models["sens_model"](**sens_inputs)
|
| 239 |
sens_probs = torch.softmax(sens_outputs.logits, dim=1)
|
| 240 |
+
|
| 241 |
+
# Sensationalism Model: Standard (Label 1 is Sensationalist)
|
| 242 |
+
# Check for decision_threshold in config, default to 0.5
|
| 243 |
+
sens_threshold = getattr(models["sens_model"].config, 'decision_threshold', 0.5)
|
| 244 |
+
|
| 245 |
+
# Prob of class 1 (Sensationalist)
|
| 246 |
+
prob_sensationalist = sens_probs[0][1].item()
|
| 247 |
+
is_sensationalist = prob_sensationalist >= sens_threshold
|
| 248 |
|
| 249 |
return {
|
| 250 |
"title": title,
|
| 251 |
"text_snippet": text[:200] + "..." if len(text) > 200 else text,
|
| 252 |
"is_clickbait": is_clickbait,
|
| 253 |
"is_sensationalist": is_sensationalist,
|
| 254 |
+
"clickbait_conf": float(prob_clickbait), # Sending confidence of being clickbait
|
| 255 |
+
"sensationalist_conf": float(prob_sensationalist)
|
| 256 |
}
|
| 257 |
|
| 258 |
except Exception as e:
|