Erik Sarriegui committed on
Commit
c671692
·
1 Parent(s): 6d5f43c

New 4 features

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. frontend/src/components/ClusterCard.jsx +18 -4
  3. main.py +23 -11
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ tests.ipynb
frontend/src/components/ClusterCard.jsx CHANGED
@@ -57,10 +57,24 @@ export default function ClusterCard({ cluster }) {
57
  className="block p-3 rounded-lg bg-slate-800/40 hover:bg-slate-800 transition-colors group"
58
  >
59
  <div className="flex justify-between items-start gap-3">
60
- <h4 className="text-sm text-slate-300 font-medium line-clamp-2 group-hover:text-blue-300">
61
- {article.title}
62
- </h4>
63
- <ExternalLink className="w-4 h-4 text-slate-500 shrink-0 group-hover:text-blue-400" />
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  </div>
65
  <div className="mt-2 flex items-center gap-2 text-xs text-slate-500">
66
  <span className="font-semibold text-slate-400">{article.newspaper}</span>
 
57
  className="block p-3 rounded-lg bg-slate-800/40 hover:bg-slate-800 transition-colors group"
58
  >
59
  <div className="flex justify-between items-start gap-3">
60
+ <div className="flex-1">
61
+ <h4 className="text-sm text-slate-300 font-medium line-clamp-2 group-hover:text-blue-300 mb-1">
62
+ {article.title}
63
+ </h4>
64
+ <div className="flex flex-wrap gap-2">
65
+ {article.is_clickbait && (
66
+ <span className="text-[10px] uppercase font-bold text-red-400 bg-red-900/30 border border-red-900/50 px-1.5 py-0.5 rounded">
67
+ Clickbait
68
+ </span>
69
+ )}
70
+ {article.is_sensationalist && (
71
+ <span className="text-[10px] uppercase font-bold text-amber-400 bg-amber-900/30 border border-amber-900/50 px-1.5 py-0.5 rounded">
72
+ Sensationalist
73
+ </span>
74
+ )}
75
+ </div>
76
+ </div>
77
+ <ExternalLink className="w-4 h-4 text-slate-500 shrink-0 group-hover:text-blue-400 mt-1" />
78
  </div>
79
  <div className="mt-2 flex items-center gap-2 text-xs text-slate-500">
80
  <span className="font-semibold text-slate-400">{article.newspaper}</span>
main.py CHANGED
@@ -149,7 +149,7 @@ def get_clusters():
149
  # Usually newest first is better for news.
150
  # The user asked for "3 days consecutive". Just the set.
151
  # Let's order them chronologically descending.
152
- target_dates = sorted(target_dates, reverse=True)
153
 
154
  for day_dt in target_dates:
155
  # Filter by datetime
@@ -171,7 +171,9 @@ def get_clusters():
171
  "title": row.get('title', 'No Title'),
172
  "newspaper": row.get('newspaper', 'Unknown'),
173
  "url": row.get('article_url', '#'),
174
- "newspaper_url": row.get('newspaper_url', '#')
 
 
175
  })
176
 
177
  # Size calc
@@ -219,12 +221,15 @@ def analyze_article(request: AnalyzeRequest):
219
  with torch.no_grad():
220
  cb_outputs = models["cb_model"](**cb_inputs)
221
  cb_probs = torch.softmax(cb_outputs.logits, dim=1)
222
- # Assuming label 0 is NO, 1 is YES (need to verify mapping usually, usually 1 is positive class)
223
- # Checking model config locally or on HF would be ideal. Let's assume standard mapping:
224
- # For clickbait: likely labels are "no_clickbait", "clickbait".
225
- # We can check specific model details if needed. For now assume:
226
- # Label 1 is clickbait.
227
- is_clickbait = torch.argmax(cb_probs).item() == 1
 
 
 
228
 
229
  # Sensationalism: using title + text
230
  sens_input_text = f"TITULAR: {title}\nCUERPO: {text}"
@@ -232,15 +237,22 @@ def analyze_article(request: AnalyzeRequest):
232
  with torch.no_grad():
233
  sens_outputs = models["sens_model"](**sens_inputs)
234
  sens_probs = torch.softmax(sens_outputs.logits, dim=1)
235
- is_sensationalist = torch.argmax(sens_probs).item() == 1
 
 
 
 
 
 
 
236
 
237
  return {
238
  "title": title,
239
  "text_snippet": text[:200] + "..." if len(text) > 200 else text,
240
  "is_clickbait": is_clickbait,
241
  "is_sensationalist": is_sensationalist,
242
- "clickbait_conf": float(cb_probs[0][1]),
243
- "sensationalist_conf": float(sens_probs[0][1])
244
  }
245
 
246
  except Exception as e:
 
149
  # Usually newest first is better for news.
150
  # The user asked for "3 days consecutive". Just the set.
151
  # Let's order them chronologically ascending (oldest first).
152
+ target_dates = sorted(target_dates, reverse=False)
153
 
154
  for day_dt in target_dates:
155
  # Filter by datetime
 
171
  "title": row.get('title', 'No Title'),
172
  "newspaper": row.get('newspaper', 'Unknown'),
173
  "url": row.get('article_url', '#'),
174
+ "newspaper_url": row.get('newspaper_url', '#'),
175
+ "is_clickbait": bool(row.get('is_clickbait', False)) if pd.notna(row.get('is_clickbait')) else None,
176
+ "is_sensationalist": bool(row.get('is_sensationalist', False)) if pd.notna(row.get('is_sensationalist')) else None
177
  })
178
 
179
  # Size calc
 
221
  with torch.no_grad():
222
  cb_outputs = models["cb_model"](**cb_inputs)
223
  cb_probs = torch.softmax(cb_outputs.logits, dim=1)
224
+
225
+ # Feature 3 & 4: Inverted Clickbait Logic & Thresholds
226
+ # Clickbait Model: Label 0 is CLICKBAIT (based on user info)
227
+ # Check for decision_threshold in config, default to 0.5
228
+ cb_threshold = getattr(models["cb_model"].config, 'decision_threshold', 0.5)
229
+
230
+ # Prob of class 0 (Clickbait)
231
+ prob_clickbait = cb_probs[0][0].item()
232
+ is_clickbait = prob_clickbait >= cb_threshold
233
 
234
  # Sensationalism: using title + text
235
  sens_input_text = f"TITULAR: {title}\nCUERPO: {text}"
 
237
  with torch.no_grad():
238
  sens_outputs = models["sens_model"](**sens_inputs)
239
  sens_probs = torch.softmax(sens_outputs.logits, dim=1)
240
+
241
+ # Sensationalism Model: Standard (Label 1 is Sensationalist)
242
+ # Check for decision_threshold in config, default to 0.5
243
+ sens_threshold = getattr(models["sens_model"].config, 'decision_threshold', 0.5)
244
+
245
+ # Prob of class 1 (Sensationalist)
246
+ prob_sensationalist = sens_probs[0][1].item()
247
+ is_sensationalist = prob_sensationalist >= sens_threshold
248
 
249
  return {
250
  "title": title,
251
  "text_snippet": text[:200] + "..." if len(text) > 200 else text,
252
  "is_clickbait": is_clickbait,
253
  "is_sensationalist": is_sensationalist,
254
+ "clickbait_conf": float(prob_clickbait), # Sending confidence of being clickbait
255
+ "sensationalist_conf": float(prob_sensationalist)
256
  }
257
 
258
  except Exception as e: