Erik Sarriegui committed on
Commit
588462f
·
1 Parent(s): d3c610f

first push

Browse files
Dockerfile ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Stage 1: Build Frontend
# Produces the Vite bundle; only /app/frontend/dist is carried into the
# runtime image below.
FROM node:18-alpine AS frontend-build

WORKDIR /app/frontend

# Copy the manifest on its own so the dependency layer stays cached until
# package.json changes.
COPY frontend/package.json ./
RUN npm install

COPY frontend/ ./
RUN npm run build

# Stage 2: Backend & Runtime
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies
# (presumably the toolchain/headers needed to build lxml-based wheels —
# TODO confirm). --no-install-recommends keeps optional packages out of
# the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    libxml2-dev \
    libxslt-dev \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy backend code
COPY main.py .
COPY src/ src/

# Copy built frontend from Stage 1
# main.py serves this directory through its "static" mount.
COPY --from=frontend-build /app/frontend/dist /app/static

# Create cache directory for Hugging Face models to ensure they are writable
# HF Spaces run as user 1000
RUN mkdir -p /app/cache && chmod 777 /app/cache
ENV HF_HOME=/app/cache

# Expose port (7860 is default for HF Spaces)
EXPOSE 7860

# Run the app
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
frontend/index.html ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!doctype html>
<!-- Vite entry document: the React app mounts into #root. -->
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <link rel="icon" type="image/svg+xml" href="/vite.svg" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>News Intelligence</title>
    <!-- Inter is the font configured as the Tailwind `sans` family. -->
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
  </head>
  <body class="bg-slate-950 text-slate-50 font-sans">
    <div id="root"></div>
    <!-- Application bootstrap (see src/main.jsx). -->
    <script type="module" src="/src/main.jsx"></script>
  </body>
</html>
frontend/package.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "frontend",
3
+ "private": true,
4
+ "version": "0.0.0",
5
+ "type": "module",
6
+ "scripts": {
7
+ "dev": "vite",
8
+ "build": "vite build",
9
+ "preview": "vite preview"
10
+ },
11
+ "dependencies": {
12
+ "axios": "^1.6.7",
13
+ "clsx": "^2.1.0",
14
+ "framer-motion": "^11.0.3",
15
+ "lucide-react": "^0.330.0",
16
+ "react": "^18.2.0",
17
+ "react-dom": "^18.2.0",
18
+ "tailwind-merge": "^2.2.1"
19
+ },
20
+ "devDependencies": {
21
+ "@types/react": "^18.2.55",
22
+ "@types/react-dom": "^18.2.19",
23
+ "@vitejs/plugin-react": "^4.2.1",
24
+ "autoprefixer": "^10.4.17",
25
+ "postcss": "^8.4.35",
26
+ "tailwindcss": "^3.4.1",
27
+ "vite": "^5.1.0"
28
+ }
29
+ }
frontend/postcss.config.js ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
// PostCSS pipeline used by the frontend build: Tailwind generates the
// utility classes, then Autoprefixer post-processes the resulting CSS.
export default {
  plugins: {
    tailwindcss: {},
    autoprefixer: {},
  },
}
frontend/src/App.jsx ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState } from 'react';
2
+ import { Newspaper, Search, LayoutGrid } from 'lucide-react';
3
+ import { motion, AnimatePresence } from 'framer-motion';
4
+ import Aggregator from './components/Aggregator';
5
+ import Analyzer from './components/Analyzer';
6
+ import { cn } from './lib/utils';
7
+
8
+ function App() {
9
+ const [activeTab, setActiveTab] = useState('aggregator');
10
+
11
+ return (
12
+ <div className="min-h-screen bg-slate-950 text-slate-100 flex flex-col items-center py-10 px-4">
13
+ <header className="mb-8 text-center">
14
+ <h1 className="text-4xl font-bold bg-gradient-to-r from-blue-400 to-purple-500 bg-clip-text text-transparent mb-2">
15
+ News Intelligence
16
+ </h1>
17
+ <p className="text-slate-400">Advanced Aggregation & Analysis</p>
18
+ </header>
19
+
20
+ {/* Navigation */}
21
+ <div className="bg-slate-900/50 p-1 rounded-xl flex gap-1 mb-8 border border-slate-800 backdrop-blur-sm">
22
+ <button
23
+ onClick={() => setActiveTab('aggregator')}
24
+ className={cn(
25
+ "flex items-center gap-2 px-6 py-2.5 rounded-lg text-sm font-medium transition-all duration-300",
26
+ activeTab === 'aggregator'
27
+ ? "bg-blue-600 text-white shadow-lg shadow-blue-900/20"
28
+ : "text-slate-400 hover:text-white hover:bg-slate-800"
29
+ )}
30
+ >
31
+ <LayoutGrid className="w-4 h-4" />
32
+ Event Stream
33
+ </button>
34
+ <button
35
+ onClick={() => setActiveTab('analyzer')}
36
+ className={cn(
37
+ "flex items-center gap-2 px-6 py-2.5 rounded-lg text-sm font-medium transition-all duration-300",
38
+ activeTab === 'analyzer'
39
+ ? "bg-purple-600 text-white shadow-lg shadow-purple-900/20"
40
+ : "text-slate-400 hover:text-white hover:bg-slate-800"
41
+ )}
42
+ >
43
+ <Search className="w-4 h-4" />
44
+ Analyzer
45
+ </button>
46
+ </div>
47
+
48
+ {/* Content Area */}
49
+ <main className="w-full max-w-5xl">
50
+ <AnimatePresence mode="wait">
51
+ {activeTab === 'aggregator' ? (
52
+ <motion.div
53
+ key="aggregator"
54
+ initial={{ opacity: 0, y: 20 }}
55
+ animate={{ opacity: 1, y: 0 }}
56
+ exit={{ opacity: 0, y: -20 }}
57
+ transition={{ duration: 0.3 }}
58
+ >
59
+ <Aggregator />
60
+ </motion.div>
61
+ ) : (
62
+ <motion.div
63
+ key="analyzer"
64
+ initial={{ opacity: 0, y: 20 }}
65
+ animate={{ opacity: 1, y: 0 }}
66
+ exit={{ opacity: 0, y: -20 }}
67
+ transition={{ duration: 0.3 }}
68
+ >
69
+ <Analyzer />
70
+ </motion.div>
71
+ )}
72
+ </AnimatePresence>
73
+ </main>
74
+ </div>
75
+ );
76
+ }
77
+
78
+ export default App;
frontend/src/components/Aggregator.jsx ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect } from 'react';
2
+ import axios from 'axios';
3
+ import { Calendar, RefreshCw } from 'lucide-react';
4
+ import ClusterCard from './ClusterCard';
5
+ import { cn } from '../lib/utils';
6
+ import { motion } from 'framer-motion';
7
+
8
+ export default function Aggregator() {
9
+ const [data, setData] = useState(null);
10
+ const [loading, setLoading] = useState(true);
11
+ const [error, setError] = useState(null);
12
+ const [selectedDayIndex, setSelectedDayIndex] = useState(0);
13
+
14
+ useEffect(() => {
15
+ fetchData();
16
+ }, []);
17
+
18
+ const fetchData = async () => {
19
+ try {
20
+ setLoading(true);
21
+ setError(null);
22
+ const response = await axios.get('/api/clusters');
23
+
24
+ // Sort daysdescending just in case or keep as valid from API
25
+ // API returns top 3 days. We'll use them as is.
26
+ setData(response.data);
27
+ } catch (err) {
28
+ console.error(err);
29
+ setError("Failed to load clusters. Please try again.");
30
+ } finally {
31
+ setLoading(false);
32
+ }
33
+ };
34
+
35
+ if (loading) {
36
+ return (
37
+ <div className="flex flex-col items-center justify-center py-20 text-slate-500">
38
+ <RefreshCw className="w-8 h-8 animate-spin mb-4 text-blue-500" />
39
+ <p>Loading intelligence stream...</p>
40
+ </div>
41
+ );
42
+ }
43
+
44
+ if (error) {
45
+ return (
46
+ <div className="text-center py-20 text-red-400 bg-red-900/10 rounded-xl border border-red-900/20">
47
+ <p className="mb-4">{error}</p>
48
+ <button
49
+ onClick={fetchData}
50
+ className="px-4 py-2 bg-red-600 text-white rounded-lg hover:bg-red-500 transition-colors"
51
+ >
52
+ Retry
53
+ </button>
54
+ </div>
55
+ );
56
+ }
57
+
58
+ if (!data || data.length === 0) {
59
+ return <div className="text-center py-20 text-slate-500">No data available.</div>;
60
+ }
61
+
62
+ const currentDay = data[selectedDayIndex];
63
+
64
+ return (
65
+ <div className="space-y-6">
66
+ {/* Day Tabs */}
67
+ <div className="flex justify-center gap-4">
68
+ {data.map((dayItem, index) => (
69
+ <button
70
+ key={dayItem.date}
71
+ onClick={() => setSelectedDayIndex(index)}
72
+ className={cn(
73
+ "flex flex-col items-center p-3 rounded-xl border transition-all duration-300 w-32",
74
+ selectedDayIndex === index
75
+ ? "bg-slate-800 border-blue-500/50 shadow-lg shadow-blue-500/10 text-white"
76
+ : "bg-slate-900/50 border-slate-800 text-slate-400 hover:bg-slate-800 hover:border-slate-700"
77
+ )}
78
+ >
79
+ <span className="text-xs font-medium uppercase tracking-wider text-slate-500 mb-1">
80
+ {new Date(dayItem.date).toLocaleDateString(undefined, { weekday: 'short' })}
81
+ </span>
82
+ <span className="text-lg font-bold">
83
+ {new Date(dayItem.date).toLocaleDateString(undefined, { day: '2-digit', month: 'short' })}
84
+ </span>
85
+ </button>
86
+ ))}
87
+ </div>
88
+
89
+ {/* Stats for the day */}
90
+ <div className="flex items-center justify-between px-4 pb-2 text-slate-400 text-sm border-b border-slate-800">
91
+ <div className="flex items-center gap-2">
92
+ <Calendar className="w-4 h-4" />
93
+ <span>Events for {new Date(currentDay.date).toLocaleDateString()}</span>
94
+ </div>
95
+ <div>
96
+ {currentDay.clusters.length} Event Clusters
97
+ </div>
98
+ </div>
99
+
100
+ {/* Clusters Grid */}
101
+ <div className="grid grid-cols-1 md:grid-cols-1 gap-4">
102
+ {currentDay.clusters.map((cluster) => (
103
+ <ClusterCard key={cluster.cluster_id} cluster={cluster} />
104
+ ))}
105
+ </div>
106
+ </div>
107
+ );
108
+ }
frontend/src/components/Analyzer.jsx ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState } from 'react';
2
+ import axios from 'axios';
3
+ import { Search, AlertTriangle, CheckCircle, AlertOctagon, ArrowRight, Loader2 } from 'lucide-react';
4
+ import { motion } from 'framer-motion';
5
+ import { cn } from '../lib/utils';
6
+
7
+ export default function Analyzer() {
8
+ const [url, setUrl] = useState('');
9
+ const [result, setResult] = useState(null);
10
+ const [loading, setLoading] = useState(false);
11
+ const [error, setError] = useState(null);
12
+
13
+ const handleAnalyze = async (e) => {
14
+ e.preventDefault();
15
+ if (!url) return;
16
+
17
+ setLoading(true);
18
+ setError(null);
19
+ setResult(null);
20
+
21
+ try {
22
+ const response = await axios.post('/api/analyze', { url });
23
+ setResult(response.data);
24
+ } catch (err) {
25
+ console.error(err);
26
+ setError("Analysis failed. Please check the URL or try again later.");
27
+ } finally {
28
+ setLoading(false);
29
+ }
30
+ };
31
+
32
+ return (
33
+ <div className="max-w-3xl mx-auto space-y-8">
34
+ {/* Input Section */}
35
+ <div className="bg-slate-900/50 p-8 rounded-2xl border border-slate-800 shadow-xl backdrop-blur-sm">
36
+ <h2 className="text-2xl font-bold mb-6 text-center text-slate-100">Analyze News Article</h2>
37
+ <form onSubmit={handleAnalyze} className="flex gap-2">
38
+ <div className="relative flex-1">
39
+ <Search className="absolute left-4 top-1/2 -translate-y-1/2 text-slate-500 w-5 h-5" />
40
+ <input
41
+ type="url"
42
+ placeholder="Paste article URL here (e.g., https://elpais.com/...)"
43
+ value={url}
44
+ onChange={(e) => setUrl(e.target.value)}
45
+ className="w-full bg-slate-950 border border-slate-700 text-slate-100 pl-11 pr-4 py-4 rounded-xl focus:outline-none focus:ring-2 focus:ring-blue-500/50 transition-all placeholder:text-slate-600"
46
+ required
47
+ />
48
+ </div>
49
+ <button
50
+ type="submit"
51
+ disabled={loading}
52
+ className="bg-blue-600 hover:bg-blue-500 text-white px-8 rounded-xl font-medium transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
53
+ >
54
+ {loading ? <Loader2 className="animate-spin w-5 h-5" /> : "Analyze"}
55
+ </button>
56
+ </form>
57
+ </div>
58
+
59
+ {/* Error Message */}
60
+ {error && (
61
+ <motion.div
62
+ initial={{ opacity: 0, y: 10 }}
63
+ animate={{ opacity: 1, y: 0 }}
64
+ className="p-4 bg-red-950/30 border border-red-900/50 rounded-xl text-red-400 text-center"
65
+ >
66
+ {error}
67
+ </motion.div>
68
+ )}
69
+
70
+ {/* Results Section */}
71
+ {result && (
72
+ <motion.div
73
+ initial={{ opacity: 0, scale: 0.95 }}
74
+ animate={{ opacity: 1, scale: 1 }}
75
+ className="bg-slate-900/50 rounded-2xl border border-slate-800 overflow-hidden shadow-2xl"
76
+ >
77
+ {/* Header */}
78
+ <div className="p-6 border-b border-slate-800 bg-slate-900/80">
79
+ <h3 className="text-xl font-semibold text-slate-100 leading-snug">{result.title}</h3>
80
+ <p className="mt-2 text-slate-400 text-sm line-clamp-3 leading-relaxed">
81
+ {result.text_snippet}
82
+ </p>
83
+ </div>
84
+
85
+ {/* Analysis Cards */}
86
+ <div className="grid grid-cols-1 md:grid-cols-2 gap-px bg-slate-800">
87
+ {/* Clickbait Result */}
88
+ <div className={cn(
89
+ "p-8 flex flex-col items-center justify-center text-center gap-4",
90
+ result.is_clickbait ? "bg-red-950/20" : "bg-emerald-950/20"
91
+ )}>
92
+ <div className={cn(
93
+ "w-16 h-16 rounded-full flex items-center justify-center mb-2 shadow-lg",
94
+ result.is_clickbait ? "bg-orange-500/20 text-orange-500" : "bg-emerald-500/20 text-emerald-500"
95
+ )}>
96
+ {result.is_clickbait ? <AlertTriangle className="w-8 h-8" /> : <CheckCircle className="w-8 h-8" />}
97
+ </div>
98
+ <div>
99
+ <h4 className="text-slate-400 text-sm font-medium uppercase tracking-widest mb-1">Headline Analysis</h4>
100
+ <p className={cn(
101
+ "text-2xl font-bold",
102
+ result.is_clickbait ? "text-orange-400" : "text-emerald-400"
103
+ )}>
104
+ {result.is_clickbait ? "Clickbait Detected" : "No Clickbait"}
105
+ </p>
106
+ </div>
107
+ <div className="text-xs text-slate-500 bg-slate-900/50 px-3 py-1 rounded-full">
108
+ Confidence: {(result.clickbait_conf * 100).toFixed(1)}%
109
+ </div>
110
+ </div>
111
+
112
+ {/* Sensationalism Result */}
113
+ <div className={cn(
114
+ "p-8 flex flex-col items-center justify-center text-center gap-4",
115
+ result.is_sensationalist ? "bg-red-950/20" : "bg-emerald-950/20"
116
+ )}>
117
+ <div className={cn(
118
+ "w-16 h-16 rounded-full flex items-center justify-center mb-2 shadow-lg",
119
+ result.is_sensationalist ? "bg-red-500/20 text-red-500" : "bg-blue-500/20 text-blue-500"
120
+ )}>
121
+ {result.is_sensationalist ? <AlertOctagon className="w-8 h-8" /> : <CheckCircle className="w-8 h-8" />}
122
+ </div>
123
+ <div>
124
+ <h4 className="text-slate-400 text-sm font-medium uppercase tracking-widest mb-1">Tone Analysis</h4>
125
+ <p className={cn(
126
+ "text-2xl font-bold",
127
+ result.is_sensationalist ? "text-red-400" : "text-blue-400"
128
+ )}>
129
+ {result.is_sensationalist ? "Sensationalist" : "Neutral Tone"}
130
+ </p>
131
+ </div>
132
+ <div className="text-xs text-slate-500 bg-slate-900/50 px-3 py-1 rounded-full">
133
+ Confidence: {(result.sensationalist_conf * 100).toFixed(1)}%
134
+ </div>
135
+ </div>
136
+ </div>
137
+ </motion.div>
138
+ )}
139
+ </div>
140
+ );
141
+ }
frontend/src/components/ClusterCard.jsx ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState } from 'react';
2
+ import { motion, AnimatePresence } from 'framer-motion';
3
+ import { ChevronDown, ExternalLink, Hash, FileText } from 'lucide-react';
4
+ import { cn } from '../lib/utils';
5
+
6
+ export default function ClusterCard({ cluster }) {
7
+ const [isExpanded, setIsExpanded] = useState(false);
8
+
9
+ return (
10
+ <motion.div
11
+ layout
12
+ className="bg-slate-900/50 border border-slate-800 rounded-xl overflow-hidden hover:border-slate-700 transition-colors"
13
+ >
14
+ <div
15
+ onClick={() => setIsExpanded(!isExpanded)}
16
+ className="p-5 cursor-pointer flex items-start justify-between gap-4"
17
+ >
18
+ <div className="flex-1">
19
+ <div className="flex items-center gap-2 mb-2">
20
+ <span className="bg-blue-500/10 text-blue-400 text-xs px-2 py-0.5 rounded-full border border-blue-500/20 font-medium flex items-center gap-1">
21
+ <Hash className="w-3 h-3" />
22
+ Cluster {cluster.cluster_id}
23
+ </span>
24
+ <span className="bg-slate-800 text-slate-400 text-xs px-2 py-0.5 rounded-full font-medium flex items-center gap-1">
25
+ <FileText className="w-3 h-3" />
26
+ {cluster.size} articles
27
+ </span>
28
+ </div>
29
+ <h3 className="text-lg font-semibold text-slate-200 leading-tight group-hover:text-blue-400 transition-colors">
30
+ {cluster.title}
31
+ </h3>
32
+ </div>
33
+ <motion.div
34
+ animate={{ rotate: isExpanded ? 180 : 0 }}
35
+ transition={{ duration: 0.2 }}
36
+ className="bg-slate-800 p-1.5 rounded-lg text-slate-400"
37
+ >
38
+ <ChevronDown className="w-5 h-5" />
39
+ </motion.div>
40
+ </div>
41
+
42
+ <AnimatePresence>
43
+ {isExpanded && (
44
+ <motion.div
45
+ initial={{ height: 0, opacity: 0 }}
46
+ animate={{ height: "auto", opacity: 1 }}
47
+ exit={{ height: 0, opacity: 0 }}
48
+ className="border-t border-slate-800 bg-slate-900/30"
49
+ >
50
+ <div className="p-4 space-y-3">
51
+ {cluster.articles.map((article, idx) => (
52
+ <a
53
+ key={idx}
54
+ href={article.url}
55
+ target="_blank"
56
+ rel="noopener noreferrer"
57
+ className="block p-3 rounded-lg bg-slate-800/40 hover:bg-slate-800 transition-colors group"
58
+ >
59
+ <div className="flex justify-between items-start gap-3">
60
+ <h4 className="text-sm text-slate-300 font-medium line-clamp-2 group-hover:text-blue-300">
61
+ {article.title}
62
+ </h4>
63
+ <ExternalLink className="w-4 h-4 text-slate-500 shrink-0 group-hover:text-blue-400" />
64
+ </div>
65
+ <div className="mt-2 flex items-center gap-2 text-xs text-slate-500">
66
+ <span className="font-semibold text-slate-400">{article.newspaper}</span>
67
+ <span>•</span>
68
+ <span className="truncate max-w-[200px]">{article.newspaper_url}</span>
69
+ </div>
70
+ </a>
71
+ ))}
72
+ </div>
73
+ </motion.div>
74
+ )}
75
+ </AnimatePresence>
76
+ </motion.div>
77
+ );
78
+ }
frontend/src/index.css ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
/* Tailwind layer entry points; expanded by the tailwindcss PostCSS plugin. */
@tailwind base;
@tailwind components;
@tailwind utilities;

/* Page-wide defaults: dark background with light text. */
@layer base {
  body {
    @apply bg-slate-950 text-slate-100;
  }
}
frontend/src/lib/utils.js ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import { clsx } from "clsx"
2
+ import { twMerge } from "tailwind-merge"
3
+
4
/**
 * Build a className string from any mix of clsx-style inputs, then let
 * tailwind-merge process the combined result.
 */
export function cn(...inputs) {
  const combined = clsx(inputs)
  return twMerge(combined)
}
frontend/src/main.jsx ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react'
2
+ import ReactDOM from 'react-dom/client'
3
+ import App from './App.jsx'
4
+ import './index.css'
5
+
6
+ ReactDOM.createRoot(document.getElementById('root')).render(
7
+ <React.StrictMode>
8
+ <App />
9
+ </React.StrictMode>,
10
+ )
frontend/tailwind.config.js ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/** @type {import('tailwindcss').Config} */
export default {
  // Files scanned for class names; classes not found here are not generated.
  content: [
    "./index.html",
    "./src/**/*.{js,ts,jsx,tsx}",
  ],
  theme: {
    extend: {
      // Inter is loaded from Google Fonts in index.html.
      fontFamily: {
        sans: ['Inter', 'sans-serif'],
      },
      // Named palette additions on top of Tailwind's defaults.
      colors: {
        primary: "#2563EB",
        secondary: "#475569",
        accent: "#F59E0B",
        background: "#0F172A",
        surface: "#1E293B",
      }
    },
  },
  plugins: [],
}
frontend/vite.config.js ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { defineConfig } from 'vite'
2
+ import react from '@vitejs/plugin-react'
3
+ import path from 'path'
4
+
5
// https://vitejs.dev/config/
export default defineConfig({
  plugins: [react()],
  resolve: {
    alias: {
      // Lets source files import from '@/...' instead of relative paths.
      '@': path.resolve(__dirname, './src'),
    },
  },
  server: {
    // Dev server only: forward /api calls to the backend on port 8000,
    // the port main.py uses when it is run directly.
    proxy: {
      '/api': {
        target: 'http://localhost:8000',
        changeOrigin: true,
      },
    },
  },
})
main.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ from fastapi import FastAPI, HTTPException
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from fastapi.staticfiles import StaticFiles
6
+ from pydantic import BaseModel
7
+ from typing import List, Dict, Any
8
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
9
+ import torch
10
+ import newspaper
11
+ from newspaper import Article
12
+ import logging
13
+ import nltk
14
+
15
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()

# CORS configuration
# Wildcard origins must not be combined with credentialed requests: the CORS
# spec forbids "*" together with credentials and FastAPI documents the
# combination as unsupported. No endpoint in this file relies on cookies or
# auth headers, so credentials are disabled and the wildcard stays.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global variables
DATA_PATH = "src/dataset.parquet"  # parquet file read by load_data()
df = None  # DataFrame set by load_data(); None until startup has run
models = {}  # tokenizers/models keyed by name, filled by load_models()
34
+
35
+ # Load Data
36
def load_data():
    """Read the parquet dataset at DATA_PATH into the module-level ``df``.

    The ``date`` column is converted to ``str`` so later comparisons and the
    JSON output use one consistent representation. On any failure the error
    is logged and ``df`` becomes an empty DataFrame.
    """
    global df
    try:
        logger.info(f"Loading data from {DATA_PATH}...")
        frame = pd.read_parquet(DATA_PATH)
        # Normalise the date column before publishing the frame.
        frame['date'] = frame['date'].astype(str)
        df = frame
        logger.info("Data loaded successfully.")
    except Exception as exc:
        logger.error(f"Error loading data: {exc}")
        df = pd.DataFrame()
47
+
48
+ # Load Models
49
def load_models():
    """Load both classifiers and their tokenizers into the global ``models``.

    Populates the keys ``cb_tokenizer``/``cb_model`` (clickbait) and
    ``sens_tokenizer``/``sens_model`` (sensationalism). The call is a no-op
    when all four are already present. Errors are logged rather than raised;
    in that case ``models`` is left untouched.
    """
    required_keys = ("cb_tokenizer", "cb_model", "sens_tokenizer", "sens_model")

    # Only load if not already loaded to save resources on reload (though in docker it's once).
    # The check uses the keys that are actually stored.
    if all(key in models for key in required_keys):
        return

    try:
        # Stage into a local dict so a failure half-way through never leaves
        # `models` partially populated.
        loaded = {}

        logger.info("Loading Clickbait Model...")
        cb_model_name = "eriksarriegui/mmBERT-base-clickbait-detection-es"
        loaded["cb_tokenizer"] = AutoTokenizer.from_pretrained(cb_model_name)
        loaded["cb_model"] = AutoModelForSequenceClassification.from_pretrained(cb_model_name)

        logger.info("Loading Sensationalism Model...")
        sens_model_name = "eriksarriegui/mmBERT-base-sensacionalism-detection-es"
        loaded["sens_tokenizer"] = AutoTokenizer.from_pretrained(sens_model_name)
        loaded["sens_model"] = AutoModelForSequenceClassification.from_pretrained(sens_model_name)

        models.update(loaded)
        logger.info("Models loaded successfully.")
    except Exception as e:
        logger.error(f"Error loading models: {e}")
67
+
68
@app.on_event("startup")
async def startup_event():
    """Prepare NLTK resources, the dataset and the models at application startup.

    A failed NLTK download is logged as a warning and does not stop startup.
    """
    try:
        # Fetched in this order; an exception on the first skips the second.
        for resource in ('punkt', 'punkt_tab'):
            nltk.download(resource)
    except Exception as exc:
        logger.warning(f"NLTK download warning: {exc}")

    load_data()
    # Models are loaded eagerly so the download cost is paid at startup rather
    # than on the first request. Lazy loading would be the alternative if
    # memory is tight.
    load_models()
81
+
82
+ # Pydantic Models
83
class AnalyzeRequest(BaseModel):
    """Request body of ``POST /api/analyze``."""

    # Address of the article to download and classify.
    url: str
85
+
86
class ClusterResponse(BaseModel):
    """One day of clustered articles, mirroring an item returned by ``get_clusters``.

    NOTE(review): no endpoint in this file references this model as a
    ``response_model``.
    """

    # Day label, as stored in the dataset's ``date`` column.
    date: str
    # One dict per cluster of that day.
    clusters: List[Dict[str, Any]]
89
+
90
+ # Endpoints
91
+
92
def _value_or_default(value, default):
    """Return *value*, or *default* when the cell is missing (None/NaN)."""
    if value is None:
        return default
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return default
    return value


@app.get("/api/clusters")
def get_clusters():
    """Return the three days with the most distinct clusters.

    Returns:
        A list (busiest day first) of ``{"date", "clusters"}`` dicts. Each
        cluster carries ``cluster_id``, a representative ``title`` (taken from
        its first article), ``size`` and the list of ``articles``.

    Raises:
        HTTPException: 500 when the dataset has not been loaded or is empty.
    """
    if df is None or df.empty:
        raise HTTPException(status_code=500, detail="Data not loaded")

    # Distinct cluster_ids per date, busiest days first.
    clusters_per_day = df.groupby('date')['cluster_id'].nunique()
    top_days = clusters_per_day.sort_values(ascending=False).head(3).index.tolist()

    has_cluster_size = 'cluster_size' in df.columns

    result = []
    for day in top_days:
        day_data = df[df['date'] == day]

        clusters_data = []
        for cluster_id, group in day_data.groupby('cluster_id'):
            # Plain Python records are cheaper than iterrows().
            records = group.to_dict('records')
            rep_article = records[0]

            # Missing cells (NaN) fall back to the same defaults as missing
            # columns; NaN would otherwise break JSON serialisation.
            cluster_articles = [
                {
                    "title": _value_or_default(record.get('title'), 'No Title'),
                    "newspaper": _value_or_default(record.get('newspaper'), 'Unknown'),
                    "url": _value_or_default(record.get('article_url'), '#'),
                    "newspaper_url": _value_or_default(record.get('newspaper_url'), '#'),
                }
                for record in records
            ]

            # Prefer the precomputed size; fall back to counting rows when the
            # column is absent or the cell is empty.
            declared_size = rep_article.get('cluster_size') if has_cluster_size else None
            size = int(_value_or_default(declared_size, len(group)))

            clusters_data.append({
                "cluster_id": str(cluster_id),
                "title": _value_or_default(rep_article.get('title'), 'Cluster Title'),
                "size": size,
                "articles": cluster_articles,
            })

        result.append({
            "date": day,
            "clusters": clusters_data,
        })

    return result
143
+
144
@app.post("/api/analyze")
def analyze_article(request: AnalyzeRequest):
    """Download an article and classify it for clickbait and sensationalism.

    Args:
        request: body carrying the article ``url``.

    Returns:
        Dict with ``title``, ``text_snippet`` (first 200 characters of the
        body), the boolean verdicts ``is_clickbait``/``is_sensationalist`` and
        ``clickbait_conf``/``sensationalist_conf``, which are the probabilities
        of class 1 for each model.

    Raises:
        HTTPException: 400 for a URL that is not absolute http(s), 503 when
            the models are not loaded, 422 when nothing could be extracted
            from the page, 500 for any other failure.
    """
    from urllib.parse import urlparse

    url = request.url.strip()

    # The server fetches this URL itself, so only web schemes are accepted.
    # NOTE(review): this does not stop requests to internal hosts (SSRF);
    # add host filtering if the service is exposed beyond a demo.
    parsed_url = urlparse(url)
    if parsed_url.scheme not in ("http", "https") or not parsed_url.netloc:
        raise HTTPException(status_code=400, detail="URL must be an absolute http(s) URL")

    # load_models() only logs its failures, so make that state explicit here
    # instead of surfacing a KeyError as an opaque 500.
    required_keys = ("cb_tokenizer", "cb_model", "sens_tokenizer", "sens_model")
    if any(key not in models for key in required_keys):
        raise HTTPException(status_code=503, detail="Models not loaded")

    try:
        # Scrape
        article = Article(url)
        article.download()
        article.parse()

        title = article.title or ""
        text = article.text or ""
        if not title and not text:
            raise HTTPException(status_code=422, detail="Could not extract article content")

        # Inference
        # Clickbait: using title
        cb_inputs = models["cb_tokenizer"](title, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():
            cb_outputs = models["cb_model"](**cb_inputs)
        cb_probs = torch.softmax(cb_outputs.logits, dim=1)
        # NOTE(review): assumes label 1 is the "clickbait" class — confirm
        # against the model's id2label mapping.
        is_clickbait = torch.argmax(cb_probs, dim=1).item() == 1

        # Sensationalism: using title + text
        sens_input_text = f"TITULAR: {title}\nCUERPO: {text}"
        sens_inputs = models["sens_tokenizer"](sens_input_text, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():
            sens_outputs = models["sens_model"](**sens_inputs)
        sens_probs = torch.softmax(sens_outputs.logits, dim=1)
        # NOTE(review): same label-1 assumption as above.
        is_sensationalist = torch.argmax(sens_probs, dim=1).item() == 1

        return {
            "title": title,
            "text_snippet": text[:200] + "..." if len(text) > 200 else text,
            "is_clickbait": is_clickbait,
            "is_sensationalist": is_sensationalist,
            # Probability of class 1, not of the predicted class.
            "clickbait_conf": float(cb_probs[0][1]),
            "sensationalist_conf": float(sens_probs[0][1]),
        }

    except HTTPException:
        # Keep deliberate status codes instead of rewrapping them as 500.
        raise
    except Exception as e:
        logger.exception(f"Analysis failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
190
+
191
# Static files for Frontend (will be populated after build)
# StaticFiles raises at construction when the directory is missing, which
# would stop the API from starting in development, where the frontend is
# served by the Vite dev server instead of a built bundle.
STATIC_DIR = "static"
if os.path.isdir(STATIC_DIR):
    app.mount("/", StaticFiles(directory=STATIC_DIR, html=True), name="static")
else:
    logger.warning(f"Static directory '{STATIC_DIR}' not found; frontend will not be served.")

if __name__ == "__main__":
    # Local development entry point; port 8000 matches the Vite dev proxy.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
fastapi
uvicorn
pandas
pyarrow
torch
transformers
newspaper3k
lxml[html_clean]
python-multipart
scipy
# "sklearn" on PyPI is a deprecated stub whose installation fails;
# the real distribution is scikit-learn.
scikit-learn
emoji
src/dataset.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eda3135ba3230dfe801b6b4cd0ac7b127a95abee7be3dd4687192911c510c834
3
+ size 28200022