danielhjerresen committed on
Commit
74fb3cd
·
verified ·
1 Parent(s): 315cb29

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile.txt +12 -0
  2. app.py +417 -0
  3. requirements.txt +3 -0
Dockerfile.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ COPY requirements.txt /app/requirements.txt
6
+ RUN pip install --no-cache-dir -r /app/requirements.txt
7
+
8
+ COPY . /app
9
+
10
+ EXPOSE 7860
11
+
12
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # api/app.py
2
+ from pathlib import Path
3
+ import sqlite3
4
+ from typing import Optional
5
+ import json
6
+
7
+ import pandas as pd
8
+ from fastapi import FastAPI, Query
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+
11
# ASGI application object served by uvicorn as "app:app".
app = FastAPI(title="Green Energy News API", version="1.0.0")

import os
from pathlib import Path

# Location of the SQLite database: the DB_PATH environment variable when it is
# set, otherwise data/news.db two directory levels above this file.
# Test - to be removed after local testing
_DEFAULT_DB_PATH = Path(__file__).resolve().parent.parent / "data" / "news.db"
DB_PATH = Path(os.environ.get("DB_PATH", _DEFAULT_DB_PATH))
# DB_PATH = Path("/app/data/news.db")

# CORS: accept requests from any origin with any method and header, but never
# with credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
32
+
33
+
34
def get_connection() -> sqlite3.Connection:
    """Open a new SQLite connection to the database file at ``DB_PATH``.

    Returns:
        A fresh ``sqlite3.Connection``; the caller is responsible for
        closing it.
    """
    connection = sqlite3.connect(DB_PATH)
    return connection
36
+
37
+
38
@app.get("/health")
def health():
    """Report that the service is up and whether the database file exists.

    Returns:
        A dict with a constant ``status`` of ``"ok"``, a ``db_exists`` flag
        and the configured ``db_path`` as a string.
    """
    payload = {"status": "ok"}
    payload["db_exists"] = DB_PATH.exists()
    payload["db_path"] = str(DB_PATH)
    return payload
45
+
46
+
47
@app.get("/labels")
def get_labels():
    """Return the distinct labels of classified articles, sorted by label.

    Rows whose label is NULL or equal to ``'not relevant to field'`` are
    excluded.

    Returns:
        A list of label strings.
    """
    query = """
        SELECT DISTINCT label
        FROM classified_articles
        WHERE label IS NOT NULL
          AND label != 'not relevant to field'
        ORDER BY label
    """
    conn = get_connection()
    try:
        df = pd.read_sql_query(query, conn)
    finally:
        # Close the connection even when the query fails (e.g. missing table)
        # so a failing request does not leak a database handle.
        conn.close()
    return df["label"].dropna().tolist()
60
+
61
+
62
@app.get("/sources")
def get_sources():
    """Return the distinct sources of classified articles, sorted by source.

    Rows with a NULL source are excluded, as are rows labelled
    ``'not relevant to field'``.

    Returns:
        A list of source strings.
    """
    query = """
        SELECT DISTINCT source
        FROM classified_articles
        WHERE source IS NOT NULL
          AND label != 'not relevant to field'
        ORDER BY source
    """
    conn = get_connection()
    try:
        df = pd.read_sql_query(query, conn)
    finally:
        # Close the connection even when the query fails so a failing
        # request does not leak a database handle.
        conn.close()
    return df["source"].dropna().tolist()
75
+
76
@app.get("/summary/daily")
def get_daily_summary():
    """Return the most recent daily summary.

    The newest row of ``daily_summaries`` (by ``summary_date``) is loaded.
    When its ``summary_json`` column holds a JSON object, that object's keys
    are merged into the response; the legacy ``short_summary`` / ``key_focus``
    columns and empty lists are used as fallbacks for any missing keys.

    Returns:
        An empty dict when the table has no rows, otherwise a dict that
        always contains ``summary_date``, ``generated_at``,
        ``executive_summary``, ``recommended_focus``,
        ``decision_implications``, ``watchlist`` and ``top_stories``.
    """
    query = """
        SELECT
            summary_date,
            short_summary,
            key_focus,
            summary_json,
            generated_at
        FROM daily_summaries
        ORDER BY summary_date DESC
        LIMIT 1
    """
    conn = get_connection()
    try:
        df = pd.read_sql_query(query, conn)
    finally:
        # Release the connection even when the query raises.
        conn.close()

    if df.empty:
        return {}

    row = df.iloc[0].to_dict()
    result = {
        "summary_date": row.get("summary_date"),
        "generated_at": row.get("generated_at"),
    }

    summary_json = row.get("summary_json")
    if summary_json:
        try:
            parsed_summary = json.loads(summary_json)
        except (TypeError, ValueError):
            # Malformed or non-text JSON is tolerated on purpose: the
            # response falls back to the legacy columns below.
            parsed_summary = None
        if isinstance(parsed_summary, dict):
            result.update(parsed_summary)

    # Fallback compatibility: only fill keys the stored JSON did not provide.
    result.setdefault("executive_summary", row.get("short_summary"))
    result.setdefault("recommended_focus", row.get("key_focus"))
    result.setdefault("decision_implications", [])
    result.setdefault("watchlist", [])
    result.setdefault("top_stories", [])

    return result
134
+
135
@app.get("/summary/daily-actions")
def daily_actions(
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
):
    """Count classified articles per publication day and label.

    Articles labelled ``'not relevant to field'`` are excluded.

    Args:
        start_date: Optional inclusive lower bound on the publication date;
            passed to SQLite's ``date()``.
        end_date: Optional inclusive upper bound on the publication date;
            passed to SQLite's ``date()``.

    Returns:
        A list of ``{"day", "label", "count"}`` records ordered by day and
        then label.
    """
    query = """
        SELECT
            date(published_at) AS day,
            label,
            COUNT(*) AS count
        FROM classified_articles
        WHERE 1=1
          AND label != 'not relevant to field'
    """
    params = []

    # Filters are appended as bound parameters, never interpolated.
    if start_date:
        query += " AND date(published_at) >= date(?)"
        params.append(start_date)

    if end_date:
        query += " AND date(published_at) <= date(?)"
        params.append(end_date)

    query += """
        GROUP BY date(published_at), label
        ORDER BY day ASC, label ASC
    """

    conn = get_connection()
    try:
        df = pd.read_sql_query(query, conn, params=params)
    finally:
        # Release the connection even when the query raises.
        conn.close()

    return df.to_dict(orient="records")
170
+
171
+
172
@app.get("/articles")
def get_articles(
    label: Optional[str] = None,
    source: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    search: Optional[str] = None,
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
):
    """List classified articles, newest first, with optional filters.

    Articles labelled ``'not relevant to field'`` are always excluded.

    Args:
        label: Exact label to match.
        source: Exact source to match.
        start_date: Inclusive lower bound on the publication date.
        end_date: Inclusive upper bound on the publication date.
        search: Case-insensitive substring matched against the title and
            the description.
        limit: Page size (1-500).
        offset: Number of rows to skip.

    Returns:
        A list of article records ordered by ``published_at`` descending.
    """
    query = """
        SELECT
            article_id,
            title,
            description,
            clean_text,
            label,
            raw_label,
            source,
            url,
            published_at,
            classified_at
        FROM classified_articles
        WHERE 1=1
          AND label != 'not relevant to field'
    """
    params = []

    # Every filter is added as a bound parameter, never interpolated.
    if label:
        query += " AND label = ?"
        params.append(label)

    if source:
        query += " AND source = ?"
        params.append(source)

    if start_date:
        query += " AND date(published_at) >= date(?)"
        params.append(start_date)

    if end_date:
        query += " AND date(published_at) <= date(?)"
        params.append(end_date)

    if search:
        query += " AND (lower(title) LIKE ? OR lower(description) LIKE ?)"
        pattern = f"%{search.lower()}%"
        params.extend([pattern, pattern])

    query += " ORDER BY published_at DESC LIMIT ? OFFSET ?"
    params.extend([limit, offset])

    conn = get_connection()
    try:
        df = pd.read_sql_query(query, conn, params=params)
    finally:
        # Release the connection even when the query raises.
        conn.close()

    return df.to_dict(orient="records")
230
+
231
+
232
+ # =========================
233
+ # Monitoring endpoints
234
+ # =========================
235
+
236
@app.get("/monitoring/results")
def get_monitoring_results(
    overall_status: Optional[str] = None,
    requires_human_review: Optional[int] = None,
    label_judgment: Optional[str] = None,
    predicted_label: Optional[str] = None,
    source: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    search: Optional[str] = None,
    limit: int = Query(100, ge=1, le=500),
    offset: int = Query(0, ge=0),
):
    """List monitoring results, most recently evaluated first.

    Args:
        overall_status: Exact ``overall_status`` to match.
        requires_human_review: Exact flag value to match; ``0`` is a valid
            filter, so only ``None`` disables it.
        label_judgment: Exact ``label_judgment`` to match.
        predicted_label: Exact ``predicted_label`` to match.
        source: Exact source to match.
        start_date: Inclusive lower bound on the publication date.
        end_date: Inclusive upper bound on the publication date.
        search: Case-insensitive substring matched against the title and
            the description.
        limit: Page size (1-500).
        offset: Number of rows to skip.

    Returns:
        A list of monitoring records ordered by ``evaluated_at`` descending;
        missing values are returned as ``None``.
    """
    query = """
        SELECT
            monitoring_id,
            article_id,
            title,
            description,
            clean_text,
            predicted_label,
            source,
            url,
            published_at,
            classified_at,
            label_judgment,
            label_confidence,
            label_explanation,
            overall_status,
            requires_human_review,
            judge_model,
            raw_judge_response,
            evaluated_at
        FROM monitoring_results
        WHERE 1=1
    """
    params = []

    # Every filter is added as a bound parameter, never interpolated.
    if overall_status:
        query += " AND overall_status = ?"
        params.append(overall_status)

    if requires_human_review is not None:
        query += " AND requires_human_review = ?"
        params.append(requires_human_review)

    if label_judgment:
        query += " AND label_judgment = ?"
        params.append(label_judgment)

    if predicted_label:
        query += " AND predicted_label = ?"
        params.append(predicted_label)

    if source:
        query += " AND source = ?"
        params.append(source)

    if start_date:
        query += " AND date(published_at) >= date(?)"
        params.append(start_date)

    if end_date:
        query += " AND date(published_at) <= date(?)"
        params.append(end_date)

    if search:
        query += " AND (lower(title) LIKE ? OR lower(description) LIKE ?)"
        pattern = f"%{search.lower()}%"
        params.extend([pattern, pattern])

    query += " ORDER BY evaluated_at DESC LIMIT ? OFFSET ?"
    params.extend([limit, offset])

    conn = get_connection()
    try:
        df = pd.read_sql_query(query, conn, params=params)
    finally:
        # Release the connection even when the query raises.
        conn.close()

    # pandas reads NULLs in numeric columns as NaN, which cannot be encoded
    # in a JSON response; turn every missing value into None instead.
    df = df.astype(object).where(df.notna(), None)

    return df.to_dict(orient="records")
316
+
317
+
318
@app.get("/monitoring/summary")
def get_monitoring_summary():
    """Return aggregate statistics over the ``monitoring_results`` table.

    Returns:
        A dict with the total row count (``total_monitored``), the number of
        rows flagged for human review (``needs_review``), and four lists of
        records: counts per ``label_judgment``, counts per
        ``overall_status``, counts per ``predicted_label`` among rows whose
        status is not ``'ok'``, and counts per evaluation day and status.
    """
    conn = get_connection()
    try:
        total_monitored = int(pd.read_sql_query(
            "SELECT COUNT(*) AS n FROM monitoring_results",
            conn
        )["n"].iloc[0])

        needs_review = int(pd.read_sql_query(
            "SELECT COUNT(*) AS n FROM monitoring_results WHERE requires_human_review = 1",
            conn
        )["n"].iloc[0])

        label_distribution = pd.read_sql_query(
            """
            SELECT label_judgment, COUNT(*) AS count
            FROM monitoring_results
            GROUP BY label_judgment
            ORDER BY count DESC
            """,
            conn
        ).to_dict(orient="records")

        status_distribution = pd.read_sql_query(
            """
            SELECT overall_status, COUNT(*) AS count
            FROM monitoring_results
            GROUP BY overall_status
            ORDER BY count DESC
            """,
            conn
        ).to_dict(orient="records")

        common_problem_labels = pd.read_sql_query(
            """
            SELECT predicted_label, COUNT(*) AS count
            FROM monitoring_results
            WHERE overall_status != 'ok'
            GROUP BY predicted_label
            ORDER BY count DESC
            """,
            conn
        ).to_dict(orient="records")

        daily_issues = pd.read_sql_query(
            """
            SELECT
                date(evaluated_at) AS day,
                overall_status,
                COUNT(*) AS count
            FROM monitoring_results
            GROUP BY date(evaluated_at), overall_status
            ORDER BY day ASC, overall_status ASC
            """,
            conn
        ).to_dict(orient="records")
    finally:
        # One connection serves all six queries; close it even if any of
        # them raises so the handle is not leaked.
        conn.close()

    return {
        "total_monitored": total_monitored,
        "needs_review": needs_review,
        "label_distribution": label_distribution,
        "status_distribution": status_distribution,
        "common_problem_labels": common_problem_labels,
        "daily_issues": daily_issues,
    }
386
+
387
+
388
@app.get("/monitoring/review-queue")
def get_review_queue(limit: int = Query(100, ge=1, le=500)):
    """List monitoring results flagged for human review, newest first.

    Args:
        limit: Maximum number of rows to return (1-500).

    Returns:
        A list of records with ``requires_human_review = 1`` ordered by
        ``evaluated_at`` descending; missing values are returned as ``None``.
    """
    query = """
        SELECT
            monitoring_id,
            article_id,
            title,
            description,
            predicted_label,
            source,
            url,
            published_at,
            label_judgment,
            label_confidence,
            label_explanation,
            overall_status,
            requires_human_review,
            evaluated_at
        FROM monitoring_results
        WHERE requires_human_review = 1
        ORDER BY evaluated_at DESC
        LIMIT ?
    """

    conn = get_connection()
    try:
        df = pd.read_sql_query(query, conn, params=[limit])
    finally:
        # Release the connection even when the query raises.
        conn.close()

    # pandas reads NULLs in numeric columns as NaN, which cannot be encoded
    # in a JSON response; turn every missing value into None instead.
    df = df.astype(object).where(df.notna(), None)

    return df.to_dict(orient="records")
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ fastapi==0.110.0
2
+ uvicorn==0.29.0
3
+ pandas==2.2.2