Spaces:
Running
Running
| # api/app.py | |
| from pathlib import Path | |
| import sqlite3 | |
| from typing import Optional | |
| import json | |
| import pandas as pd | |
| from fastapi import FastAPI, Query | |
| from fastapi.middleware.cors import CORSMiddleware | |
| app = FastAPI(title="Green Energy News API", version="1.0.0") | |
| import os | |
| from pathlib import Path | |
| DB_PATH = Path("/app/data/news.db") | |
| app.add_middleware( | |
| CORSMiddleware, | |
| allow_origins=["*"], | |
| allow_credentials=False, | |
| allow_methods=["*"], | |
| allow_headers=["*"], | |
| ) | |
| LAST_14_DAYS_CONDITION = "date(published_at) >= date('now', '-14 days')" | |
| def get_connection() -> sqlite3.Connection: | |
| return sqlite3.connect(DB_PATH) | |
| def health(): | |
| return { | |
| "status": "ok", | |
| "db_exists": DB_PATH.exists(), | |
| "db_path": str(DB_PATH), | |
| } | |
| def get_labels(): | |
| conn = get_connection() | |
| query = """ | |
| SELECT DISTINCT label | |
| FROM classified_articles | |
| WHERE label IS NOT NULL | |
| AND label != 'not relevant to field' | |
| AND """ + LAST_14_DAYS_CONDITION + """ | |
| ORDER BY label | |
| """ | |
| df = pd.read_sql_query(query, conn) | |
| conn.close() | |
| return df["label"].dropna().tolist() | |
| def get_sources(): | |
| conn = get_connection() | |
| query = """ | |
| SELECT DISTINCT source | |
| FROM classified_articles | |
| WHERE source IS NOT NULL | |
| AND label != 'not relevant to field' | |
| AND """ + LAST_14_DAYS_CONDITION + """ | |
| ORDER BY source | |
| """ | |
| df = pd.read_sql_query(query, conn) | |
| conn.close() | |
| return df["source"].dropna().tolist() | |
| def get_daily_summary(): | |
| conn = get_connection() | |
| query = """ | |
| SELECT | |
| summary_date, | |
| short_summary, | |
| key_focus, | |
| summary_json, | |
| generated_at | |
| FROM daily_summaries | |
| ORDER BY summary_date DESC | |
| LIMIT 1 | |
| """ | |
| df = pd.read_sql_query(query, conn) | |
| conn.close() | |
| if df.empty: | |
| return {} | |
| row = df.iloc[0].to_dict() | |
| result = { | |
| "summary_date": row.get("summary_date"), | |
| "generated_at": row.get("generated_at"), | |
| } | |
| summary_json = row.get("summary_json") | |
| if summary_json: | |
| try: | |
| parsed_summary = json.loads(summary_json) | |
| if isinstance(parsed_summary, dict): | |
| result.update(parsed_summary) | |
| except Exception: | |
| pass | |
| # fallback compatibility | |
| if "executive_summary" not in result: | |
| result["executive_summary"] = row.get("short_summary") | |
| if "recommended_focus" not in result: | |
| result["recommended_focus"] = row.get("key_focus") | |
| if "decision_implications" not in result: | |
| result["decision_implications"] = [] | |
| if "watchlist" not in result: | |
| result["watchlist"] = [] | |
| if "top_stories" not in result: | |
| result["top_stories"] = [] | |
| return result | |
| def daily_actions( | |
| start_date: Optional[str] = None, | |
| end_date: Optional[str] = None, | |
| ): | |
| conn = get_connection() | |
| query = """ | |
| SELECT | |
| date(published_at) AS day, | |
| label, | |
| COUNT(*) AS count | |
| FROM classified_articles | |
| WHERE 1=1 | |
| AND label != 'not relevant to field' | |
| AND """ + LAST_14_DAYS_CONDITION | |
| params = [] | |
| if start_date: | |
| query += " AND date(published_at) >= date(?)" | |
| params.append(start_date) | |
| if end_date: | |
| query += " AND date(published_at) <= date(?)" | |
| params.append(end_date) | |
| query += """ | |
| GROUP BY date(published_at), label | |
| ORDER BY day ASC, label ASC | |
| """ | |
| df = pd.read_sql_query(query, conn, params=params) | |
| conn.close() | |
| return df.to_dict(orient="records") | |
| def get_articles( | |
| label: Optional[str] = None, | |
| source: Optional[str] = None, | |
| start_date: Optional[str] = None, | |
| end_date: Optional[str] = None, | |
| search: Optional[str] = None, | |
| limit: int = Query(50, ge=1, le=500), | |
| offset: int = Query(0, ge=0), | |
| ): | |
| conn = get_connection() | |
| query = """ | |
| SELECT | |
| article_id, | |
| title, | |
| description, | |
| clean_text, | |
| label, | |
| raw_label, | |
| source, | |
| url, | |
| published_at, | |
| classified_at | |
| FROM classified_articles | |
| WHERE 1=1 | |
| AND label != 'not relevant to field' | |
| AND """ + LAST_14_DAYS_CONDITION | |
| params = [] | |
| if label: | |
| query += " AND label = ?" | |
| params.append(label) | |
| if source: | |
| query += " AND source = ?" | |
| params.append(source) | |
| if start_date: | |
| query += " AND date(published_at) >= date(?)" | |
| params.append(start_date) | |
| if end_date: | |
| query += " AND date(published_at) <= date(?)" | |
| params.append(end_date) | |
| if search: | |
| query += " AND (lower(title) LIKE ? OR lower(description) LIKE ?)" | |
| pattern = f"%{search.lower()}%" | |
| params.extend([pattern, pattern]) | |
| query += " ORDER BY published_at DESC LIMIT ? OFFSET ?" | |
| params.extend([limit, offset]) | |
| df = pd.read_sql_query(query, conn, params=params) | |
| conn.close() | |
| return df.to_dict(orient="records") | |
| # ========================= | |
| # Monitoring endpoints | |
| # ========================= | |
| def get_monitoring_results( | |
| overall_status: Optional[str] = None, | |
| requires_human_review: Optional[int] = None, | |
| label_judgment: Optional[str] = None, | |
| predicted_label: Optional[str] = None, | |
| source: Optional[str] = None, | |
| start_date: Optional[str] = None, | |
| end_date: Optional[str] = None, | |
| search: Optional[str] = None, | |
| limit: int = Query(100, ge=1, le=500), | |
| offset: int = Query(0, ge=0), | |
| ): | |
| conn = get_connection() | |
| query = """ | |
| SELECT | |
| monitoring_id, | |
| article_id, | |
| title, | |
| description, | |
| clean_text, | |
| predicted_label, | |
| source, | |
| url, | |
| published_at, | |
| classified_at, | |
| label_judgment, | |
| label_confidence, | |
| label_explanation, | |
| overall_status, | |
| requires_human_review, | |
| judge_model, | |
| raw_judge_response, | |
| evaluated_at | |
| FROM monitoring_results | |
| WHERE 1=1 | |
| AND """ + LAST_14_DAYS_CONDITION | |
| params = [] | |
| if overall_status: | |
| query += " AND overall_status = ?" | |
| params.append(overall_status) | |
| if requires_human_review is not None: | |
| query += " AND requires_human_review = ?" | |
| params.append(requires_human_review) | |
| if label_judgment: | |
| query += " AND label_judgment = ?" | |
| params.append(label_judgment) | |
| if predicted_label: | |
| query += " AND predicted_label = ?" | |
| params.append(predicted_label) | |
| if source: | |
| query += " AND source = ?" | |
| params.append(source) | |
| if start_date: | |
| query += " AND date(published_at) >= date(?)" | |
| params.append(start_date) | |
| if end_date: | |
| query += " AND date(published_at) <= date(?)" | |
| params.append(end_date) | |
| if search: | |
| query += " AND (lower(title) LIKE ? OR lower(description) LIKE ?)" | |
| pattern = f"%{search.lower()}%" | |
| params.extend([pattern, pattern]) | |
| query += " ORDER BY evaluated_at DESC LIMIT ? OFFSET ?" | |
| params.extend([limit, offset]) | |
| df = pd.read_sql_query(query, conn, params=params) | |
| conn.close() | |
| return df.to_dict(orient="records") | |
| def get_monitoring_summary(): | |
| conn = get_connection() | |
| total_monitored = int(pd.read_sql_query( | |
| """ | |
| SELECT COUNT(*) AS n | |
| FROM monitoring_results | |
| WHERE """ + LAST_14_DAYS_CONDITION, | |
| conn | |
| )["n"].iloc[0]) | |
| needs_review = int(pd.read_sql_query( | |
| """ | |
| SELECT COUNT(*) AS n | |
| FROM monitoring_results | |
| WHERE requires_human_review = 1 | |
| AND """ + LAST_14_DAYS_CONDITION, | |
| conn | |
| )["n"].iloc[0]) | |
| label_distribution = pd.read_sql_query( | |
| """ | |
| SELECT label_judgment, COUNT(*) AS count | |
| FROM monitoring_results | |
| WHERE """ + LAST_14_DAYS_CONDITION + """ | |
| GROUP BY label_judgment | |
| ORDER BY count DESC | |
| """, | |
| conn | |
| ).to_dict(orient="records") | |
| status_distribution = pd.read_sql_query( | |
| """ | |
| SELECT overall_status, COUNT(*) AS count | |
| FROM monitoring_results | |
| WHERE """ + LAST_14_DAYS_CONDITION + """ | |
| GROUP BY overall_status | |
| ORDER BY count DESC | |
| """, | |
| conn | |
| ).to_dict(orient="records") | |
| common_problem_labels = pd.read_sql_query( | |
| """ | |
| SELECT predicted_label, COUNT(*) AS count | |
| FROM monitoring_results | |
| WHERE overall_status != 'ok' | |
| AND """ + LAST_14_DAYS_CONDITION + """ | |
| GROUP BY predicted_label | |
| ORDER BY count DESC | |
| """, | |
| conn | |
| ).to_dict(orient="records") | |
| daily_issues = pd.read_sql_query( | |
| """ | |
| SELECT | |
| date(evaluated_at) AS day, | |
| overall_status, | |
| COUNT(*) AS count | |
| FROM monitoring_results | |
| WHERE """ + LAST_14_DAYS_CONDITION + """ | |
| GROUP BY date(evaluated_at), overall_status | |
| ORDER BY day ASC, overall_status ASC | |
| """, | |
| conn | |
| ).to_dict(orient="records") | |
| conn.close() | |
| return { | |
| "total_monitored": total_monitored, | |
| "needs_review": needs_review, | |
| "label_distribution": label_distribution, | |
| "status_distribution": status_distribution, | |
| "common_problem_labels": common_problem_labels, | |
| "daily_issues": daily_issues, | |
| } | |
| def get_review_queue(limit: int = Query(100, ge=1, le=500)): | |
| conn = get_connection() | |
| query = """ | |
| SELECT | |
| monitoring_id, | |
| article_id, | |
| title, | |
| description, | |
| predicted_label, | |
| source, | |
| url, | |
| published_at, | |
| label_judgment, | |
| label_confidence, | |
| label_explanation, | |
| overall_status, | |
| requires_human_review, | |
| evaluated_at | |
| FROM monitoring_results | |
| WHERE requires_human_review = 1 | |
| AND """ + LAST_14_DAYS_CONDITION + """ | |
| ORDER BY evaluated_at DESC | |
| LIMIT ? | |
| """ | |
| df = pd.read_sql_query(query, conn, params=[limit]) | |
| conn.close() | |
| return df.to_dict(orient="records") |