isakskogstad committed on
Commit
526873c
·
verified ·
1 Parent(s): f0ea187

Upload app_modern.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app_modern.py +695 -0
app_modern.py ADDED
@@ -0,0 +1,695 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import pandas as pd
4
+ import plotly.express as px
5
+ import plotly.graph_objects as go
6
+ from plotly.subplots import make_subplots
7
+ import json
8
+ import time
9
+ from datetime import datetime, timedelta
10
+ import asyncio
11
+ import aiohttp
12
+ from typing import Dict, Any, List, Optional
13
+ import sqlite3
14
+ import hashlib
15
+ from concurrent.futures import ThreadPoolExecutor, as_completed
16
+ import xml.etree.ElementTree as ET
17
+
18
# Modern page configuration: the settings are named once, then applied.
_PAGE_SETTINGS = {
    "page_title": "Global Data Harvester",
    "page_icon": "🌍",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_PAGE_SETTINGS)

# Modern CSS styling, injected as a raw <style> element.
# Classes defined here: .metric-card, .api-card, .title-container,
# .progress-ring / .progress-ring-circle, .status-indicator and its
# .status-active / .status-warning / .status-error colour variants.
_CUSTOM_CSS = """
<style>
.main > div {
padding-top: 2rem;
}
.stApp {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
}
.metric-card {
background: rgba(255, 255, 255, 0.1);
backdrop-filter: blur(10px);
border-radius: 15px;
padding: 1.5rem;
margin: 0.5rem 0;
border: 1px solid rgba(255, 255, 255, 0.2);
box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37);
}
.api-card {
background: rgba(255, 255, 255, 0.15);
backdrop-filter: blur(10px);
border-radius: 12px;
padding: 1rem;
margin: 0.5rem;
border: 1px solid rgba(255, 255, 255, 0.3);
transition: all 0.3s ease;
}
.api-card:hover {
transform: translateY(-5px);
box-shadow: 0 12px 40px 0 rgba(31, 38, 135, 0.5);
}
.title-container {
text-align: center;
padding: 2rem 0;
background: rgba(255, 255, 255, 0.1);
border-radius: 20px;
margin-bottom: 2rem;
backdrop-filter: blur(15px);
}
.progress-ring {
transform: rotate(-90deg);
}
.progress-ring-circle {
transition: stroke-dashoffset 0.35s;
transform: rotate(-90deg);
transform-origin: 50% 50%;
}
.status-indicator {
width: 12px;
height: 12px;
border-radius: 50%;
display: inline-block;
margin-right: 8px;
}
.status-active { background-color: #4ade80; }
.status-warning { background-color: #fbbf24; }
.status-error { background-color: #ef4444; }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
86
+
87
# Enhanced API configuration with endpoints based on research.
#
# Each top-level key identifies one data source. Per source:
#   name, description  - display strings rendered on the dashboard cards
#   base_url           - prefix joined with an endpoint's "url"
#   endpoints          - endpoint key -> request spec: "url", "method",
#                        "headers", plus "params" (GET) or "data" (POST body)
#   rate_limit         - None, or {"requests": N, "per_seconds": S}; the
#                        fetcher sleeps S / N seconds before each request
#   auth               - None, or a descriptor dict (no code visible in this
#                        file reads it; the Swecris header carries the token)
API_CONFIG = {
    "Skolverket": {
        "name": "🇸🇪 Skolverket",
        "description": "Swedish National Agency for Education",
        "base_url": "https://api.skolverket.se",
        "endpoints": {
            "planned_educations": {
                "url": "/planned-educations/v3/compact-school-units",
                "method": "GET",
                "headers": {"accept": "application/vnd.skolverket.plannededucations.api.v3.hal+json"},
                "params": {"coordinateSystemType": "WGS84", "page": 0, "size": 20},
            },
            "school_register": {
                "url": "/skolenhetsregister/v2/skolenhet",
                "method": "GET",
                "headers": {"accept": "application/json"},
                "params": {},
            },
            "syllabus": {
                "url": "/syllabus/v1/studievag",
                "method": "GET",
                "headers": {"accept": "application/json"},
                "params": {"studievagstyp": "GY"},
            },
        },
        "rate_limit": None,
        "auth": None,
    },
    "SCB": {
        "name": "🇸🇪 Statistics Sweden",
        "description": "Swedish National Statistics Office",
        "base_url": "https://api.scb.se",
        "endpoints": {
            "population": {
                "url": "/OV0104/v1/doris/sv/ssd/BE/BE0101/BE0101A/BefolkningNy",
                "method": "POST",
                "headers": {"accept": "application/json"},
                "data": {"query": [], "response": {"format": "json"}},
            },
        },
        "rate_limit": {"requests": 10, "per_seconds": 10},
        "auth": None,
    },
    "Kolada": {
        "name": "🇸🇪 Kolada",
        "description": "Municipal Key Performance Indicators",
        "base_url": "https://api.kolada.se",
        "endpoints": {
            "municipalities": {
                "url": "/v2/municipality",
                "method": "GET",
                "headers": {"accept": "application/json"},
                "params": {},
            },
            "kpi_data": {
                "url": "/v2/data/kpi/N00945",
                "method": "GET",
                "headers": {"accept": "application/json"},
                "params": {"municipality": "1860", "year": "2023"},
            },
        },
        "rate_limit": None,
        "auth": None,
    },
    "Eurostat": {
        "name": "🇪🇺 Eurostat",
        "description": "European Union Statistics",
        "base_url": "https://ec.europa.eu/eurostat",
        "endpoints": {
            "education": {
                "url": "/api/dissemination/statistics/1.0/data/educ_uoe_enra21",
                "method": "GET",
                "headers": {"accept": "application/json"},
                "params": {"format": "JSON", "lang": "EN"},
            },
            "population": {
                "url": "/api/dissemination/statistics/1.0/data/demo_pjan",
                "method": "GET",
                "headers": {"accept": "application/json"},
                "params": {"format": "JSON", "lang": "EN"},
            },
        },
        "rate_limit": None,
        "auth": None,
    },
    "WHO": {
        "name": "🌍 WHO",
        "description": "World Health Organization",
        "base_url": "https://ghoapi.azureedge.net",
        "endpoints": {
            "indicators": {
                "url": "/api/Indicator",
                "method": "GET",
                "headers": {"accept": "application/json"},
                "params": {"$top": "10"},
            },
            "life_expectancy": {
                "url": "/api/GHO/WHOSIS_000001",
                "method": "GET",
                "headers": {"accept": "application/json"},
                "params": {},
            },
        },
        "rate_limit": None,
        "auth": None,
    },
    "OECD": {
        "name": "🌍 OECD",
        "description": "Organisation for Economic Co-operation and Development",
        "base_url": "https://sdmx.oecd.org",
        "endpoints": {
            "gdp_data": {
                "url": "/public/rest/data/OECD.SDD.NAD,DSD_NAMAIN1@NAAG,1.0/.A.GDP_V_CAP.",
                "method": "GET",
                "headers": {"accept": "application/vnd.sdmx.data+json;version=1.0.0"},
                "params": {"startPeriod": "2020"},
            },
        },
        "rate_limit": None,
        "auth": None,
    },
    "WorldBank": {
        "name": "🌍 World Bank",
        "description": "International Financial Institution",
        "base_url": "https://api.worldbank.org",
        "endpoints": {
            "countries": {
                "url": "/v2/country",
                "method": "GET",
                "headers": {"accept": "application/json"},
                "params": {"format": "json", "per_page": "20"},
            },
            "gdp_indicator": {
                "url": "/v2/country/all/indicator/NY.GDP.MKTP.CD",
                "method": "GET",
                "headers": {"accept": "application/json"},
                "params": {"format": "json", "date": "2023"},
            },
        },
        "rate_limit": None,
        "auth": None,
    },
    "Riksbanken": {
        "name": "🇸🇪 Riksbanken",
        "description": "Swedish Central Bank",
        "base_url": "https://api.riksbank.se",
        "endpoints": {
            "eur_sek": {
                "url": "/swea/v1/Observations/SEKEURPMI/2024-01-01/2024-12-31",
                "method": "GET",
                "headers": {"accept": "application/json"},
                "params": {},
            },
        },
        "rate_limit": None,
        "auth": None,
    },
    "Swecris": {
        "name": "🇸🇪 Swecris",
        "description": "Swedish Research Council Database",
        "base_url": "https://swecris-api.vr.se",
        "endpoints": {
            "projects": {
                "url": "/v1/projects",
                "method": "GET",
                # NOTE(review): the bearer token is committed as a literal,
                # here and again under "auth" below - confirm it is meant
                # to be public before publishing this file.
                "headers": {"accept": "application/json", "Authorization": "Bearer VRSwecrisAPI2025-1"},
                "params": {"funderId": "VR"},
            },
        },
        "rate_limit": None,
        "auth": {"type": "Bearer", "token": "VRSwecrisAPI2025-1"},
    },
    "CSN": {
        "name": "🇸🇪 CSN",
        "description": "Swedish Board of Student Finance",
        "base_url": "https://statistik.csn.se",
        "endpoints": {
            "student_aid": {
                "url": "/PXWeb/api/v1/sv/CSNstat/Studiestod/Studiemedel/Hogskola/SS0101B1.px",
                "method": "POST",
                "headers": {"accept": "application/json"},
                "data": {"query": [], "response": {"format": "json"}},
            },
        },
        "rate_limit": None,
        "auth": None,
    },
}
276
+
277
# Database setup
DB_PATH = "global_data_harvester.db"


def init_database():
    """Create the SQLite tables at ``DB_PATH`` if they do not exist yet.

    Two tables are created:

    * ``api_data`` - one row per stored payload; ``data_hash`` is UNIQUE.
    * ``fetch_sessions`` - per-session counters and timestamps.

    Both statements use ``CREATE TABLE IF NOT EXISTS``, so calling this
    repeatedly is harmless.

    Raises:
        sqlite3.Error: if the database cannot be opened or written. The
            connection is closed before the error propagates.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS api_data (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                api_name TEXT NOT NULL,
                endpoint_name TEXT NOT NULL,
                data_hash TEXT UNIQUE,
                raw_data TEXT,
                record_count INTEGER,
                fetch_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                status TEXT DEFAULT 'success'
            )
        ''')

        conn.execute('''
            CREATE TABLE IF NOT EXISTS fetch_sessions (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                session_name TEXT NOT NULL,
                started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                completed_at TIMESTAMP,
                total_apis INTEGER,
                successful_fetches INTEGER,
                failed_fetches INTEGER,
                total_records INTEGER
            )
        ''')

        conn.commit()
    finally:
        # Close even when a statement fails so the handle is not leaked.
        conn.close()
313
+
314
class ModernDataFetcher:
    """Fetch configured API endpoints over HTTP and persist the payloads.

    A single ``requests.Session`` is reused for every call. Each successful
    response is unwrapped by ``_process_response`` and then written to the
    ``api_data`` table by ``_save_to_database``.
    """

    # For these APIs a dict response carries its records under one key.
    # A non-dict response, or a dict without the key, is passed through.
    _PAYLOAD_KEYS = {
        "SCB": "data",
        "Kolada": "values",
        "Eurostat": "value",
        "WHO": "value",
        "Riksbanken": "observations",
        "Swecris": "items",
        "CSN": "data",
    }

    def __init__(self):
        """Create the HTTP session and make sure the database tables exist."""
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Global-Data-Harvester/1.0 (Research & Education Purpose)'
        })
        init_database()

    def fetch_api_data(self, api_name: str, endpoint_name: str) -> Dict[str, Any]:
        """Fetch one endpoint, store the payload, and report the outcome.

        Args:
            api_name: key into ``API_CONFIG``.
            endpoint_name: key into that API's ``"endpoints"`` mapping.

        Returns:
            On success: ``{"status": "success", "data", "record_count",
            "timestamp"}``. On any failure (unknown key, HTTP error,
            database error): ``{"status": "error", "error", "timestamp"}``.
            This method does not raise for ordinary errors.
        """
        try:
            config = API_CONFIG[api_name]
            endpoint = config["endpoints"][endpoint_name]

            # Apply rate limiting: spread the allowed requests evenly over
            # the configured window by pausing before each call.
            rate_limit = config.get("rate_limit")
            if rate_limit:
                time.sleep(rate_limit["per_seconds"] / rate_limit["requests"])

            # Prepare request
            url = config["base_url"] + endpoint["url"]
            headers = endpoint.get("headers", {})

            # Make request based on method
            if endpoint["method"] == "POST":
                response = self.session.post(
                    url,
                    json=endpoint.get("data", {}),
                    headers=headers,
                    timeout=30,
                )
            else:
                response = self.session.get(
                    url,
                    params=endpoint.get("params", {}),
                    headers=headers,
                    timeout=30,
                )

            response.raise_for_status()

            # Parse response; keep the raw text when the body is not JSON.
            # Only decoding failures are caught (they subclass ValueError),
            # so KeyboardInterrupt and friends are no longer swallowed.
            try:
                data = response.json()
            except ValueError:
                data = {"raw_response": response.text}

            # Process and clean data
            processed_data = self._process_response(api_name, data)

            # Save to database
            self._save_to_database(api_name, endpoint_name, processed_data)

            return {
                "status": "success",
                "data": processed_data,
                "record_count": len(processed_data) if isinstance(processed_data, list) else 1,
                "timestamp": datetime.now().isoformat(),
            }

        except Exception as e:
            # Deliberate catch-all at the UI boundary: callers expect a
            # result dict for every endpoint, never an exception.
            return {
                "status": "error",
                "error": str(e),
                "timestamp": datetime.now().isoformat(),
            }

    def _process_response(self, api_name: str, data: Any) -> Any:
        """Unwrap the record payload from a response, per API.

        * Skolverket: the first list-valued entry of ``data["_embedded"]``.
        * WorldBank: the second list element when truthy, else the first.
        * APIs in ``_PAYLOAD_KEYS``: the value under the mapped key.

        Whenever the response does not have the expected shape, ``data`` is
        returned unchanged.
        """
        if api_name == "Skolverket":
            if isinstance(data, dict) and "_embedded" in data:
                for value in data["_embedded"].values():
                    if isinstance(value, list):
                        return value
            return data

        if api_name == "WorldBank":
            if isinstance(data, list) and len(data) > 1:
                return data[1] if data[1] else data[0]
            return data

        payload_key = self._PAYLOAD_KEYS.get(api_name)
        if payload_key is not None and isinstance(data, dict):
            return data.get(payload_key, data)

        return data

    def _save_to_database(self, api_name: str, endpoint_name: str, data: Any):
        """Store one payload in ``api_data``, skipping exact duplicates.

        The payload is serialised to JSON with sorted keys and hashed with
        SHA-256. ``data_hash`` is UNIQUE, so ``INSERT OR IGNORE`` drops a
        row whose serialised payload is already stored.
        """
        # Serialise before opening the connection so a serialisation error
        # cannot leave a connection open.
        data_str = json.dumps(data, sort_keys=True, default=str)
        data_hash = hashlib.sha256(data_str.encode()).hexdigest()
        record_count = len(data) if isinstance(data, list) else 1

        conn = sqlite3.connect(DB_PATH)
        try:
            # OR IGNORE already turns a constraint conflict into a no-op,
            # so no IntegrityError handler is needed here.
            conn.execute('''
                INSERT OR IGNORE INTO api_data
                (api_name, endpoint_name, data_hash, raw_data, record_count)
                VALUES (?, ?, ?, ?, ?)
            ''', (api_name, endpoint_name, data_hash, data_str, record_count))
            conn.commit()
        finally:
            conn.close()
446
+
447
# Initialize session state: each key is seeded once per browser session.
# The factory is only called when the key is missing, so the fetcher is
# not rebuilt on every script rerun.
_SESSION_DEFAULT_FACTORIES = {
    'fetcher': ModernDataFetcher,
    'fetch_results': dict,
    'active_apis': set,
}

for _state_key, _make_default in _SESSION_DEFAULT_FACTORIES.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
456
+
457
# Modern header: title banner, tagline and three feature badges.
# The source count is derived from API_CONFIG so the tagline cannot drift
# out of date when a source is added or removed.
st.markdown(f"""
<div class="title-container">
<h1 style="font-size: 3rem; margin: 0; background: linear-gradient(45deg, #fff, #f0f0f0); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">
🌍 Global Data Harvester
</h1>
<p style="font-size: 1.2rem; margin: 0.5rem 0 0 0; opacity: 0.9;">
Intelligent data collection from {len(API_CONFIG)} international sources
</p>
<div style="margin-top: 1rem;">
<span style="background: rgba(255,255,255,0.2); padding: 0.3rem 0.8rem; border-radius: 15px; margin: 0 0.3rem; font-size: 0.9rem;">Real-time</span>
<span style="background: rgba(255,255,255,0.2); padding: 0.3rem 0.8rem; border-radius: 15px; margin: 0 0.3rem; font-size: 0.9rem;">Auto-discovery</span>
<span style="background: rgba(255,255,255,0.2); padding: 0.3rem 0.8rem; border-radius: 15px; margin: 0 0.3rem; font-size: 0.9rem;">Parallel processing</span>
</div>
</div>
""", unsafe_allow_html=True)
473
+
474
# Main dashboard layout: API cards on the left, metrics and bulk fetch on
# the right.

# CSS class of the coloured dot shown on each API card.
_STATUS_CSS_CLASS = {
    "success": "status-active",
    "error": "status-error",
    "partial": "status-warning",
    "inactive": "status-warning",
}


def _summarize_api_status(endpoint_results):
    """Collapse {endpoint_name: result} into one status string for an API.

    Returns "inactive" when nothing has been fetched, "success" when every
    endpoint succeeded, "partial" when only some did, and "error" otherwise.
    """
    statuses = {result.get("status") for result in endpoint_results.values()}
    if not statuses:
        return "inactive"
    if statuses == {"success"}:
        return "success"
    if "success" in statuses:
        return "partial"
    return "error"


def _iter_endpoint_results(api_key, api_config):
    """Yield (endpoint_name, result) for each endpoint of one API.

    Fetching happens lazily, one endpoint per iteration, so a caller can
    update a progress indicator between requests.
    """
    for endpoint_name in api_config["endpoints"]:
        yield endpoint_name, st.session_state.fetcher.fetch_api_data(api_key, endpoint_name)


col1, col2 = st.columns([2, 1])

with col1:
    st.markdown("### 🎯 API Sources")

    # Create API grid: cards alternate between two columns.
    api_cols = st.columns(2)

    for i, (api_key, api_config) in enumerate(API_CONFIG.items()):
        with api_cols[i % 2]:
            # fetch_results[api_key] maps endpoint names to result dicts,
            # so the card status has to be aggregated across endpoints.
            status = _summarize_api_status(st.session_state.fetch_results.get(api_key, {}))
            status_color = _STATUS_CSS_CLASS.get(status, "status-warning")

            # Create API card
            st.markdown(f"""
<div class="api-card">
<div style="display: flex; align-items: center; margin-bottom: 0.5rem;">
<span class="status-indicator {status_color}"></span>
<strong>{api_config['name']}</strong>
</div>
<p style="margin: 0; font-size: 0.9rem; opacity: 0.8;">{api_config['description']}</p>
<p style="margin: 0.5rem 0 0 0; font-size: 0.8rem;">
{len(api_config['endpoints'])} endpoints available
</p>
</div>
""", unsafe_allow_html=True)

            # Fetch button for each API
            if st.button(f"Fetch {api_key}", key=f"fetch_{api_key}", use_container_width=True):
                with st.spinner(f"Fetching data from {api_key}..."):
                    st.session_state.fetch_results[api_key] = dict(
                        _iter_endpoint_results(api_key, api_config)
                    )
                    st.session_state.active_apis.add(api_key)
                # Rerun so cards and metrics reflect the new results.
                st.rerun()

with col2:
    st.markdown("### 📊 Live Metrics")

    # Calculate metrics over every endpoint result of the session.
    total_apis = len(API_CONFIG)
    active_apis = len(st.session_state.active_apis)
    successful_results = [
        result
        for api_results in st.session_state.fetch_results.values()
        for result in api_results.values()
        if result.get("status") == "success"
    ]
    total_successful = len(successful_results)
    # Module-level name: the export tab further down reads total_records.
    total_records = sum(result.get("record_count", 0) for result in successful_results)

    # Display metrics with modern cards
    metrics = [
        ("🎯 Active APIs", f"{active_apis}/{total_apis}"),
        ("✅ Successful", total_successful),
        ("📊 Total Records", f"{total_records:,}"),
        ("🕒 Last Update", datetime.now().strftime("%H:%M:%S")),
    ]

    for label, value in metrics:
        st.markdown(f"""
<div class="metric-card">
<div style="font-size: 0.9rem; opacity: 0.8; margin-bottom: 0.3rem;">{label}</div>
<div style="font-size: 1.5rem; font-weight: bold;">{value}</div>
</div>
""", unsafe_allow_html=True)

    # Bulk operations
    st.markdown("### 🚀 Bulk Operations")

    if st.button("🌍 Fetch All APIs", use_container_width=True, type="primary"):
        progress_bar = st.progress(0)
        status_text = st.empty()

        total_operations = sum(len(config["endpoints"]) for config in API_CONFIG.values())
        completed = 0

        for api_key, api_config in API_CONFIG.items():
            status_text.text(f"Processing {api_key}...")
            results = {}

            for endpoint_name, result in _iter_endpoint_results(api_key, api_config):
                results[endpoint_name] = result
                completed += 1
                progress_bar.progress(completed / total_operations)

            st.session_state.fetch_results[api_key] = results
            st.session_state.active_apis.add(api_key)

        status_text.text("✅ All APIs processed!")
        # Leave the completion message visible briefly before the rerun.
        time.sleep(1)
        st.rerun()
583
+
584
# Results section: shown only once at least one API has been fetched.
if st.session_state.fetch_results:
    st.markdown("### 📈 Latest Results")

    # Create tabs for different views
    tab1, tab2, tab3 = st.tabs(["📊 Data Preview", "📋 Status Overview", "💾 Export"])

    with tab1:
        selected_api = st.selectbox("Select API for preview", list(st.session_state.fetch_results.keys()))

        if selected_api:
            api_results = st.session_state.fetch_results[selected_api]

            for endpoint_name, result in api_results.items():
                if result.get("status") != "success":
                    # Surface the stored error instead of silently hiding
                    # the endpoint from the preview.
                    st.warning(f"{selected_api} - {endpoint_name}: {result.get('error', 'fetch failed')}")
                    continue

                st.markdown(f"**{selected_api} - {endpoint_name}**")

                data = result.get("data", [])
                preview = data[:10] if isinstance(data, list) else []
                # Only tabulate lists of dict records; any other payload
                # is shown as raw JSON.
                if preview and all(isinstance(row, dict) for row in preview):
                    df = pd.json_normalize(preview)
                    st.dataframe(df, use_container_width=True)
                    # Report the number of rows actually displayed.
                    st.caption(f"Showing {len(preview)} of {len(data)} records")
                else:
                    st.json(data)

    with tab2:
        # Status overview with charts: one row per fetched endpoint.
        status_data = [
            {
                "API": api_name,
                "Endpoint": endpoint_name,
                "Status": result.get("status", "unknown"),
                "Records": result.get("record_count", 0),
                "Timestamp": result.get("timestamp", ""),
            }
            for api_name, api_results in st.session_state.fetch_results.items()
            for endpoint_name, result in api_results.items()
        ]

        if status_data:
            df_status = pd.DataFrame(status_data)

            # Success rate chart. color= must name the column, otherwise
            # color_discrete_map is not applied to the slices.
            fig = px.pie(
                df_status,
                names="Status",
                color="Status",
                title="Fetch Success Rate",
                color_discrete_map={"success": "#4ade80", "error": "#ef4444"},
            )
            fig.update_layout(
                paper_bgcolor="rgba(0,0,0,0)",
                plot_bgcolor="rgba(0,0,0,0)",
                font_color="white",
            )
            st.plotly_chart(fig, use_container_width=True)

            # Records by API (successful endpoints only)
            records_by_api = (
                df_status[df_status["Status"] == "success"]
                .groupby("API")["Records"]
                .sum()
                .reset_index()
            )
            if not records_by_api.empty:
                fig2 = px.bar(
                    records_by_api,
                    x="API",
                    y="Records",
                    title="Records Fetched by API",
                )
                fig2.update_layout(
                    paper_bgcolor="rgba(0,0,0,0)",
                    plot_bgcolor="rgba(0,0,0,0)",
                    font_color="white",
                )
                st.plotly_chart(fig2, use_container_width=True)

    with tab3:
        st.markdown("**Export Options**")

        # Database statistics; the connection is closed even if the query
        # fails.
        conn = sqlite3.connect(DB_PATH)
        try:
            total_records_db = pd.read_sql_query(
                "SELECT COUNT(*) as count FROM api_data", conn
            ).iloc[0]["count"]
        finally:
            conn.close()

        st.metric("Database Records", total_records_db)

        # NOTE(review): the download button is only rendered during the run
        # triggered by this click - confirm the two-click flow is intended.
        if st.button("📄 Export All Data (JSON)", use_container_width=True):
            export_data = {
                "export_timestamp": datetime.now().isoformat(),
                "session_results": st.session_state.fetch_results,
                "summary": {
                    "total_apis": len(st.session_state.fetch_results),
                    # total_records is computed in the metrics column above.
                    "total_records": total_records,
                    "active_apis": list(st.session_state.active_apis),
                },
            }

            st.download_button(
                "💾 Download Export",
                data=json.dumps(export_data, indent=2, default=str),
                file_name=f"global_data_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
                mime="application/json",
                use_container_width=True,
            )
684
+
685
# Footer: horizontal rule followed by a centred credits line listing the
# Swedish and global sources.
st.markdown("---")
st.markdown("""
<div style="text-align: center; padding: 1rem; opacity: 0.8;">
<p><strong>🌍 Global Data Harvester</strong> - Modern data collection from international sources</p>
<p style="font-size: 0.9rem;">
🇸🇪 Swedish: Skolverket, SCB, Kolada, Riksbanken, Swecris, CSN •
🌍 Global: Eurostat, WHO, OECD, World Bank
</p>
</div>
""", unsafe_allow_html=True)