isakskogstad committed on
Commit
18b60c9
·
verified ·
1 Parent(s): f818254

Upload app_simplified.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app_simplified.py +668 -0
app_simplified.py ADDED
@@ -0,0 +1,668 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import pandas as pd
4
+ import plotly.express as px
5
+ import json
6
+ import time
7
+ from datetime import datetime
8
+ import sqlite3
9
+ import hashlib
10
+ import gzip
11
+ from typing import Dict, Any, List
12
+ import warnings
13
+ warnings.filterwarnings('ignore')
14
+
15
# Optional AI/ML stack. ML_AVAILABLE is a module-level flag that is True only
# when both packages import without raising ImportError; otherwise it is False
# and the app reports "Standard Mode".
try:
    from transformers import pipeline
    from sentence_transformers import SentenceTransformer
except ImportError:
    ML_AVAILABLE = False
else:
    ML_AVAILABLE = True
25
+
26
# Page-level Streamlit settings, collected in one mapping and applied once.
_PAGE_SETTINGS = {
    "page_title": "Simplified Data Harvester",
    "page_icon": "πŸš€",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_PAGE_SETTINGS)

# Custom stylesheet injected as raw HTML; the class names defined here
# (title-container, api-grid, api-card, main-button) are available to any
# HTML snippet rendered with unsafe_allow_html=True.
_CUSTOM_CSS = """
<style>
.main > div {
padding-top: 1rem;
}
.stApp {
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
color: #2c3e50;
}
.title-container {
text-align: center;
padding: 2rem 0;
background: rgba(255, 255, 255, 0.9);
border-radius: 15px;
margin-bottom: 2rem;
border: 1px solid rgba(52, 73, 94, 0.1);
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.05);
}
.api-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 1rem;
margin: 1rem 0;
}
.api-card {
background: rgba(255, 255, 255, 0.9);
border-radius: 10px;
padding: 1rem;
border: 1px solid rgba(52, 73, 94, 0.15);
text-align: center;
}
.main-button {
background: linear-gradient(135deg, #3498db, #2980b9);
color: white;
border: none;
border-radius: 12px;
padding: 1rem 2rem;
font-size: 1.2rem;
font-weight: bold;
cursor: pointer;
width: 100%;
margin: 1rem 0;
}
.main-button:hover {
background: linear-gradient(135deg, #2980b9, #1f4e79);
transform: translateY(-2px);
box-shadow: 0 8px 25px rgba(52, 152, 219, 0.3);
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
85
+
86
import os

# Database Configuration: path of the SQLite file used by every DB helper.
DB_PATH = "simplified_harvester.db"

# Bearer token sent to the Swecris API. It can be overridden through the
# SWECRIS_API_TOKEN environment variable so a rotated token does not require
# a code change; the previously hard-coded value remains the default.
_SWECRIS_API_TOKEN = os.environ.get("SWECRIS_API_TOKEN") or "VRSwecrisAPI2025-1"

# Simplified API Configuration - Real working endpoints
#
# Maps an internal API key to:
#   name        - display label shown in the UI
#   description - one-line description shown in the UI
#   endpoints   - list of request specs; each has "url", "headers", "method"
#                 ("GET" or "POST") and, for POST, a JSON body under "data".
SIMPLIFIED_API_CONFIG = {
    "Skolverket": {
        "name": "πŸ‡ΈπŸ‡ͺ Skolverket",
        "description": "Swedish National Agency for Education",
        "endpoints": [
            {
                "url": "https://api.skolverket.se/planned-educations/v3",
                "headers": {"Accept": "application/vnd.skolverket.plannededucations.api.v3.hal+json"},
                "method": "GET"
            }
        ]
    },
    "SCB": {
        "name": "πŸ‡ΈπŸ‡ͺ Statistics Sweden",
        "description": "Swedish National Statistics Office",
        "endpoints": [
            {
                "url": "https://api.scb.se/OV0104/v1/doris/sv/ssd/START/BE/BE0101/BE0101A/BefolkningNy",
                "headers": {"Content-Type": "application/json"},
                "method": "POST",
                "data": {
                    "query": [
                        {"code": "Region", "selection": {"filter": "item", "values": ["00"]}},
                        {"code": "Civilstand", "selection": {"filter": "item", "values": ["TOT"]}},
                        {"code": "Alder", "selection": {"filter": "item", "values": ["tot"]}},
                        {"code": "Kon", "selection": {"filter": "item", "values": ["1", "2"]}},
                        {"code": "ContentsCode", "selection": {"filter": "item", "values": ["BE0101N1"]}},
                        {"code": "Tid", "selection": {"filter": "item", "values": ["2023"]}}
                    ],
                    "response": {"format": "json"}
                }
            }
        ]
    },
    "Kolada": {
        "name": "πŸ‡ΈπŸ‡ͺ Kolada",
        "description": "Municipal Key Performance Indicators",
        "endpoints": [
            {
                "url": "https://api.kolada.se/v2/municipality",
                "headers": {"Accept": "application/json"},
                "method": "GET"
            }
        ]
    },
    "Eurostat": {
        "name": "πŸ‡ͺπŸ‡Ί Eurostat",
        "description": "European Union Statistics",
        "endpoints": [
            {
                "url": "https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/demo_pjan?format=JSON&lang=en&geo=EU27_2020&age=TOTAL&sex=T&time=2023",
                "headers": {"Accept": "application/json"},
                "method": "GET"
            }
        ]
    },
    "WHO": {
        "name": "🌍 WHO",
        "description": "World Health Organization",
        "endpoints": [
            {
                "url": "https://ghoapi.azureedge.net/api/WHOSIS_000001",
                "headers": {"Accept": "application/json"},
                "method": "GET"
            }
        ]
    },
    "OECD": {
        "name": "🌍 OECD",
        "description": "Organisation for Economic Co-operation and Development",
        "endpoints": [
            {
                "url": "https://sdmx.oecd.org/public/rest/data/OECD.SDD.NAD,DSD_NAMAIN1@DF_QNA,1.0/AUS.B1GQ.C.Q?format=jsondata",
                "headers": {"Accept": "application/vnd.sdmx.data+json;version=1.0.0"},
                "method": "GET"
            }
        ]
    },
    "WorldBank": {
        "name": "🌍 World Bank",
        "description": "International Financial Institution",
        "endpoints": [
            {
                "url": "https://api.worldbank.org/v2/country?format=json&per_page=50",
                "headers": {"Accept": "application/json"},
                "method": "GET"
            }
        ]
    },
    "Riksbanken": {
        "name": "πŸ‡ΈπŸ‡ͺ Riksbanken",
        "description": "Swedish Central Bank",
        "endpoints": [
            {
                "url": "https://api.riksbank.se/swea/v1/Observations/SEKEURPMI/2023-01-01/2023-12-31",
                "headers": {"Accept": "application/json"},
                "method": "GET"
            }
        ]
    },
    "Swecris": {
        "name": "πŸ‡ΈπŸ‡ͺ Swecris",
        "description": "Swedish Research Council Database",
        "endpoints": [
            {
                "url": "https://swecris-api.vr.se/v1/projects?size=50",
                "headers": {
                    "Accept": "application/json",
                    "Authorization": f"Bearer {_SWECRIS_API_TOKEN}"
                },
                "method": "GET"
            }
        ]
    },
    "CSN": {
        "name": "πŸ‡ΈπŸ‡ͺ CSN",
        "description": "Swedish Board of Student Finance",
        "endpoints": [
            {
                "url": "https://statistik.csn.se/PXWeb/api/v1/sv/CSNstat/StudiebidragGymnasieskola/SS0101B1.px",
                "headers": {"Content-Type": "application/json"},
                "method": "POST",
                "data": {
                    "query": [
                        {"code": "Region", "selection": {"filter": "item", "values": ["00"]}},
                        {"code": "ContentsCode", "selection": {"filter": "item", "values": ["SS0101B1"]}},
                        {"code": "Tid", "selection": {"filter": "item", "values": ["2023"]}}
                    ],
                    "response": {"format": "json"}
                }
            }
        ]
    }
}
224
+
225
def init_database(db_path=None):
    """Create the ``harvested_data`` table and its indexes if they are missing.

    Safe to call repeatedly: every statement uses ``IF NOT EXISTS``.

    Args:
        db_path: Optional path of the SQLite file to initialise. When omitted
            (the default), the module-level ``DB_PATH`` is used.

    Raises:
        sqlite3.Error: If the database cannot be opened or a statement fails.
            The connection is closed before the error propagates.
    """
    from contextlib import closing

    target = DB_PATH if db_path is None else db_path

    # closing() guarantees the connection is released even when a statement
    # raises; sqlite3's own context manager only handles commit/rollback.
    with closing(sqlite3.connect(target)) as conn:
        cursor = conn.cursor()

        cursor.execute('''
            CREATE TABLE IF NOT EXISTS harvested_data (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                api_name TEXT NOT NULL,
                endpoint_url TEXT NOT NULL,
                data_hash TEXT UNIQUE,
                raw_data TEXT,
                compressed_data BLOB,
                record_count INTEGER,
                data_size_bytes INTEGER,
                fetch_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                fetch_duration_ms INTEGER,
                status TEXT DEFAULT 'success',
                session_id TEXT
            )
        ''')

        cursor.execute('CREATE INDEX IF NOT EXISTS idx_api_name ON harvested_data(api_name)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_timestamp ON harvested_data(fetch_timestamp)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_status ON harvested_data(status)')

        conn.commit()
253
+
254
class SimplifiedDataHarvester:
    """Simplified data harvester - one function to fetch from all APIs.

    The harvester walks ``SIMPLIFIED_API_CONFIG``, requests every configured
    endpoint through a shared ``requests.Session``, stores each successful
    payload in the SQLite database at ``DB_PATH`` and keeps a per-API summary
    in ``self.results`` / ``self.errors``.
    """

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Simplified-Data-Harvester/1.0 (Research & Analysis)'
        })
        self.results = {}
        self.errors = {}

    def fetch_all_apis(self, progress_callback=None) -> Dict:
        """One function to fetch data from all APIs automatically.

        Args:
            progress_callback: Optional callable taking one status string. It
                is invoked once at start, twice per API (before the fetch and
                after it completes or fails) and once at the end.

        Returns:
            A dict with keys ``results`` (per-API summaries of APIs that
            returned data from at least one endpoint), ``errors`` (API name ->
            error message), ``session_id`` and ``summary`` (counts and success
            rate in percent).
        """
        session_id = f"simplified_{int(time.time())}"
        total_apis = len(SIMPLIFIED_API_CONFIG)
        completed = 0

        # Start every run from a clean slate. The instance may be reused
        # across runs, and rebinding (rather than clearing) keeps dicts handed
        # out by a previous run intact.
        self.results = {}
        self.errors = {}

        def notify(message):
            if progress_callback:
                progress_callback(message)

        notify(f"πŸš€ Starting data collection from {total_apis} APIs...")

        for api_name, config in SIMPLIFIED_API_CONFIG.items():
            notify(f"πŸ”„ Fetching from {config['name']}...")

            try:
                api_results = self._fetch_api_data(api_name, config, session_id)

                # _fetch_api_data records endpoint failures instead of raising,
                # so an API whose endpoints all failed must be flagged here;
                # otherwise it would be reported as a success with 0 records.
                failure = self._failure_message(api_results)
                if failure:
                    raise RuntimeError(failure)

                self.results[api_name] = api_results
                completed += 1

                progress = (completed / total_apis) * 100
                notify(f"βœ… {config['name']} completed ({progress:.1f}%)")

                time.sleep(0.5)  # Respectful delay

            except Exception as e:
                self.errors[api_name] = str(e)
                notify(f"❌ {config['name']} failed: {str(e)[:50]}...")
                completed += 1

        successful = len(self.results)
        failed = len(self.errors)
        notify(f"πŸŽ‰ Collection complete! βœ… {successful} successful, ❌ {failed} failed")

        return {
            "results": self.results,
            "errors": self.errors,
            "session_id": session_id,
            "summary": {
                "total_apis": total_apis,
                "successful": successful,
                "failed": failed,
                # Guard against an empty configuration.
                "success_rate": (successful / total_apis) * 100 if total_apis else 0.0
            }
        }

    @staticmethod
    def _failure_message(api_results: Dict) -> str:
        """Return the joined endpoint errors when every endpoint failed.

        Returns an empty string when there are no endpoints or at least one
        endpoint did not end with status ``"error"``.
        """
        endpoints = api_results.get("endpoints", [])
        if not endpoints:
            return ""
        if any(ep.get("status") != "error" for ep in endpoints):
            return ""
        return "; ".join(str(ep.get("error", "unknown error")) for ep in endpoints)

    def _fetch_api_data(self, api_name: str, config: Dict, session_id: str) -> Dict:
        """Fetch data from all endpoints for a specific API.

        Each endpoint is requested independently; a failure is recorded in
        that endpoint's result entry (status ``"error"``) and does not stop
        the remaining endpoints.

        Returns:
            Dict with ``api_name``, ``endpoints`` (one result dict per
            endpoint), ``total_records`` and ``total_size`` (bytes), the
            totals covering successful endpoints only.
        """
        api_results = {
            "api_name": api_name,
            "endpoints": [],
            "total_records": 0,
            "total_size": 0
        }

        for endpoint in config['endpoints']:
            try:
                start_time = time.time()

                # Make request
                if endpoint.get('method', 'GET').upper() == 'POST':
                    response = self.session.post(
                        endpoint['url'],
                        headers=endpoint.get('headers', {}),
                        json=endpoint.get('data', {}),
                        timeout=30
                    )
                else:
                    response = self.session.get(
                        endpoint['url'],
                        headers=endpoint.get('headers', {}),
                        timeout=30
                    )

                response.raise_for_status()

                # Process response
                data = self._process_response(response)
                fetch_duration = int((time.time() - start_time) * 1000)

                # Extract meaningful data
                processed_data = self._extract_api_data(data, api_name)
                record_count = self._count_records(processed_data)
                data_size = len(response.content)

                # Save to database
                self._save_data_to_db(
                    api_name, endpoint['url'], processed_data, session_id,
                    fetch_duration, record_count, data_size, "success"
                )

                # Build the preview once; str() of a large payload is costly.
                preview = str(processed_data)
                if len(preview) > 200:
                    preview = preview[:200] + "..."

                endpoint_result = {
                    "endpoint_url": endpoint['url'],
                    "status": "success",
                    "records": record_count,
                    "size_bytes": data_size,
                    "duration_ms": fetch_duration,
                    "data_preview": preview
                }

                api_results["endpoints"].append(endpoint_result)
                api_results["total_records"] += record_count
                api_results["total_size"] += data_size

            except Exception as e:
                # Per-endpoint boundary: record the failure and carry on.
                endpoint_result = {
                    "endpoint_url": endpoint['url'],
                    "status": "error",
                    "error": str(e),
                    "records": 0,
                    "size_bytes": 0,
                    "duration_ms": 0
                }
                api_results["endpoints"].append(endpoint_result)

        return api_results

    def _process_response(self, response):
        """Decode an API response.

        Returns the parsed JSON body. When the content type does not mention
        JSON and the body cannot be parsed as JSON, returns
        ``{"raw_content": <response text>}`` instead. A body that claims to be
        JSON but is malformed raises, and the caller records the failure.
        """
        content_type = response.headers.get('content-type', '').lower()

        if 'json' in content_type:
            return response.json()

        try:
            return response.json()  # Try JSON first
        except ValueError:
            # JSON decode errors are ValueError subclasses.
            return {"raw_content": response.text}

    def _extract_api_data(self, data: Any, api_name: str) -> Any:
        """Extract meaningful data from API response.

        SCB payloads are unwrapped via their ``"data"`` key, Kolada via
        ``"values"``; World Bank list responses yield their second element
        (falling back to the first when the second is empty). Anything else is
        returned unchanged.
        """
        if api_name == "SCB" and isinstance(data, dict):
            return data.get("data", data)
        elif api_name == "Kolada" and isinstance(data, dict):
            return data.get("values", data)
        elif api_name == "WorldBank" and isinstance(data, list) and len(data) > 1:
            return data[1] if data[1] else data[0]
        else:
            return data

    def _count_records(self, data: Any) -> int:
        """Count records in the data.

        A list counts its items; a dict counts the items of its first
        non-empty list value (or 1 if it has none); any other value counts as
        1 when truthy and 0 otherwise.
        """
        if isinstance(data, list):
            return len(data)
        elif isinstance(data, dict):
            for value in data.values():
                if isinstance(value, list) and len(value) > 0:
                    return len(value)
            return 1
        else:
            return 1 if data else 0

    def _save_data_to_db(self, api_name: str, endpoint_url: str, data: Any,
                         session_id: str, fetch_duration: int, record_count: int,
                         data_size: int, status: str):
        """Save data to database.

        The payload is serialised to JSON and hashed with SHA-256; a row with
        the same hash is treated as a duplicate and skipped. Payloads whose
        response size exceeded 1024 bytes are stored gzip-compressed in
        ``compressed_data``; smaller ones go to ``raw_data``.

        Persistence is best-effort: serialisation and database errors are
        logged and swallowed so a storage problem never aborts a harvest.
        """
        import logging

        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()

        try:
            # Create data hash for deduplication
            data_str = json.dumps(data, sort_keys=True, default=str)
            data_hash = hashlib.sha256(data_str.encode()).hexdigest()

            # Check if data exists
            cursor.execute('SELECT id FROM harvested_data WHERE data_hash = ?', (data_hash,))
            if cursor.fetchone():
                return  # Skip duplicate

            # Compress if large
            raw_data = None
            compressed_data = None

            if data_size > 1024:
                try:
                    compressed_data = gzip.compress(data_str.encode('utf-8'))
                except Exception:
                    # Fall back to storing the uncompressed text.
                    raw_data = data_str
            else:
                raw_data = data_str

            # Insert data
            cursor.execute('''
                INSERT INTO harvested_data
                (api_name, endpoint_url, data_hash, raw_data, compressed_data,
                 record_count, data_size_bytes, fetch_duration_ms, status, session_id)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                api_name, endpoint_url, data_hash, raw_data, compressed_data,
                record_count, data_size, fetch_duration, status, session_id
            ))

            conn.commit()

        except (sqlite3.Error, TypeError, ValueError) as e:
            # Keep the harvest going, but leave a trace instead of failing
            # silently.
            logging.getLogger(__name__).warning(
                "Could not store data for %s (%s): %s", api_name, endpoint_url, e
            )

        finally:
            conn.close()
465
+
466
def get_database_stats():
    """Return aggregate counts for the ``harvested_data`` table.

    Returns:
        dict with ``total_records`` (number of stored rows), ``active_apis``
        (number of distinct ``api_name`` values) and ``total_data_records``
        (sum of ``record_count``, 0 when the table is empty).
    """
    connection = sqlite3.connect(DB_PATH)
    cur = connection.cursor()

    try:
        row_total = cur.execute('SELECT COUNT(*) FROM harvested_data').fetchone()[0]
        api_total = cur.execute('SELECT COUNT(DISTINCT api_name) FROM harvested_data').fetchone()[0]
        # SUM() yields NULL on an empty table, hence the fallback to 0.
        record_sum = cur.execute('SELECT SUM(record_count) FROM harvested_data').fetchone()[0]

        stats = {
            "total_records": row_total,
            "active_apis": api_total,
            "total_data_records": record_sum or 0,
        }
        return stats
    finally:
        connection.close()
488
+
489
# Initialize database
init_database()

# Initialize components. They live in st.session_state so the harvester and
# the most recent results are kept between reruns of this script.
if 'harvester' not in st.session_state:
    st.session_state.harvester = SimplifiedDataHarvester()

if 'last_results' not in st.session_state:
    st.session_state.last_results = None

# Header
st.markdown("""
<div class="title-container">
<h1 style="font-size: 2.5rem; margin: 0; color: #2c3e50;">
πŸš€ Simplified Data Harvester
</h1>
<p style="font-size: 1.1rem; margin: 0.5rem 0 0 0; color: #34495e;">
One-Click Data Collection from All APIs
</p>
<p style="font-size: 0.95rem; margin: 0.3rem 0 0 0; color: #7f8c8d;">
Automatic data fetching from 10 international sources with smart database storage
</p>
</div>
""", unsafe_allow_html=True)

# Display ML status
if ML_AVAILABLE:
    st.success("πŸ€– **AI Enhanced** - Quality assessment and analysis active")
else:
    st.info("πŸ“Š **Standard Mode** - Core functionality available")

# Main Action Section
st.markdown("### πŸš€ Data Collection")

# Show API status
col1, col2 = st.columns([2, 1])

with col1:
    st.markdown("**Available APIs:**")

    # Display APIs in a compact format
    for config in SIMPLIFIED_API_CONFIG.values():
        st.markdown(f"βœ… **{config['name']}** - {config['description']}")

with col2:
    # Database quick stats
    try:
        stats = get_database_stats()
        st.metric("πŸ“Š Total Records", f"{stats.get('total_records', 0):,}")
        st.metric("🌍 Active APIs", stats.get('active_apis', 0))
        st.metric("πŸ“„ Data Records", f"{stats.get('total_data_records', 0):,}")
    except Exception:
        # The stats panel is informational only; fall back to zeros for all
        # three metrics if the database cannot be read.
        st.metric("πŸ“Š Total Records", "0")
        st.metric("🌍 Active APIs", "0")
        st.metric("πŸ“„ Data Records", "0")

st.markdown("---")

# Single button to fetch all data
col1, col2, col3 = st.columns([1, 2, 1])

with col2:
    if st.button("πŸš€ FETCH ALL DATA FROM ALL APIS",
                 type="primary",
                 use_container_width=True,
                 help="Automatically collect data from all 10 APIs and save to database"):

        # Progress tracking
        status_container = st.empty()
        progress_bar = st.progress(0)

        # fetch_all_apis reports once at start, twice per API and once at the
        # end; counting callbacks lets the bar advance during the run without
        # parsing the status messages.
        expected_updates = 2 * len(SIMPLIFIED_API_CONFIG) + 2
        progress_state = {"updates": 0}

        def update_progress(message):
            status_container.text(message)
            progress_state["updates"] += 1
            progress_bar.progress(min(progress_state["updates"] / expected_updates, 1.0))

        # Execute the one-click data collection
        with st.spinner("πŸ”„ Collecting data from all APIs..."):
            results = st.session_state.harvester.fetch_all_apis(update_progress)
            st.session_state.last_results = results

        progress_bar.progress(1.0)
        status_container.text("βœ… Collection completed!")

        # Show results
        summary = results['summary']

        # Success metrics
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.metric("βœ… Successful APIs", summary['successful'])

        with col2:
            st.metric("❌ Failed APIs", summary['failed'])

        with col3:
            st.metric("πŸ“Š Success Rate", f"{summary['success_rate']:.1f}%")

        with col4:
            total_records = sum(api_data['total_records'] for api_data in results['results'].values())
            st.metric("πŸ“„ Total Records", f"{total_records:,}")

        # Detailed results
        st.markdown("### πŸ“‹ Detailed Results")

        for api_name, api_data in results['results'].items():
            with st.expander(f"βœ… {SIMPLIFIED_API_CONFIG[api_name]['name']} - {api_data['total_records']} records"):
                for endpoint in api_data['endpoints']:
                    st.write(f"**URL:** {endpoint['endpoint_url']}")
                    st.write(f"**Records:** {endpoint['records']} | **Duration:** {endpoint['duration_ms']}ms | **Size:** {endpoint['size_bytes']} bytes")

                    # Surface per-endpoint failures instead of showing them
                    # as an ordinary zero-record result.
                    if endpoint.get('status') == 'error':
                        st.warning(f"**Error:** {endpoint.get('error', 'unknown error')}")

                    # The preview is rendered inline: Streamlit documents that
                    # an expander may not be placed inside another expander.
                    if endpoint.get('data_preview'):
                        st.caption("πŸ‘οΈ Data Preview")
                        st.text(endpoint['data_preview'])

        # Error details
        if results['errors']:
            st.markdown("### ❌ Error Details")
            for api_name, error in results['errors'].items():
                st.error(f"**{SIMPLIFIED_API_CONFIG[api_name]['name']}:** {error}")

# Show last results if available
if st.session_state.last_results:
    st.markdown("---")
    st.markdown("### πŸ“Š Quick Analytics")

    results = st.session_state.last_results

    # Create simple visualizations
    if results['results']:
        # API success chart
        api_names = [
            SIMPLIFIED_API_CONFIG[api_name]['name'] for api_name in results['results']
        ]
        record_counts = [
            api_data['total_records'] for api_data in results['results'].values()
        ]

        # Skip the chart when nothing was collected; an all-zero bar chart
        # carries no information.
        if any(count > 0 for count in record_counts):
            fig = px.bar(
                x=api_names,
                y=record_counts,
                title="πŸ“Š Records Collected by API",
                labels={'x': 'API', 'y': 'Records'}
            )
            fig.update_layout(
                paper_bgcolor="rgba(255,255,255,0.9)",
                plot_bgcolor="rgba(255,255,255,0.9)",
                font_color="#2c3e50"
            )
            st.plotly_chart(fig, use_container_width=True)

# Database viewer
with st.expander("πŸ—„οΈ Database Viewer"):
    try:
        conn = sqlite3.connect(DB_PATH)
        try:
            df = pd.read_sql_query('''
                SELECT api_name, endpoint_url, record_count, data_size_bytes,
                       fetch_timestamp, status
                FROM harvested_data
                ORDER BY fetch_timestamp DESC
                LIMIT 100
            ''', conn)
        finally:
            # Close the connection even when the query fails.
            conn.close()

        if not df.empty:
            st.dataframe(df, use_container_width=True)
        else:
            st.info("No data in database yet. Run the data collection first!")
    except Exception as e:
        st.error(f"Database error: {e}")

# Footer
st.markdown("---")
st.markdown("""
<div style="text-align: center; padding: 1rem; background: rgba(255,255,255,0.8); border-radius: 10px; color: #2c3e50;">
<p><strong>πŸš€ Simplified Data Harvester</strong> - Easy one-click data collection</p>
<p style="font-size: 0.9rem; color: #7f8c8d;">
βœ… 10 APIs β€’ πŸ—„οΈ Smart storage β€’ πŸ“Š Auto-analytics β€’ πŸ” Real-time preview
</p>
</div>
""", unsafe_allow_html=True)