isakskogstad committed on
Commit
284efb4
·
verified ·
1 Parent(s): 89f1bd7

Upload app_ultimate.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app_ultimate.py +153 -105
app_ultimate.py CHANGED
@@ -26,6 +26,9 @@ from sklearn.metrics.pairwise import cosine_similarity
26
  import warnings
27
  warnings.filterwarnings('ignore')
28
 
 
 
 
29
  # AI/ML Imports for enhanced functionality
30
  try:
31
  from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
@@ -33,7 +36,6 @@ try:
33
  ML_AVAILABLE = True
34
  except ImportError:
35
  ML_AVAILABLE = False
36
- st.warning("⚠️ ML libraries not available. Some AI features will be disabled.")
37
 
38
  # Enhanced Page Configuration
39
  st.set_page_config(
@@ -43,99 +45,134 @@ st.set_page_config(
43
  initial_sidebar_state="collapsed"
44
  )
45
 
46
- # Enhanced CSS with modern animations
47
  st.markdown("""
48
  <style>
49
  .main > div {
50
  padding-top: 1rem;
51
  }
52
  .stApp {
53
- background: linear-gradient(135deg, #667eea 0%, #764ba2 50%, #f093fb 100%);
54
- color: white;
55
  }
56
  .metric-card {
57
- background: rgba(255, 255, 255, 0.15);
58
- backdrop-filter: blur(15px);
59
- border-radius: 20px;
60
  padding: 1.5rem;
61
  margin: 0.5rem 0;
62
- border: 1px solid rgba(255, 255, 255, 0.3);
63
- box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37);
64
  transition: all 0.3s ease;
65
  }
66
  .metric-card:hover {
67
- transform: translateY(-3px);
68
- box-shadow: 0 12px 40px 0 rgba(31, 38, 135, 0.6);
69
  }
70
  .api-card {
71
- background: rgba(255, 255, 255, 0.12);
72
- backdrop-filter: blur(10px);
73
- border-radius: 15px;
74
  padding: 1.2rem;
75
  margin: 0.5rem;
76
- border: 1px solid rgba(255, 255, 255, 0.25);
77
- transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
78
  position: relative;
79
  overflow: hidden;
80
  }
81
- .api-card::before {
82
- content: '';
83
- position: absolute;
84
- top: 0;
85
- left: -100%;
86
- width: 100%;
87
- height: 100%;
88
- background: linear-gradient(90deg, transparent, rgba(255,255,255,0.1), transparent);
89
- transition: left 0.5s;
90
- }
91
- .api-card:hover::before {
92
- left: 100%;
93
- }
94
  .api-card:hover {
95
- transform: translateY(-5px) scale(1.02);
96
- box-shadow: 0 15px 45px 0 rgba(31, 38, 135, 0.6);
97
- border-color: rgba(255, 255, 255, 0.4);
98
  }
99
  .title-container {
100
  text-align: center;
101
  padding: 2rem 0;
102
- background: rgba(255, 255, 255, 0.08);
103
- border-radius: 25px;
104
  margin-bottom: 2rem;
105
- backdrop-filter: blur(20px);
106
- border: 1px solid rgba(255, 255, 255, 0.2);
107
  }
108
  .status-indicator {
109
- width: 12px;
110
- height: 12px;
111
  border-radius: 50%;
112
  display: inline-block;
113
  margin-right: 8px;
114
- animation: pulse 2s infinite;
115
  }
116
- @keyframes pulse {
117
- 0% { opacity: 1; }
118
- 50% { opacity: 0.5; }
119
- 100% { opacity: 1; }
 
 
 
 
 
 
 
 
120
  }
121
- .status-active { background-color: #4ade80; }
122
- .status-discovering { background-color: #fbbf24; }
123
- .status-error { background-color: #ef4444; }
124
- .status-paused { background-color: #6b7280; }
125
  .discovery-progress {
126
- background: rgba(255, 255, 255, 0.1);
127
- border-radius: 10px;
128
  padding: 1rem;
129
  margin: 1rem 0;
130
- border: 1px solid rgba(255, 255, 255, 0.2);
 
131
  }
132
  .endpoint-item {
133
- background: rgba(255, 255, 255, 0.05);
134
- border-radius: 8px;
135
  padding: 0.5rem;
136
  margin: 0.3rem 0;
137
- border-left: 3px solid #4ade80;
138
  font-size: 0.9rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  }
140
  </style>
141
  """, unsafe_allow_html=True)
@@ -156,6 +193,7 @@ class AIDataQualityAssessor:
156
 
157
  def _initialize_models(self):
158
  """Initialize AI models for quality assessment"""
 
159
  if ML_AVAILABLE:
160
  try:
161
  # Initialize quality classifier
@@ -168,10 +206,10 @@ class AIDataQualityAssessor:
168
  # Initialize embeddings model for similarity
169
  self.embeddings_model = SentenceTransformer('all-MiniLM-L6-v2')
170
 
171
- st.success("βœ… AI models loaded successfully!")
172
  except Exception as e:
173
- st.warning(f"⚠️ Failed to load AI models: {e}")
174
  ML_AVAILABLE = False
 
 
175
 
176
  def assess_data_quality(self, data: Any, api_name: str) -> Dict:
177
  """Comprehensive AI-powered data quality assessment"""
@@ -210,7 +248,6 @@ class AIDataQualityAssessor:
210
  }
211
 
212
  except Exception as e:
213
- st.warning(f"AI quality assessment failed: {e}")
214
  return self._basic_quality_assessment(data, api_name)
215
 
216
  def _data_to_text(self, data: Any) -> str:
@@ -362,11 +399,13 @@ class SemanticDataAnalyzer:
362
 
363
  def _initialize_model(self):
364
  """Initialize sentence transformer model"""
 
365
  if ML_AVAILABLE:
366
  try:
367
  self.embeddings_model = SentenceTransformer('all-MiniLM-L6-v2')
368
  except Exception as e:
369
- st.warning(f"Failed to load embeddings model: {e}")
 
370
 
371
  def find_similar_datasets(self, new_data: Any, api_name: str, threshold: float = 0.85) -> List[Dict]:
372
  """Find semantically similar datasets"""
@@ -396,7 +435,6 @@ class SemanticDataAnalyzer:
396
  return sorted(similar_datasets, key=lambda x: x['similarity'], reverse=True)
397
 
398
  except Exception as e:
399
- st.warning(f"Semantic analysis failed: {e}")
400
  return []
401
 
402
  def _data_to_text(self, data: Any) -> str:
@@ -509,7 +547,7 @@ class APIHealthMonitor:
509
  return float(anomaly_score)
510
 
511
  except Exception as e:
512
- st.warning(f"Anomaly detection failed: {e}")
513
 
514
  return 0.0
515
 
@@ -1702,20 +1740,29 @@ if 'last_session_info' not in st.session_state:
1702
  # Enhanced Header
1703
  st.markdown("""
1704
  <div class="title-container">
1705
- <h1 style="font-size: 3.5rem; margin: 0; background: linear-gradient(45deg, #fff, #f0f8ff, #e6f3ff); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">
1706
  πŸš€ Ultimate Data Harvester
1707
  </h1>
1708
- <p style="font-size: 1.3rem; margin: 0.5rem 0 0 0; opacity: 0.9;">
1709
- Deep endpoint discovery β€’ Session resumption β€’ Intelligent storage
 
 
 
1710
  </p>
1711
  <div style="margin-top: 1rem;">
1712
- <span style="background: rgba(255,255,255,0.25); padding: 0.3rem 0.8rem; border-radius: 15px; margin: 0 0.3rem; font-size: 0.9rem;">Recursive Discovery</span>
1713
- <span style="background: rgba(255,255,255,0.25); padding: 0.3rem 0.8rem; border-radius: 15px; margin: 0 0.3rem; font-size: 0.9rem;">Auto-Resume</span>
1714
- <span style="background: rgba(255,255,255,0.25); padding: 0.3rem 0.8rem; border-radius: 15px; margin: 0 0.3rem; font-size: 0.9rem;">Smart Storage</span>
1715
  </div>
1716
  </div>
1717
  """, unsafe_allow_html=True)
1718
 
 
 
 
 
 
 
1719
  # Session Management Section
1720
  st.markdown("### 🎯 Session Management")
1721
 
@@ -1969,9 +2016,9 @@ with tab3:
1969
  color_continuous_scale='viridis'
1970
  )
1971
  fig_records.update_layout(
1972
- paper_bgcolor="rgba(0,0,0,0)",
1973
- plot_bgcolor="rgba(0,0,0,0)",
1974
- font_color="white"
1975
  )
1976
  st.plotly_chart(fig_records, use_container_width=True)
1977
 
@@ -1985,9 +2032,9 @@ with tab3:
1985
  title="πŸ’Ύ Data Size Distribution (MB)"
1986
  )
1987
  fig_size.update_layout(
1988
- paper_bgcolor="rgba(0,0,0,0)",
1989
- plot_bgcolor="rgba(0,0,0,0)",
1990
- font_color="white"
1991
  )
1992
  st.plotly_chart(fig_size, use_container_width=True)
1993
 
@@ -2003,9 +2050,9 @@ with tab3:
2003
  color_continuous_scale='plasma'
2004
  )
2005
  fig_perf.update_layout(
2006
- paper_bgcolor="rgba(0,0,0,0)",
2007
- plot_bgcolor="rgba(0,0,0,0)",
2008
- font_color="white"
2009
  )
2010
  st.plotly_chart(fig_perf, use_container_width=True)
2011
 
@@ -2016,41 +2063,42 @@ with tab3:
2016
  conn.close()
2017
 
2018
  # AI Enhancement Panel
2019
- if ML_AVAILABLE:
2020
- st.markdown("---")
2021
- with st.expander("πŸ€– AI Enhancement Status", expanded=False):
2022
- col1, col2, col3 = st.columns(3)
2023
-
2024
- with col1:
2025
- st.markdown("**🎯 Quality Assessment**")
2026
- if ai_quality_assessor and ai_quality_assessor.quality_model:
2027
- st.success("βœ… Active - DistilBERT")
2028
- else:
2029
- st.error("❌ Not Available")
2030
-
2031
- with col2:
2032
- st.markdown("**πŸ” Semantic Analysis**")
2033
- if semantic_analyzer and semantic_analyzer.embeddings_model:
2034
- st.success("βœ… Active - MiniLM-L6-v2")
2035
- else:
2036
- st.error("❌ Not Available")
2037
-
2038
- with col3:
2039
- st.markdown("**πŸ“Š Health Monitoring**")
2040
- if health_monitor:
2041
- st.success("βœ… Active - Isolation Forest")
2042
- else:
2043
- st.error("❌ Not Available")
2044
-
2045
- if ai_quality_assessor and hasattr(ai_quality_assessor, 'quality_model'):
2046
- st.info("πŸ’‘ AI models are loaded and ready for enhanced data analysis!")
 
2047
 
2048
  # Footer
2049
  st.markdown("---")
2050
  st.markdown("""
2051
- <div style="text-align: center; padding: 1rem; opacity: 0.9;">
2052
- <p><strong>πŸš€ Ultimate Data Harvester with AI</strong> - Deep discovery, session resumption, intelligent storage</p>
2053
- <p style="font-size: 0.9rem;">
2054
  πŸ” Recursive endpoint discovery β€’ πŸ€– AI quality assessment β€’ 🎯 Session management β€’ πŸ’Ύ Smart database storage β€’ πŸ“Š Real-time analytics
2055
  </p>
2056
  </div>
 
26
  import warnings
27
  warnings.filterwarnings('ignore')
28
 
29
+ # Global ML availability flag
30
+ ML_AVAILABLE = False
31
+
32
  # AI/ML Imports for enhanced functionality
33
  try:
34
  from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 
36
  ML_AVAILABLE = True
37
  except ImportError:
38
  ML_AVAILABLE = False
 
39
 
40
  # Enhanced Page Configuration
41
  st.set_page_config(
 
45
  initial_sidebar_state="collapsed"
46
  )
47
 
48
+ # Enhanced CSS with modern, professional styling
49
  st.markdown("""
50
  <style>
51
  .main > div {
52
  padding-top: 1rem;
53
  }
54
  .stApp {
55
+ background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
56
+ color: #2c3e50;
57
  }
58
  .metric-card {
59
+ background: rgba(255, 255, 255, 0.95);
60
+ border-radius: 12px;
 
61
  padding: 1.5rem;
62
  margin: 0.5rem 0;
63
+ border: 1px solid rgba(52, 73, 94, 0.1);
64
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
65
  transition: all 0.3s ease;
66
  }
67
  .metric-card:hover {
68
+ transform: translateY(-2px);
69
+ box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15);
70
  }
71
  .api-card {
72
+ background: rgba(255, 255, 255, 0.9);
73
+ border-radius: 10px;
 
74
  padding: 1.2rem;
75
  margin: 0.5rem;
76
+ border: 1px solid rgba(52, 73, 94, 0.15);
77
+ transition: all 0.3s ease;
78
  position: relative;
79
  overflow: hidden;
80
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  .api-card:hover {
82
+ transform: translateY(-3px);
83
+ box-shadow: 0 8px 20px rgba(0, 0, 0, 0.12);
84
+ border-color: #3498db;
85
  }
86
  .title-container {
87
  text-align: center;
88
  padding: 2rem 0;
89
+ background: rgba(255, 255, 255, 0.9);
90
+ border-radius: 15px;
91
  margin-bottom: 2rem;
92
+ border: 1px solid rgba(52, 73, 94, 0.1);
93
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.05);
94
  }
95
  .status-indicator {
96
+ width: 10px;
97
+ height: 10px;
98
  border-radius: 50%;
99
  display: inline-block;
100
  margin-right: 8px;
 
101
  }
102
+ .status-active { background-color: #27ae60; }
103
+ .status-discovering { background-color: #f39c12; }
104
+ .status-error { background-color: #e74c3c; }
105
+ .status-paused { background-color: #95a5a6; }
106
+
107
+ .ai-panel {
108
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
109
+ border-radius: 10px;
110
+ padding: 1rem;
111
+ margin: 1rem 0;
112
+ color: white;
113
+ border: none;
114
  }
 
 
 
 
115
  .discovery-progress {
116
+ background: rgba(255, 255, 255, 0.95);
117
+ border-radius: 8px;
118
  padding: 1rem;
119
  margin: 1rem 0;
120
+ border: 1px solid rgba(52, 73, 94, 0.1);
121
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
122
  }
123
  .endpoint-item {
124
+ background: rgba(255, 255, 255, 0.8);
125
+ border-radius: 6px;
126
  padding: 0.5rem;
127
  margin: 0.3rem 0;
128
+ border-left: 3px solid #3498db;
129
  font-size: 0.9rem;
130
+ color: #34495e;
131
+ }
132
+
133
+ /* Custom button styling */
134
+ .stButton > button {
135
+ background: linear-gradient(135deg, #3498db, #2980b9);
136
+ color: white;
137
+ border: none;
138
+ border-radius: 8px;
139
+ padding: 0.5rem 1rem;
140
+ font-weight: 500;
141
+ transition: all 0.3s ease;
142
+ }
143
+ .stButton > button:hover {
144
+ background: linear-gradient(135deg, #2980b9, #1f4e79);
145
+ transform: translateY(-1px);
146
+ box-shadow: 0 4px 12px rgba(52, 152, 219, 0.3);
147
+ }
148
+
149
+ /* Tab styling */
150
+ .stTabs [data-baseweb="tab-list"] {
151
+ gap: 8px;
152
+ }
153
+ .stTabs [data-baseweb="tab"] {
154
+ background-color: rgba(255, 255, 255, 0.7);
155
+ border-radius: 8px;
156
+ color: #2c3e50;
157
+ font-weight: 500;
158
+ }
159
+ .stTabs [aria-selected="true"] {
160
+ background-color: #3498db;
161
+ color: white;
162
+ }
163
+
164
+ /* Metrics styling */
165
+ [data-testid="metric-container"] {
166
+ background: rgba(255, 255, 255, 0.9);
167
+ border: 1px solid rgba(52, 73, 94, 0.1);
168
+ padding: 1rem;
169
+ border-radius: 8px;
170
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
171
+ }
172
+
173
+ /* Sidebar styling */
174
+ .css-1d391kg {
175
+ background: linear-gradient(135deg, #ecf0f1 0%, #bdc3c7 100%);
176
  }
177
  </style>
178
  """, unsafe_allow_html=True)
 
193
 
194
  def _initialize_models(self):
195
  """Initialize AI models for quality assessment"""
196
+ global ML_AVAILABLE
197
  if ML_AVAILABLE:
198
  try:
199
  # Initialize quality classifier
 
206
  # Initialize embeddings model for similarity
207
  self.embeddings_model = SentenceTransformer('all-MiniLM-L6-v2')
208
 
 
209
  except Exception as e:
 
210
  ML_AVAILABLE = False
211
+ self.quality_model = None
212
+ self.embeddings_model = None
213
 
214
  def assess_data_quality(self, data: Any, api_name: str) -> Dict:
215
  """Comprehensive AI-powered data quality assessment"""
 
248
  }
249
 
250
  except Exception as e:
 
251
  return self._basic_quality_assessment(data, api_name)
252
 
253
  def _data_to_text(self, data: Any) -> str:
 
399
 
400
  def _initialize_model(self):
401
  """Initialize sentence transformer model"""
402
+ global ML_AVAILABLE
403
  if ML_AVAILABLE:
404
  try:
405
  self.embeddings_model = SentenceTransformer('all-MiniLM-L6-v2')
406
  except Exception as e:
407
+ ML_AVAILABLE = False
408
+ self.embeddings_model = None
409
 
410
  def find_similar_datasets(self, new_data: Any, api_name: str, threshold: float = 0.85) -> List[Dict]:
411
  """Find semantically similar datasets"""
 
435
  return sorted(similar_datasets, key=lambda x: x['similarity'], reverse=True)
436
 
437
  except Exception as e:
 
438
  return []
439
 
440
  def _data_to_text(self, data: Any) -> str:
 
547
  return float(anomaly_score)
548
 
549
  except Exception as e:
550
+ pass # Silent fail for anomaly detection
551
 
552
  return 0.0
553
 
 
1740
  # Enhanced Header
1741
  st.markdown("""
1742
  <div class="title-container">
1743
+ <h1 style="font-size: 2.5rem; margin: 0; color: #2c3e50;">
1744
  πŸš€ Ultimate Data Harvester
1745
  </h1>
1746
+ <p style="font-size: 1.1rem; margin: 0.5rem 0 0 0; color: #34495e;">
1747
+ AI-Enhanced Deep Discovery β€’ Session Resumption β€’ Intelligent Storage
1748
+ </p>
1749
+ <p style="font-size: 0.95rem; margin: 0.3rem 0 0 0; color: #7f8c8d;">
1750
+ Comprehensive data collection from 10 international APIs with advanced analytics
1751
  </p>
1752
  <div style="margin-top: 1rem;">
1753
+ <span style="background: #ecf0f1; color: #2c3e50; padding: 0.3rem 0.8rem; border-radius: 15px; margin: 0 0.3rem; font-size: 0.9rem;">πŸ” Recursive Discovery</span>
1754
+ <span style="background: #ecf0f1; color: #2c3e50; padding: 0.3rem 0.8rem; border-radius: 15px; margin: 0 0.3rem; font-size: 0.9rem;">🎯 Auto-Resume</span>
1755
+ <span style="background: #ecf0f1; color: #2c3e50; padding: 0.3rem 0.8rem; border-radius: 15px; margin: 0 0.3rem; font-size: 0.9rem;">πŸ’Ύ Smart Storage</span>
1756
  </div>
1757
  </div>
1758
  """, unsafe_allow_html=True)
1759
 
1760
+ # Display ML status prominently
1761
+ if ML_AVAILABLE:
1762
+ st.success("πŸ€– **AI Enhanced Mode Active** - Advanced quality assessment and semantic analysis enabled")
1763
+ else:
1764
+ st.info("πŸ“Š **Standard Mode** - Basic functionality available. Install transformers and sentence-transformers for AI features.")
1765
+
1766
  # Session Management Section
1767
  st.markdown("### 🎯 Session Management")
1768
 
 
2016
  color_continuous_scale='viridis'
2017
  )
2018
  fig_records.update_layout(
2019
+ paper_bgcolor="rgba(255,255,255,0.9)",
2020
+ plot_bgcolor="rgba(255,255,255,0.9)",
2021
+ font_color="#2c3e50"
2022
  )
2023
  st.plotly_chart(fig_records, use_container_width=True)
2024
 
 
2032
  title="πŸ’Ύ Data Size Distribution (MB)"
2033
  )
2034
  fig_size.update_layout(
2035
+ paper_bgcolor="rgba(255,255,255,0.9)",
2036
+ plot_bgcolor="rgba(255,255,255,0.9)",
2037
+ font_color="#2c3e50"
2038
  )
2039
  st.plotly_chart(fig_size, use_container_width=True)
2040
 
 
2050
  color_continuous_scale='plasma'
2051
  )
2052
  fig_perf.update_layout(
2053
+ paper_bgcolor="rgba(255,255,255,0.9)",
2054
+ plot_bgcolor="rgba(255,255,255,0.9)",
2055
+ font_color="#2c3e50"
2056
  )
2057
  st.plotly_chart(fig_perf, use_container_width=True)
2058
 
 
2063
  conn.close()
2064
 
2065
  # AI Enhancement Panel
2066
+ st.markdown("---")
2067
+ with st.expander("πŸ€– AI Enhancement Status", expanded=False):
2068
+ col1, col2, col3 = st.columns(3)
2069
+
2070
+ with col1:
2071
+ st.markdown("**🎯 Quality Assessment**")
2072
+ if ML_AVAILABLE and ai_quality_assessor and ai_quality_assessor.quality_model:
2073
+ st.success("βœ… Active - DistilBERT")
2074
+ else:
2075
+ st.error("❌ Not Available")
2076
+
2077
+ with col2:
2078
+ st.markdown("**πŸ” Semantic Analysis**")
2079
+ if ML_AVAILABLE and semantic_analyzer and semantic_analyzer.embeddings_model:
2080
+ st.success("βœ… Active - MiniLM-L6-v2")
2081
+ else:
2082
+ st.error("❌ Not Available")
2083
+
2084
+ with col3:
2085
+ st.markdown("**πŸ“Š Health Monitoring**")
2086
+ if health_monitor:
2087
+ st.success("βœ… Active - Isolation Forest")
2088
+ else:
2089
+ st.error("❌ Not Available")
2090
+
2091
+ if ML_AVAILABLE:
2092
+ st.info("πŸ’‘ AI models are loaded and ready for enhanced data analysis!")
2093
+ else:
2094
+ st.warning("⚠️ Install ML libraries (transformers, sentence-transformers) for AI features")
2095
 
2096
  # Footer
2097
  st.markdown("---")
2098
  st.markdown("""
2099
+ <div style="text-align: center; padding: 1rem; background: rgba(255,255,255,0.8); border-radius: 10px; color: #2c3e50;">
2100
+ <p><strong>πŸš€ Ultimate Data Harvester with AI</strong> - Professional data collection platform</p>
2101
+ <p style="font-size: 0.9rem; color: #7f8c8d;">
2102
  πŸ” Recursive endpoint discovery β€’ πŸ€– AI quality assessment β€’ 🎯 Session management β€’ πŸ’Ύ Smart database storage β€’ πŸ“Š Real-time analytics
2103
  </p>
2104
  </div>