Spaces:
Sleeping
Sleeping
| """ | |
| Enhanced filtering and visualization example for UAP Data Analysis Tool | |
| Demonstrates the improved dynamic filtering and visualization pipeline | |
| """ | |
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| from data_processing import DataProcessor | |
| from visualization import UAP_Visualizer | |
| from session_manager import SessionStateManager | |
| import plotly.graph_objects as go | |
def main():
    """Render the demo page and route to the selected analysis view.

    Draws the page heading, initialises session state, obtains a DataFrame
    (the real UAP dataset when it can be read, otherwise a seeded synthetic
    one), renders the sidebar controls and finally hands the data to the
    view function matching the chosen analysis mode.
    """
    st.title("π Enhanced UAP Data Analysis Pipeline")
    st.markdown("### Dynamic Filtering & Interactive Visualization System")

    # Session state has to exist before any view reads from it.
    SessionStateManager.initialize()

    def build_synthetic_frame():
        """Create the reproducible 10,000-row stand-in dataset."""
        rand = np.random
        rand.seed(42)
        n_samples = 10000
        shapes = ['circle', 'triangle', 'disk', 'light', 'other']
        colors = ['white', 'red', 'orange', 'blue', 'green', 'unknown']
        # Column order matters: each draw advances the seeded global RNG,
        # so reordering would change the generated values.
        return pd.DataFrame({
            'date': pd.date_range('2020-01-01', periods=n_samples, freq='H'),
            'latitude': rand.uniform(-90, 90, n_samples),
            'longitude': rand.uniform(-180, 180, n_samples),
            'duration_minutes': rand.exponential(10, n_samples),
            'shape': rand.choice(shapes, n_samples),
            'color': rand.choice(colors, n_samples),
            'altitude': rand.uniform(100, 50000, n_samples),
            'witnesses': rand.poisson(2, n_samples) + 1,
            'credibility_score': rand.beta(2, 5, n_samples),
            'description_length': rand.lognormal(3, 1, n_samples).astype(int),
        })

    def load_sample_data():
        """Return the UAP dataset, or synthetic data if it cannot be loaded."""
        try:
            df = DataProcessor.load_data('final_ufoseti_dataset.h5', key='df')
            st.success(f"β Loaded UAP dataset with {len(df):,} records")
            return df
        except Exception as e:
            # Best-effort demo: any load failure falls back to generated data.
            st.warning(f"Could not load UAP dataset: {e}")
            df = build_synthetic_frame()
            st.info(f"π Using sample dataset with {len(df):,} records for demonstration")
            return df

    df = load_sample_data()

    # Sidebar controls that drive the rest of the page.
    with st.sidebar:
        st.header("π§ Analysis Options")
        analysis_mode = st.radio(
            "Select Analysis Mode",
            ["Enhanced Filtering", "Interactive Visualizations", "Dashboard View", "Performance Demo"],
        )
        enable_quick_filters = st.checkbox("Enable Quick Filters", value=False)
        enable_advanced_filters = st.checkbox("Enable Advanced Filters", value=True)
        max_viz_points = st.slider("Max Visualization Points", 1000, 50000, 10000, step=1000)

    # One renderer per mode; lambdas defer the call until a mode is chosen.
    renderers = {
        "Enhanced Filtering": lambda: show_enhanced_filtering(
            df, enable_quick_filters, enable_advanced_filters
        ),
        "Interactive Visualizations": lambda: show_interactive_visualizations(df, max_viz_points),
        "Dashboard View": lambda: show_dashboard_view(df, max_viz_points),
        "Performance Demo": lambda: show_performance_demo(df),
    }
    render = renderers.get(analysis_mode)
    if render is not None:
        render()
def show_enhanced_filtering(df: pd.DataFrame, enable_quick_filters: bool, enable_advanced_filters: bool):
    """Render the filtering view: data profile, filter widgets and exports.

    Args:
        df: The dataset to profile and filter.
        enable_quick_filters: Forwarded to
            ``DataProcessor.filter_dataframe_enhanced``.
        enable_advanced_filters: Forwarded to
            ``DataProcessor.filter_dataframe_enhanced``.
    """
    st.header("π Enhanced Dynamic Filtering")

    # Profile summary: one metric tile per column family.
    with st.expander("π Data Profile Analysis", expanded=True):
        profile = DataProcessor.profile_data(df)
        tiles = (
            ("Categorical Columns", 'categorical_columns'),
            ("Numeric Columns", 'numeric_columns'),
            ("DateTime Columns", 'datetime_columns'),
            ("Text Columns", 'text_columns'),
        )
        for slot, (label, profile_key) in zip(st.columns(4), tiles):
            with slot:
                st.metric(label, len(profile[profile_key]))

        # The raw profile is opt-in.
        if st.checkbox("Show detailed column analysis"):
            st.json(profile)

    st.subheader("Apply Filters")
    filtered_df = DataProcessor.filter_dataframe_enhanced(
        df,
        enable_quick_filters=enable_quick_filters,
        enable_advanced_filters=enable_advanced_filters,
    )

    # Nothing to preview or export when the filters removed every row.
    if len(filtered_df) == 0:
        return

    st.subheader("π Filtered Data Preview")
    st.dataframe(filtered_df.head(100), use_container_width=True)

    save_slot, download_slot = st.columns(2)
    with save_slot:
        if st.button("πΎ Save Filtered Data"):
            # Only kept in session state; nothing is written to disk.
            SessionStateManager.set('last_filtered_data', filtered_df)
            st.success("Filtered data saved to session!")
    with download_slot:
        st.download_button(
            label="π₯ Download CSV",
            data=filtered_df.to_csv(index=False),
            file_name="filtered_uap_data.csv",
            mime="text/csv",
        )
def show_interactive_visualizations(df: pd.DataFrame, max_points: int):
    """Render the chart builder for a single, user-selected visualization.

    The user picks a visualization type, configures it through widgets and
    presses a button to generate the figure. Each type needs certain column
    kinds to be present in ``df``; when they are missing a warning is shown
    instead of leaving the page silently empty.

    Args:
        df: The dataset to visualize.
        max_points: Point budget forwarded to the scatter plot.
    """
    st.header("π Interactive Visualizations")

    # Classify columns once; every branch below works from these lists.
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
    datetime_cols = [
        col for col in df.columns if pd.api.types.is_datetime64_any_dtype(df[col])
    ]

    viz_type = st.selectbox(
        "Select Visualization Type",
        ["Scatter Plot", "Histogram", "Treemap", "Correlation Matrix", "Time Series"]
    )

    if viz_type == "Scatter Plot" and len(numeric_cols) >= 2:
        col1, col2, col3 = st.columns(3)
        with col1:
            x_col = st.selectbox("X-axis", numeric_cols, key="scatter_x")
        with col2:
            # The Y-axis choices exclude whatever is already on the X-axis.
            y_col = st.selectbox("Y-axis", [col for col in numeric_cols if col != x_col], key="scatter_y")
        with col3:
            color_col = st.selectbox("Color by", ["None"] + categorical_cols, key="scatter_color")
            color_col = None if color_col == "None" else color_col
        if st.button("Generate Scatter Plot"):
            fig = UAP_Visualizer.plot_interactive_scatter(
                df, x_col, y_col, color_col=color_col, max_points=max_points
            )
            st.plotly_chart(fig, use_container_width=True)

    elif viz_type == "Histogram" and len(numeric_cols) > 0:
        col1, col2 = st.columns(2)
        with col1:
            hist_col = st.selectbox("Column to analyze", numeric_cols + categorical_cols, key="hist_col")
        with col2:
            bins = st.slider("Number of bins", 10, 100, 50, key="hist_bins")
        if st.button("Generate Histogram"):
            if hist_col in numeric_cols:
                fig = UAP_Visualizer.plot_interactive_histogram(df, hist_col, bins=bins)
            else:
                # Categorical columns are shown as a treemap instead.
                fig = UAP_Visualizer.plot_interactive_treemap(df, hist_col, top_n=20)
            st.plotly_chart(fig, use_container_width=True)

    elif viz_type == "Treemap" and len(categorical_cols) > 0:
        col1, col2 = st.columns(2)
        with col1:
            tree_col = st.selectbox("Categorical column", categorical_cols, key="tree_col")
        with col2:
            top_n = st.slider("Top N categories", 5, 50, 20, key="tree_n")
        if st.button("Generate Treemap"):
            fig = UAP_Visualizer.plot_interactive_treemap(df, tree_col, top_n=top_n)
            st.plotly_chart(fig, use_container_width=True)

    elif viz_type == "Correlation Matrix" and len(numeric_cols) >= 2:
        col1, col2 = st.columns(2)
        with col1:
            corr_method = st.selectbox("Correlation method", ["pearson", "spearman", "kendall"], key="corr_method")
        with col2:
            selected_cols = st.multiselect("Select columns", numeric_cols, default=numeric_cols[:10], key="corr_cols")
        if selected_cols and st.button("Generate Correlation Matrix"):
            fig = UAP_Visualizer.plot_correlation_matrix(df[selected_cols], method=corr_method)
            st.plotly_chart(fig, use_container_width=True)

    elif viz_type == "Time Series" and len(datetime_cols) > 0 and len(numeric_cols) > 0:
        col1, col2, col3 = st.columns(3)
        with col1:
            date_col = st.selectbox("Date column", datetime_cols, key="ts_date")
        with col2:
            value_cols = st.multiselect("Value columns", numeric_cols, default=numeric_cols[:3], key="ts_values")
        with col3:
            resample_freq = st.selectbox("Resample frequency", ["None", "D", "W", "M"], key="ts_freq")
            resample_freq = None if resample_freq == "None" else resample_freq
        if value_cols and st.button("Generate Time Series"):
            fig = UAP_Visualizer.plot_time_series(df, date_col, value_cols, resample_freq=resample_freq)
            st.plotly_chart(fig, use_container_width=True)

    else:
        # Reached only when the selected type's column prerequisites are not
        # met; without this the page would render nothing and give no reason.
        st.warning(
            f"{viz_type} is not available: the dataset does not contain the "
            "column types this visualization requires."
        )
def show_dashboard_view(df: pd.DataFrame, max_points: int):
    """Render a multi-chart dashboard built from the first suitable columns.

    Needs at least two numeric columns and one categorical column; otherwise
    a warning is shown and nothing else is drawn.

    Args:
        df: The dataset to chart.
        max_points: Overall point budget; the scatter plot receives a quarter
            of it.
    """
    st.header("π Interactive Dashboard")

    charts = []
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()

    if len(numeric_cols) < 2 or len(categorical_cols) < 1:
        st.warning("Not enough numeric or categorical columns for dashboard generation")
        return

    with st.spinner("Generating dashboard charts..."):
        first_numeric, second_numeric = numeric_cols[0], numeric_cols[1]
        first_category = categorical_cols[0]

        # Scatter, histogram and treemap are always available past the guard.
        charts.append(
            UAP_Visualizer.plot_interactive_scatter(
                df, first_numeric, second_numeric,
                color_col=first_category,
                max_points=max_points//4
            )
        )
        charts.append(UAP_Visualizer.plot_interactive_histogram(df, first_numeric))
        charts.append(UAP_Visualizer.plot_interactive_treemap(df, first_category, top_n=15))

        # The correlation matrix is only added with three or more numeric columns.
        if len(numeric_cols) >= 3:
            charts.append(UAP_Visualizer.plot_correlation_matrix(df[numeric_cols[:5]]))

    # Left column shows charts 0 and 2, right column shows charts 1 and 3.
    left, right = st.columns(2)
    for column, chart_slots in ((left, (0, 2)), (right, (1, 3))):
        with column:
            for slot in chart_slots:
                if slot < len(charts):
                    st.plotly_chart(charts[slot], use_container_width=True)

    # Optional single-figure view of the same charts.
    if st.button("Generate Combined Dashboard"):
        dashboard_fig = UAP_Visualizer.create_dashboard_layout(charts[:4], layout="2x2")
        st.plotly_chart(dashboard_fig, use_container_width=True)
def show_performance_demo(df: pd.DataFrame):
    """Render dataset size metrics and an on-demand timing test.

    Shows row count, memory usage and the number of entries stored under the
    ``cached_visualizations`` session key. Pressing the button times data
    profiling and one histogram build and, for datasets with more than
    10,000 rows, times ``UAP_Visualizer._smart_sampling`` at several sizes.

    Args:
        df: The dataset to measure.
    """
    # Imported before any branch so `time` is bound wherever it is used below.
    import time

    st.header("β‘ Performance Demonstration")

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Dataset Size", f"{len(df):,} rows")
    with col2:
        st.metric("Memory Usage", f"{df.memory_usage(deep=True).sum() / 1024**2:.1f} MB")
    with col3:
        cache_info = st.session_state.get('cached_visualizations', {})
        st.metric("Cached Visualizations", len(cache_info))

    st.subheader("πββοΈ Speed Comparison")
    if st.button("Run Performance Test"):
        # perf_counter is monotonic and high-resolution, so short intervals
        # are not distorted by system clock adjustments.
        started = time.perf_counter()
        DataProcessor.profile_data(df)
        profile_time = time.perf_counter() - started

        numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
        if numeric_cols:
            started = time.perf_counter()
            UAP_Visualizer.plot_interactive_histogram(df, numeric_cols[0])
            viz_time = time.perf_counter() - started
        else:
            # Nothing to plot; report zero rather than skipping the metric.
            viz_time = 0.0

        perf_col1, perf_col2 = st.columns(2)
        with perf_col1:
            st.metric("Data Profiling", f"{profile_time:.3f} seconds")
        with perf_col2:
            st.metric("Visualization Generation", f"{viz_time:.3f} seconds")

        st.info("π Subsequent calls to the same functions will be much faster due to caching!")

        if len(df) > 10000:
            st.subheader("π Smart Sampling Demo")
            # Every size is <= len(df) because of the guard above, so each
            # one is timed; labels and timings stay aligned by construction.
            sample_sizes = [1000, 5000, 10000, len(df)]
            size_labels = []
            sample_times = []
            for size in sample_sizes:
                started = time.perf_counter()
                UAP_Visualizer._smart_sampling(df, max_points=size)
                sample_times.append(time.perf_counter() - started)
                size_labels.append(f"{size:,}")

            perf_df = pd.DataFrame({
                'Sample Size': size_labels,
                'Processing Time': sample_times,
            })
            st.line_chart(perf_df.set_index('Sample Size'))
# Call main() only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()