Spaces:

UFOSINT
/

UAP-Data-Analysis-Tool

Sleeping

App Files Files Community

UAP-Data-Analysis-Tool / utils /enhanced_example.py

Ashoka74

Deploy current work to HF Space (slim)

a1aef88 29 days ago

Raw

History Blame Contribute Delete

14.8 kB

	"""
	Enhanced filtering and visualization example for UAP Data Analysis Tool
	Demonstrates the improved dynamic filtering and visualization pipeline
	"""

	import streamlit as st
	import pandas as pd
	import numpy as np
	from data_processing import DataProcessor
	from visualization import UAP_Visualizer
	from session_manager import SessionStateManager
	import plotly.graph_objects as go

	def main():
	    """Render the enhanced filtering and visualization demo page.

	    Draws the page title, initializes session state, loads the dataset
	    through a cached loader (which falls back to a seeded synthetic dataset
	    when loading raises), collects the sidebar options, and hands off to the
	    view function that matches the selected analysis mode.
	    """
	    st.title("🚀 Enhanced UAP Data Analysis Pipeline")
	    st.markdown("### Dynamic Filtering & Interactive Visualization System")

	    # Set up session state
	    SessionStateManager.initialize()

	    @st.cache_data
	    def load_sample_data():
	        """Return the UAP dataset, or a synthetic stand-in if loading fails."""
	        try:
	            # First choice: the actual UAP dataset
	            dataset = DataProcessor.load_data('final_ufoseti_dataset.h5', key='df')
	            st.success(f"✅ Loaded UAP dataset with {len(dataset):,} records")
	            return dataset
	        except Exception as exc:
	            st.warning(f"Could not load UAP dataset: {exc}")

	            # Fixed seed so the synthetic records are reproducible
	            np.random.seed(42)
	            row_count = 10000

	            # Keep this column order: every draw consumes the seeded
	            # random stream in sequence
	            synthetic_columns = {
	                'date': pd.date_range('2020-01-01', periods=row_count, freq='H'),
	                'latitude': np.random.uniform(-90, 90, row_count),
	                'longitude': np.random.uniform(-180, 180, row_count),
	                'duration_minutes': np.random.exponential(10, row_count),
	                'shape': np.random.choice(['circle', 'triangle', 'disk', 'light', 'other'], row_count),
	                'color': np.random.choice(['white', 'red', 'orange', 'blue', 'green', 'unknown'], row_count),
	                'altitude': np.random.uniform(100, 50000, row_count),
	                'witnesses': np.random.poisson(2, row_count) + 1,
	                'credibility_score': np.random.beta(2, 5, row_count),
	                'description_length': np.random.lognormal(3, 1, row_count).astype(int),
	            }

	            dataset = pd.DataFrame(synthetic_columns)
	            st.info(f"📊 Using sample dataset with {len(dataset):,} records for demonstration")
	            return dataset

	    records = load_sample_data()

	    # Sidebar: analysis mode plus the knobs passed on to the views
	    with st.sidebar:
	        st.header("🔧 Analysis Options")

	        analysis_mode = st.radio(
	            "Select Analysis Mode",
	            options=["Enhanced Filtering", "Interactive Visualizations", "Dashboard View", "Performance Demo"],
	        )

	        enable_quick_filters = st.checkbox("Enable Quick Filters", value=False)
	        enable_advanced_filters = st.checkbox("Enable Advanced Filters", value=True)
	        max_viz_points = st.slider(
	            "Max Visualization Points",
	            min_value=1000,
	            max_value=50000,
	            value=10000,
	            step=1000,
	        )

	    # One zero-argument launcher per mode; only the selected one is invoked
	    mode_views = {
	        "Enhanced Filtering": lambda: show_enhanced_filtering(
	            records, enable_quick_filters, enable_advanced_filters
	        ),
	        "Interactive Visualizations": lambda: show_interactive_visualizations(records, max_viz_points),
	        "Dashboard View": lambda: show_dashboard_view(records, max_viz_points),
	        "Performance Demo": lambda: show_performance_demo(records),
	    }
	    render_view = mode_views.get(analysis_mode)
	    if render_view is not None:
	        render_view()

	def show_enhanced_filtering(df: pd.DataFrame, enable_quick_filters: bool, enable_advanced_filters: bool):
	    """Render the data profile, the filter controls and the filtered result.

	    Args:
	        df: Dataset to profile and filter.
	        enable_quick_filters: Forwarded to ``DataProcessor.filter_dataframe_enhanced``.
	        enable_advanced_filters: Forwarded to ``DataProcessor.filter_dataframe_enhanced``.

	    When the filtered frame has at least one row, a preview of its first 100
	    rows is shown together with a save-to-session button and a CSV download.
	    """
	    st.header("🔍 Enhanced Dynamic Filtering")

	    # Data profile comes first
	    with st.expander("📊 Data Profile Analysis", expanded=True):
	        profile = DataProcessor.profile_data(df)

	        # (label, profile key) for each metric tile, left to right
	        metric_specs = (
	            ("Categorical Columns", 'categorical_columns'),
	            ("Numeric Columns", 'numeric_columns'),
	            ("DateTime Columns", 'datetime_columns'),
	            ("Text Columns", 'text_columns'),
	        )
	        for tile, (label, profile_key) in zip(st.columns(4), metric_specs):
	            with tile:
	                st.metric(label, len(profile[profile_key]))

	        # Optional dump of the full profile
	        if st.checkbox("Show detailed column analysis"):
	            st.json(profile)

	    # Filter controls
	    st.subheader("Apply Filters")
	    filtered = DataProcessor.filter_dataframe_enhanced(
	        df,
	        enable_quick_filters=enable_quick_filters,
	        enable_advanced_filters=enable_advanced_filters,
	    )

	    # Nothing further to render when no rows survive the filters
	    if len(filtered) == 0:
	        return

	    st.subheader("📋 Filtered Data Preview")
	    st.dataframe(filtered.head(100), use_container_width=True)

	    # Export options
	    save_column, download_column = st.columns(2)

	    with save_column:
	        if st.button("💾 Save Filtered Data"):
	            # In a real app, you'd save to a file
	            SessionStateManager.set('last_filtered_data', filtered)
	            st.success("Filtered data saved to session!")

	    with download_column:
	        csv_payload = filtered.to_csv(index=False)
	        st.download_button(
	            label="📥 Download CSV",
	            data=csv_payload,
	            file_name="filtered_uap_data.csv",
	            mime="text/csv",
	        )

	def show_interactive_visualizations(df: pd.DataFrame, max_points: int):
	    """Render the controls for one interactive chart and draw it on request.

	    The user picks a visualization type; the controls for that type are shown
	    only when the dataset has the columns it needs. Each chart is generated
	    when its "Generate ..." button is pressed. If the dataset lacks the
	    required columns, a warning names what is missing instead of leaving the
	    page blank.

	    Args:
	        df: Dataset to visualize.
	        max_points: Forwarded as ``max_points`` to the scatter plot.
	    """
	    st.header("📊 Interactive Visualizations")

	    # Classify the columns by dtype
	    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
	    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
	    datetime_cols = [col for col in df.columns if pd.api.types.is_datetime64_any_dtype(df[col])]

	    # Visualization type selection
	    viz_type = st.selectbox(
	        "Select Visualization Type",
	        ["Scatter Plot", "Histogram", "Treemap", "Correlation Matrix", "Time Series"]
	    )

	    # What each type needs; mirrors the branch conditions below and is used
	    # for the fallback warning
	    requirements = {
	        "Scatter Plot": "at least two numeric columns",
	        "Histogram": "at least one numeric column",
	        "Treemap": "at least one categorical column",
	        "Correlation Matrix": "at least two numeric columns",
	        "Time Series": "at least one datetime column and one numeric column",
	    }

	    if viz_type == "Scatter Plot" and len(numeric_cols) >= 2:
	        col1, col2, col3 = st.columns(3)

	        with col1:
	            x_col = st.selectbox("X-axis", numeric_cols, key="scatter_x")
	        with col2:
	            # The X column is excluded so a column is never plotted against itself
	            y_col = st.selectbox("Y-axis", [col for col in numeric_cols if col != x_col], key="scatter_y")
	        with col3:
	            color_col = st.selectbox("Color by", ["None"] + categorical_cols, key="scatter_color")
	            color_col = None if color_col == "None" else color_col

	        if st.button("Generate Scatter Plot"):
	            fig = UAP_Visualizer.plot_interactive_scatter(
	                df, x_col, y_col, color_col=color_col, max_points=max_points
	            )
	            st.plotly_chart(fig, use_container_width=True)

	    elif viz_type == "Histogram" and len(numeric_cols) > 0:
	        col1, col2 = st.columns(2)

	        with col1:
	            hist_col = st.selectbox("Column to analyze", numeric_cols + categorical_cols, key="hist_col")
	        with col2:
	            bins = st.slider("Number of bins", 10, 100, 50, key="hist_bins")

	        if st.button("Generate Histogram"):
	            if hist_col in numeric_cols:
	                fig = UAP_Visualizer.plot_interactive_histogram(df, hist_col, bins=bins)
	            else:
	                # For categorical columns, use treemap instead
	                fig = UAP_Visualizer.plot_interactive_treemap(df, hist_col, top_n=20)
	            st.plotly_chart(fig, use_container_width=True)

	    elif viz_type == "Treemap" and len(categorical_cols) > 0:
	        col1, col2 = st.columns(2)

	        with col1:
	            tree_col = st.selectbox("Categorical column", categorical_cols, key="tree_col")
	        with col2:
	            top_n = st.slider("Top N categories", 5, 50, 20, key="tree_n")

	        if st.button("Generate Treemap"):
	            fig = UAP_Visualizer.plot_interactive_treemap(df, tree_col, top_n=top_n)
	            st.plotly_chart(fig, use_container_width=True)

	    elif viz_type == "Correlation Matrix" and len(numeric_cols) >= 2:
	        col1, col2 = st.columns(2)

	        with col1:
	            corr_method = st.selectbox("Correlation method", ["pearson", "spearman", "kendall"], key="corr_method")
	        with col2:
	            selected_cols = st.multiselect("Select columns", numeric_cols, default=numeric_cols[:10], key="corr_cols")

	        if selected_cols and st.button("Generate Correlation Matrix"):
	            fig = UAP_Visualizer.plot_correlation_matrix(df[selected_cols], method=corr_method)
	            st.plotly_chart(fig, use_container_width=True)

	    elif viz_type == "Time Series" and len(datetime_cols) > 0 and len(numeric_cols) > 0:
	        col1, col2, col3 = st.columns(3)

	        with col1:
	            date_col = st.selectbox("Date column", datetime_cols, key="ts_date")
	        with col2:
	            value_cols = st.multiselect("Value columns", numeric_cols, default=numeric_cols[:3], key="ts_values")
	        with col3:
	            resample_freq = st.selectbox("Resample frequency", ["None", "D", "W", "M"], key="ts_freq")
	            resample_freq = None if resample_freq == "None" else resample_freq

	        if value_cols and st.button("Generate Time Series"):
	            fig = UAP_Visualizer.plot_time_series(df, date_col, value_cols, resample_freq=resample_freq)
	            st.plotly_chart(fig, use_container_width=True)

	    else:
	        # The selected type's column requirements are not met: say so rather
	        # than rendering nothing
	        needed = requirements.get(viz_type, "columns that this dataset does not have")
	        st.warning(f"{viz_type} requires {needed}, which this dataset does not provide.")

	def show_dashboard_view(df: pd.DataFrame, max_points: int):
	    """Render a small dashboard of charts built from the first few columns.

	    Requires at least two numeric columns and one categorical column;
	    otherwise a warning is shown and nothing else is drawn. The dashboard
	    holds a scatter plot, a histogram and a treemap, plus a correlation
	    matrix when three or more numeric columns exist. A button additionally
	    builds a combined 2x2 layout from those charts.

	    Args:
	        df: Dataset to chart.
	        max_points: Point budget; a quarter of it is passed to the scatter plot.
	    """
	    st.header("📈 Interactive Dashboard")

	    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
	    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()

	    # Bail out early when the dataset cannot feed the dashboard
	    if len(numeric_cols) < 2 or len(categorical_cols) < 1:
	        st.warning("Not enough numeric or categorical columns for dashboard generation")
	        return

	    primary_numeric = numeric_cols[0]
	    primary_category = categorical_cols[0]

	    with st.spinner("Generating dashboard charts..."):
	        # The guard above guarantees the inputs for the first three charts
	        charts = [
	            # Chart 1: Scatter plot
	            UAP_Visualizer.plot_interactive_scatter(
	                df, primary_numeric, numeric_cols[1],
	                color_col=primary_category,
	                max_points=max_points // 4,
	            ),
	            # Chart 2: Histogram
	            UAP_Visualizer.plot_interactive_histogram(df, primary_numeric),
	            # Chart 3: Treemap
	            UAP_Visualizer.plot_interactive_treemap(df, primary_category, top_n=15),
	        ]

	        # Chart 4: Correlation matrix (if enough numeric columns)
	        if len(numeric_cols) >= 3:
	            charts.append(UAP_Visualizer.plot_correlation_matrix(df[numeric_cols[:5]]))

	    # Individual charts: scatter and treemap on the left,
	    # histogram and (optional) correlation matrix on the right
	    left_column, right_column = st.columns(2)

	    with left_column:
	        st.plotly_chart(charts[0], use_container_width=True)
	        st.plotly_chart(charts[2], use_container_width=True)

	    with right_column:
	        st.plotly_chart(charts[1], use_container_width=True)
	        if len(charts) >= 4:
	            st.plotly_chart(charts[3], use_container_width=True)

	    # Combined dashboard view
	    if st.button("Generate Combined Dashboard"):
	        combined = UAP_Visualizer.create_dashboard_layout(charts[:4], layout="2x2")
	        st.plotly_chart(combined, use_container_width=True)

	def show_performance_demo(df: pd.DataFrame):
	    """Show dataset size metrics and, on request, time a few pipeline steps.

	    Always displays the row count, the deep memory usage and the number of
	    entries under ``cached_visualizations`` in the session state. When the
	    "Run Performance Test" button is pressed, it times data profiling and a
	    histogram build, and, for datasets with more than 10,000 rows, times
	    ``UAP_Visualizer._smart_sampling`` at several sample sizes and plots the
	    results.

	    Args:
	        df: Dataset to measure.
	    """
	    # Imported before any branch so that `time` is bound on every path
	    # through the function (a function-scope import makes it a local name).
	    import time

	    st.header("⚡ Performance Demonstration")

	    # Performance metrics
	    col1, col2, col3 = st.columns(3)

	    with col1:
	        st.metric("Dataset Size", f"{len(df):,} rows")
	    with col2:
	        st.metric("Memory Usage", f"{df.memory_usage(deep=True).sum() / 1024**2:.1f} MB")
	    with col3:
	        cache_info = st.session_state.get('cached_visualizations', {})
	        st.metric("Cached Visualizations", len(cache_info))

	    # Performance comparison
	    st.subheader("🏃‍♂️ Speed Comparison")

	    if not st.button("Run Performance Test"):
	        return

	    # perf_counter is the clock intended for measuring short intervals;
	    # time.time() can be coarse and can jump if the system clock is adjusted.
	    started = time.perf_counter()
	    DataProcessor.profile_data(df)
	    profile_time = time.perf_counter() - started

	    # Time one visualization, if there is a numeric column to plot
	    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
	    viz_time = 0.0
	    if numeric_cols:
	        started = time.perf_counter()
	        UAP_Visualizer.plot_interactive_histogram(df, numeric_cols[0])
	        viz_time = time.perf_counter() - started

	    # Display results
	    perf_col1, perf_col2 = st.columns(2)

	    with perf_col1:
	        st.metric("Data Profiling", f"{profile_time:.3f} seconds")
	    with perf_col2:
	        st.metric("Visualization Generation", f"{viz_time:.3f} seconds")

	    # Show caching benefits
	    st.info("🚀 Subsequent calls to the same functions will be much faster due to caching!")

	    # Memory optimization demo
	    if len(df) > 10000:
	        st.subheader("📊 Smart Sampling Demo")

	        # Every size is <= len(df) because of the guard above, so each one
	        # is timed and the two lists stay aligned.
	        sample_sizes = [1000, 5000, 10000, len(df)]
	        sample_times = []

	        for size in sample_sizes:
	            started = time.perf_counter()
	            UAP_Visualizer._smart_sampling(df, max_points=size)
	            sample_times.append(time.perf_counter() - started)

	        # Create performance chart
	        perf_df = pd.DataFrame({
	            'Sample Size': [f"{size:,}" for size in sample_sizes],
	            'Processing Time': sample_times,
	        })
	        st.line_chart(perf_df.set_index('Sample Size'))

	# Run the demo only when this file is executed as a script, not when it is imported.
	if __name__ == "__main__":
	    main()