Spaces:
Sleeping
Sleeping
import glob
import io
import os
import re
import zipfile
from datetime import datetime

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from plotly.subplots import make_subplots
# Page-level configuration: browser tab title and icon, wide layout, and an
# initially expanded sidebar.
st.set_page_config(
    page_title="台灣交通事故數據可視化器",
    page_icon="🚗",
    layout="wide",
    initial_sidebar_state="expanded",
)
class StreamlitTrafficVisualizer:
    """Clean Taiwan traffic-accident tables and turn them into Plotly figures.

    The expected input is a DataFrame with a ``日期`` (date text), ``統計項目``
    (statistic description) and ``數值`` (value) column.  ``preprocess_data``
    derives ``年份``, ``月份`` and ``統計類型`` from those columns, and the
    ``create_*`` methods build figures from the derived columns.  Problems
    are reported to the user through ``st.error`` / ``st.warning`` and
    signalled to the caller by returning ``None``.
    """

    # One fixed colour per statistic category, shared by the line and bar charts.
    STAT_COLORS = {
        '事故件數': '#FF6B6B',
        '死亡人數': '#4ECDC4',
        '受傷人數': '#45B7D1',
        '其他': '#95A5A6',
    }

    # (category, substrings that select it), checked in this order.
    _STAT_KEYWORDS = (
        ('事故件數', ('事故件數', '事故數')),
        ('死亡人數', ('死亡人數', '死亡數')),
        ('受傷人數', ('受傷人數', '受傷數')),
    )

    # Substrings that mark a row of ``統計項目`` as relevant.
    _TARGET_KEYWORDS = ('事故', '死亡', '受傷', '件數', '人數', '總計')

    # Year patterns, tried in order; 'minguo' years are converted with +1911.
    _YEAR_PATTERNS = (
        (r'(\d{4})年', 'western'),    # 2023年
        (r'(\d{2,3})年', 'minguo'),   # 113年
        (r'(\d{4})\-', 'western'),    # 2023-
        (r'(\d{4})\/', 'western'),    # 2023/
        (r'(\d{4})\.', 'western'),    # 2023.
        (r'(\d{4})', 'western'),      # 2023
    )

    # Month patterns, tried in order.
    _MONTH_PATTERNS = (
        r'\d{2,4}年(\d{1,2})月',       # generic year/month text
        r'\d{4}\-(\d{1,2})',           # 2024-01
        r'\d{4}\/(\d{1,2})',           # 2024/01
        r'\d{4}\.(\d{1,2})',           # 2024.01
    )

    _PIE_HOVER = (
        '<b>%{label}</b><br>'
        'Value: %{value:,.0f}<br>'
        'Percentage: %{percent}<br>'
        '<extra></extra>'
    )

    def __init__(self):
        """Set the colour palette and make sure the session-state keys exist."""
        self.colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A', '#98D8C8',
                       '#F7DC6F', '#BB8FCE', '#85C1E9', '#F8C471', '#82E0AA']
        # Only seed keys that are missing so values survive Streamlit reruns.
        defaults = (('data', None), ('processed_data', None), ('data_loaded', False))
        for key, default in defaults:
            if key not in st.session_state:
                st.session_state[key] = default

    def create_sample_data(self):
        """Return a deterministic demo DataFrame (3 years x 12 months x 3 types).

        Each value is a random base drawn from a per-type range, scaled by
        1.15 for June-August and by 1.1 for December-February.
        """
        # A local RandomState(42) produces the same stream as seeding the
        # global generator with 42, without disturbing other users of it.
        rng = np.random.RandomState(42)
        value_ranges = {
            '事故件數': (2000, 3000),
            '死亡人數': (200, 400),
            '受傷人數': (2500, 4000),
        }
        seasonal_factors = {
            '06': 1.15, '07': 1.15, '08': 1.15,
            '12': 1.1, '01': 1.1, '02': 1.1,
        }

        rows = []
        for year in ('2022', '2023', '2024'):
            for month_number in range(1, 13):
                month = f"{month_number:02d}"
                factor = seasonal_factors.get(month, 1.0)
                for stats_type, (low, high) in value_ranges.items():
                    base_value = rng.randint(low, high)
                    rows.append({
                        '頁面名稱': f'交通統計_{stats_type}',
                        '日期': f'{year}年{month}月',
                        '統計項目': f'{stats_type}統計',
                        '數值': int(base_value * factor),
                        '年份': year,
                        '月份': month,
                        '統計類型': stats_type,
                        '年份類型': 'western',
                    })
        return pd.DataFrame(rows)

    def _categorize_stats(self, item):
        """Map one ``統計項目`` value to a category label.

        Missing values and unrecognised text map to '其他'; anything that
        contains '總計' or 'total' (case-insensitive) maps to '總計'.
        """
        if pd.isna(item):
            return '其他'
        text = str(item)
        if '總計' in text or 'total' in text.lower():
            return '總計'
        for category, keywords in self._STAT_KEYWORDS:
            if any(keyword in text for keyword in keywords):
                return category
        return '其他'

    def _extract_years(self, dates):
        """Parse a western year per row from a positionally indexed text Series.

        Returns three Series aligned with ``dates``: the western year as a
        string, the raw matched digits, and the pattern type ('western' or
        'minguo').  Rows where no pattern matches stay missing.
        """
        year = pd.Series(None, index=dates.index, dtype=object)
        raw_year = pd.Series(None, index=dates.index, dtype=object)
        year_type = pd.Series(None, index=dates.index, dtype=object)

        for pattern, pattern_type in self._YEAR_PATTERNS:
            found = dates.str.extract(pattern, expand=False)
            # Only fill rows that no earlier pattern has resolved.
            fill = year.isna() & found.notna()
            western = None
            if pattern_type == 'minguo':
                western = pd.to_numeric(found, errors='coerce') + 1911
                # Ignore conversions that do not land in a plausible year.
                fill &= western.between(1900, 2100)
            if not fill.any():
                continue
            if western is not None:
                resolved = western[fill].astype(int).astype(str)
            else:
                resolved = found[fill]
            year.loc[fill] = resolved.to_numpy()
            raw_year.loc[fill] = found[fill].to_numpy()
            year_type.loc[fill] = pattern_type
        return year, raw_year, year_type

    def _extract_months(self, dates):
        """Parse a zero-padded month per row from a positionally indexed text Series.

        Rows without a month stay missing, unless no row has one at all, in
        which case every row gets the placeholder '00'.
        """
        month = pd.Series(None, index=dates.index, dtype=object)
        for pattern in self._MONTH_PATTERNS:
            found = dates.str.extract(pattern, expand=False)
            fill = month.isna() & found.notna()
            if fill.any():
                month.loc[fill] = found[fill].str.zfill(2).to_numpy()
        if month.isna().all():
            month = pd.Series('00', index=dates.index, dtype=object)
        return month

    def preprocess_data(self, data):
        """Return a cleaned copy of ``data`` ready for charting.

        Steps, each applied only when the relevant column exists:

        * ``數值`` is converted to numbers (thousands separators are
          removed first) and rows that are not numeric are dropped.
        * ``年份``, ``原始年份``, ``年份類型`` and ``月份`` are derived from
          ``日期``; Minguo years are converted to western years.
        * ``統計類型`` is derived from ``統計項目``; rows that mention none
          of the target keywords, or that are exactly '總計', are dropped.
        * Rows without a year are dropped.

        Returns ``None`` when ``data`` is ``None`` or empty.
        """
        if data is None or len(data) == 0:
            return None
        data = data.copy()

        if '數值' in data.columns:
            values = data['數值']
            if not pd.api.types.is_numeric_dtype(values):
                # "1,234" would otherwise be coerced to NaN and dropped.
                values = (values.astype(str)
                          .str.replace(',', '', regex=False)
                          .str.strip())
            data['數值'] = pd.to_numeric(values, errors='coerce')
            data = data.dropna(subset=['數值'])

        if '日期' in data.columns:
            # Cast to text so datetime/number columns work with ``.str`` and
            # reset the index so the helpers can work positionally.
            dates = data['日期'].astype(str).reset_index(drop=True)
            year, raw_year, year_type = self._extract_years(dates)
            data['年份'] = year.to_numpy()
            data['原始年份'] = raw_year.to_numpy()
            data['年份類型'] = year_type.to_numpy()
            data['月份'] = self._extract_months(dates).to_numpy()

        if '統計項目' in data.columns:
            data['統計類型'] = data['統計項目'].apply(self._categorize_stats)
            items = data['統計項目'].astype(str)
            relevant = items.str.contains('|'.join(self._TARGET_KEYWORDS), regex=True)
            pure_total = items.str.strip() == '總計'
            data = data[relevant & ~pure_total].copy()

        if '年份' in data.columns:
            data = data[data['年份'].notna()].copy()
        return data

    def _report_missing_columns(self, data, required):
        """Show an error for absent columns and return the list of them."""
        missing_cols = [col for col in required if col not in data.columns]
        if missing_cols:
            st.error(f"缺少必要欄位: {missing_cols}")
        return missing_cols

    def create_monthly_trend_chart(self, data):
        """Build a line chart of monthly totals, one trace per ``統計類型``.

        Returns the figure, or ``None`` when ``data`` is empty, lacks a
        required column, has no complete rows, or building fails.
        """
        if data is None or len(data) == 0:
            return None
        required_cols = ['年份', '月份', '統計類型', '數值']
        if self._report_missing_columns(data, required_cols):
            return None

        valid_data = data.dropna(subset=required_cols)
        if valid_data.empty:
            st.warning("沒有有效的月份數據")
            return None

        try:
            keyed = valid_data.copy()
            # String keys so the label concatenation below cannot fail.
            keyed['年份'] = keyed['年份'].astype(str)
            keyed['月份'] = keyed['月份'].astype(str)
            monthly_agg = (keyed.groupby(['年份', '月份', '統計類型'])['數值']
                           .sum()
                           .reset_index())
            monthly_agg['年月顯示'] = monthly_agg['年份'] + '年' + monthly_agg['月份'] + '月'
            monthly_agg['時間戳'] = monthly_agg['年份'] + '-' + monthly_agg['月份'].str.zfill(2)
            monthly_agg = monthly_agg.sort_values('時間戳')
        except Exception as e:
            st.error(f"數據聚合失敗: {e}")
            return None

        try:
            # Chronological label order, pinned on the axis so it does not
            # depend on which trace happens to be added first.
            ordered_labels = list(dict.fromkeys(monthly_agg['年月顯示']))

            fig = go.Figure()
            for stats_type, type_data in monthly_agg.groupby('統計類型', sort=False):
                fig.add_trace(go.Scatter(
                    x=type_data['年月顯示'],
                    y=type_data['數值'],
                    mode='lines+markers',
                    name=stats_type,
                    line=dict(
                        color=self.STAT_COLORS.get(stats_type, '#95A5A6'),
                        width=3,
                    ),
                    marker=dict(size=8),
                    hovertemplate='<b>%{fullData.name}</b><br>' +
                                  'Time: %{x}<br>' +
                                  'Value: %{y:,.0f}<br>' +
                                  '<extra></extra>',
                ))

            fig.update_layout(
                title={
                    'text': "Taiwan Traffic Accident Statistics - Monthly Trend Analysis",
                    'x': 0.5,
                    'font': {'size': 20},
                },
                xaxis_title="Year-Month",
                yaxis_title="Value",
                xaxis=dict(
                    tickangle=45,
                    type='category',
                    categoryorder='array',
                    categoryarray=ordered_labels,
                ),
                yaxis=dict(rangemode='tozero'),
                hovermode='x unified',
                height=600,
                showlegend=True,
                legend=dict(
                    orientation="h",
                    yanchor="bottom",
                    y=1.02,
                    xanchor="center",
                    x=0.5,
                ),
            )
            return fig
        except Exception as e:
            st.error(f"月份趨勢圖創建失敗: {e}")
            return None

    def _build_pie_trace(self, year_data, color_map):
        """Return the donut trace for one year's aggregated rows."""
        return go.Pie(
            labels=year_data['統計類型'],
            values=year_data['數值'],
            hole=0.4,
            hovertemplate=self._PIE_HOVER,
            textinfo='label+percent',
            textposition='auto',
            marker=dict(colors=[color_map[label] for label in year_data['統計類型']]),
        )

    def create_animated_pie_chart(self, data):
        """Build a donut chart animated over years (one frame per year).

        Returns the figure, or ``None`` when ``data`` is empty, lacks a
        required column, has no complete rows, or building fails.
        """
        if data is None or len(data) == 0:
            return None
        required_cols = ['年份', '統計類型', '數值']
        if self._report_missing_columns(data, required_cols):
            return None

        valid_data = data.dropna(subset=required_cols)
        if valid_data.empty:
            st.warning("沒有完整的有效數據")
            return None

        try:
            agg_data = (valid_data.groupby(['年份', '統計類型'])['數值']
                        .sum()
                        .reset_index())
        except Exception as e:
            st.error(f"數據聚合失敗: {e}")
            return None

        years = sorted(agg_data['年份'].unique())

        try:
            # One colour per category across ALL years, so a slice keeps its
            # colour even in a year where another category is absent.
            all_types = sorted(agg_data['統計類型'].unique())
            color_map = {
                stats_type: self.colors[position % len(self.colors)]
                for position, stats_type in enumerate(all_types)
            }
            per_year = {year: agg_data[agg_data['年份'] == year] for year in years}

            frames = [
                go.Frame(
                    data=[self._build_pie_trace(per_year[year], color_map)],
                    name=str(year),
                )
                for year in years
            ]
            # The initial view shows the first year.
            fig = go.Figure(
                data=[self._build_pie_trace(per_year[years[0]], color_map)],
                frames=frames,
            )

            fig.update_layout(
                title={
                    'text': "Taiwan Traffic Accident Statistics - Animated Pie Chart",
                    'x': 0.5,
                    'font': {'size': 20},
                },
                updatemenus=[{
                    'type': 'buttons',
                    'direction': 'left',
                    'x': 0.1,
                    'y': 0.1,
                    'showactive': False,
                    'buttons': [
                        {
                            'label': 'Play',
                            'method': 'animate',
                            'args': [None, {
                                'frame': {'duration': 1500, 'redraw': True},
                                'fromcurrent': True,
                                'transition': {'duration': 500},
                            }],
                        },
                        {
                            'label': 'Pause',
                            'method': 'animate',
                            'args': [[None], {
                                'frame': {'duration': 0, 'redraw': False},
                                'mode': 'immediate',
                                'transition': {'duration': 0},
                            }],
                        },
                    ],
                }],
                sliders=[{
                    'currentvalue': {'prefix': 'Year: ', 'font': {'size': 16}},
                    'len': 0.8,
                    'x': 0.1,
                    'steps': [
                        {
                            'label': f"{year}",
                            'method': 'animate',
                            'args': [[str(year)], {
                                'frame': {'duration': 500, 'redraw': True},
                                'transition': {'duration': 300},
                            }],
                        }
                        for year in years
                    ],
                }],
                height=600,
            )
            return fig
        except Exception as e:
            st.error(f"動態餅圖創建失敗: {e}")
            return None

    def create_bar_chart(self, data):
        """Build a bar chart of yearly totals coloured by ``統計類型``.

        Returns the figure, or ``None`` when ``data`` is empty, lacks a
        required column, has no complete rows, or building fails.
        """
        if data is None or len(data) == 0:
            return None
        required_cols = ['年份', '統計類型', '數值']
        if self._report_missing_columns(data, required_cols):
            return None

        valid_data = data.dropna(subset=required_cols)
        if valid_data.empty:
            return None

        try:
            yearly_data = (valid_data.groupby(['年份', '統計類型'])['數值']
                           .sum()
                           .reset_index())
            fig = px.bar(
                yearly_data,
                x='年份',
                y='數值',
                color='統計類型',
                title="Taiwan Traffic Accident Statistics - Yearly Comparison",
                color_discrete_map=self.STAT_COLORS,
            )
            fig.update_layout(
                xaxis_title="Year",
                yaxis_title="Value",
                height=500,
            )
            return fig
        except Exception as e:
            st.error(f"柱狀圖創建失敗: {e}")
            return None
def _read_uploaded_table(uploaded_file):
    """Parse an uploaded CSV or Excel file into a DataFrame.

    CSV files are tried as UTF-8 (with or without a BOM) and then as CP950.
    The extension test is case-insensitive; anything that is not ``.csv``
    is handed to ``pd.read_excel``.

    Raises:
        UnicodeDecodeError: when no CSV encoding fits.
    """
    if not uploaded_file.name.lower().endswith('.csv'):
        return pd.read_excel(uploaded_file)

    last_error = None
    for encoding in ('utf-8-sig', 'cp950'):
        # Rewind: a failed attempt may already have consumed the buffer.
        uploaded_file.seek(0)
        try:
            return pd.read_csv(uploaded_file, encoding=encoding)
        except UnicodeDecodeError as exc:
            last_error = exc
    raise last_error


def _render_welcome():
    """Show the landing page displayed while no data is loaded."""
    st.header("🎯 歡迎使用台灣交通事故數據可視化器")
    st.markdown("""
    ### 📋 功能特色:
    - 📈 **月份趨勢分析**:顯示交通事故隨時間的變化趨勢
    - 🥧 **動態餅圖**:按年份顯示各統計類型的比例分布
    - 📊 **年度比較**:柱狀圖比較不同年份的數據
    - 🔄 **智能數據處理**:自動識別民國年和西元年格式
    - 📁 **多格式支援**:支援 CSV 和 Excel 檔案
    ### 🚀 開始使用:
    1. 在左側選擇「上傳檔案」或「使用範例數據」
    2. 如果選擇上傳檔案,請上傳包含交通事故數據的 CSV 或 Excel 檔案
    3. 系統會自動處理數據並生成可視化圖表
    ### 📊 數據格式要求:
    數據應包含以下欄位:
    - **日期**:包含年份和月份信息(如:2024年01月)
    - **統計項目**:事故類型描述
    - **數值**:統計數值
    ### 💡 提示:
    如果沒有數據檔案,可以點擊「載入範例數據」來體驗系統功能!
    """)


def _render_summary(processed_data):
    """Show the four headline metrics and a collapsible data preview."""
    st.header("📈 數據摘要")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("總記錄數", len(processed_data))
    with col2:
        if '年份' in processed_data.columns:
            st.metric("涵蓋年份", processed_data['年份'].dropna().nunique())
    with col3:
        if '統計類型' in processed_data.columns:
            st.metric("統計類型", processed_data['統計類型'].nunique())
    with col4:
        if '數值' in processed_data.columns:
            st.metric("總數值", f"{processed_data['數值'].sum():,.0f}")

    with st.expander("📋 數據預覽", expanded=False):
        st.dataframe(processed_data.head(10))


def _render_charts(visualizer, processed_data):
    """Render one tab per chart, with a warning when a chart cannot be built."""
    st.header("📊 可視化圖表")
    chart_tabs = st.tabs(["月份趨勢圖", "動態餅圖", "年度柱狀圖"])
    sections = (
        ("📈 月份趨勢分析", visualizer.create_monthly_trend_chart,
         "無法創建月份趨勢圖,可能是數據不足或格式不正確"),
        ("🥧 動態餅圖分析", visualizer.create_animated_pie_chart,
         "無法創建動態餅圖,可能是數據不足或格式不正確"),
        ("📊 年度柱狀圖比較", visualizer.create_bar_chart,
         "無法創建柱狀圖,可能是數據不足或格式不正確"),
    )
    for tab, (heading, build_chart, failure_message) in zip(chart_tabs, sections):
        with tab:
            st.subheader(heading)
            figure = build_chart(processed_data)
            # The builders signal failure with None; test for exactly that.
            if figure is not None:
                st.plotly_chart(figure, use_container_width=True)
            else:
                st.warning(failure_message)


def _render_statistics(processed_data):
    """Show sum, mean and count of ``數值`` per ``統計類型``."""
    st.header("📊 詳細統計")
    if '統計類型' in processed_data.columns and '數值' in processed_data.columns:
        stats_summary = (processed_data.groupby('統計類型')['數值']
                         .agg(['sum', 'mean', 'count'])
                         .round(2))
        stats_summary.columns = ['總計', '平均值', '記錄數']
        st.dataframe(stats_summary)


def _render_download(processed_data):
    """Offer the processed table as a CSV download."""
    st.header("💾 下載數據")
    # to_csv() without a path returns str, so its ``encoding`` argument cannot
    # add a BOM; encode explicitly so the download really is UTF-8 with BOM.
    csv_bytes = processed_data.to_csv(index=False).encode('utf-8-sig')
    st.download_button(
        label="下載處理後的數據 (CSV)",
        data=csv_bytes,
        file_name="processed_traffic_data.csv",
        mime="text/csv",
    )


def main():
    """Run the Streamlit page: load data, preprocess it, and render the views."""
    visualizer = StreamlitTrafficVisualizer()

    st.title("🚗 台灣交通事故數據可視化器")
    st.markdown("---")

    st.sidebar.header("📊 數據來源選擇")
    data_source = st.sidebar.radio(
        "選擇數據來源:",
        ["上傳檔案", "使用範例數據"],
    )

    if data_source == "上傳檔案":
        uploaded_file = st.sidebar.file_uploader(
            "上傳 CSV 或 Excel 檔案",
            type=['csv', 'xlsx', 'xls'],
            help="請上傳包含交通事故數據的檔案",
        )
        if uploaded_file is None:
            st.session_state.data_loaded = False
        else:
            try:
                st.session_state.data = _read_uploaded_table(uploaded_file)
                st.session_state.data_loaded = True
                st.sidebar.success("檔案上傳成功!")
            except Exception as e:
                st.sidebar.error(f"檔案讀取失敗: {e}")
                st.session_state.data_loaded = False
    elif st.sidebar.button("載入範例數據"):
        st.session_state.data = visualizer.create_sample_data()
        st.session_state.data_loaded = True
        st.sidebar.success("範例數據載入成功!")

    if not (st.session_state.data_loaded and st.session_state.data is not None):
        _render_welcome()
        return

    with st.spinner("正在處理數據..."):
        processed_data = visualizer.preprocess_data(st.session_state.data)
        st.session_state.processed_data = processed_data

    if processed_data is None or len(processed_data) == 0:
        st.error("數據處理失敗,請檢查數據格式")
        return

    _render_summary(processed_data)
    _render_charts(visualizer, processed_data)
    _render_statistics(processed_data)
    _render_download(processed_data)


if __name__ == "__main__":
    main()