# File: 2025_project/src/streamlit_app.py
# Last commit: "Update src/streamlit_app.py" by 123Sabrina (163b09e, verified)
import streamlit as st
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np
import os
import glob
from datetime import datetime
import re
import io
import zipfile
# Page-level Streamlit settings: browser title, icon, wide layout and an
# initially expanded sidebar.
_page_settings = {
    "page_title": "台灣交通事故數據可視化器",
    "page_icon": "🚗",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)
class StreamlitTrafficVisualizer:
    """Clean Taiwan traffic-accident statistics and chart them with Plotly.

    The visualizer works on a long-format table with the columns ``日期``
    (date label), ``統計項目`` (statistic item) and ``數值`` (value).
    ``preprocess_data`` derives ``年份`` (western year), ``月份`` (month) and
    ``統計類型`` (category) from them, and the ``create_*_chart`` methods turn
    the derived table into figures.
    """

    # One colour per statistic category, shared by every chart so a category
    # looks the same in the trend, pie and bar views.
    CATEGORY_COLORS = {
        '事故件數': '#FF6B6B',
        '死亡人數': '#4ECDC4',
        '受傷人數': '#45B7D1',
        '其他': '#95A5A6',
    }

    # Year patterns, tried in order for each date label.
    _YEAR_PATTERNS = (
        (re.compile(r'(\d{4})年'), 'western'),    # 2023年
        (re.compile(r'(\d{2,3})年'), 'minguo'),   # 113年 (ROC calendar)
        (re.compile(r'(\d{4})\-'), 'western'),    # 2023-
        (re.compile(r'(\d{4})\/'), 'western'),    # 2023/
        (re.compile(r'(\d{4})\.'), 'western'),    # 2023.
        (re.compile(r'(\d{4})'), 'western'),      # 2023
    )

    # Month patterns, tried in order for each date label.
    _MONTH_PATTERNS = (
        re.compile(r'\d{2,4}年(\d{1,2})月'),      # 2024年01月 / 113年1月
        re.compile(r'\d{4}\-(\d{1,2})'),          # 2024-01
        re.compile(r'\d{4}\/(\d{1,2})'),          # 2024/01
        re.compile(r'\d{4}\.(\d{1,2})'),          # 2024.01
    )

    # Placeholder month for labels that carry no month information.
    _MISSING_MONTH = '00'

    # A statistic item must mention one of these keywords to be kept.
    _TARGET_KEYWORDS = ('事故', '死亡', '受傷', '件數', '人數', '總計')

    def __init__(self):
        """Set up the fallback colour palette and seed Streamlit session state."""
        self.colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A', '#98D8C8',
                       '#F7DC6F', '#BB8FCE', '#85C1E9', '#F8C471', '#82E0AA']
        # Only seed keys that are missing so values from earlier reruns survive.
        defaults = (('data', None), ('processed_data', None), ('data_loaded', False))
        for key, default in defaults:
            if key not in st.session_state:
                st.session_state[key] = default

    def create_sample_data(self):
        """Build a deterministic demo data set covering 2022-2024.

        Returns:
            A DataFrame with one row per year, month and statistic type, using
            the same columns an uploaded file is expected to provide. The date
            label is written as ``YYYY年MM月`` so that ``preprocess_data`` can
            recover both the year and the month from it.
        """
        # A fixed seed keeps the demo values identical between reruns.
        np.random.seed(42)
        value_ranges = {
            '事故件數': (2000, 3000),
            '死亡人數': (200, 400),
            '受傷人數': (2500, 4000),
        }
        # Summer months are scaled up by 15 %, winter months by 10 %.
        seasonal_factors = {'06': 1.15, '07': 1.15, '08': 1.15,
                            '12': 1.1, '01': 1.1, '02': 1.1}

        rows = []
        for year in ('2022', '2023', '2024'):
            for month_number in range(1, 13):
                month = f"{month_number:02d}"
                for stats_type, (low, high) in value_ranges.items():
                    base_value = np.random.randint(low, high)
                    factor = seasonal_factors.get(month, 1.0)
                    rows.append({
                        '頁面名稱': f'交通統計_{stats_type}',
                        '日期': f'{year}年{month}月',
                        '統計項目': f'{stats_type}統計',
                        '數值': int(base_value * factor),
                        '年份': year,
                        '月份': month,
                        '統計類型': stats_type,
                        '年份類型': 'western',
                    })
        return pd.DataFrame(rows)

    def _categorize_stats(self, item):
        """Map a statistic item label to its category name.

        Missing or unrecognised labels map to ``'其他'``; the total check runs
        first, so a label mentioning a total wins over the other categories.
        """
        if pd.isna(item):
            return '其他'
        text = str(item)
        if '總計' in text or 'total' in text.lower():
            return '總計'
        keyword_groups = (
            ('事故件數', ('事故件數', '事故數')),
            ('死亡人數', ('死亡人數', '死亡數')),
            ('受傷人數', ('受傷人數', '受傷數')),
        )
        for category, keywords in keyword_groups:
            if any(keyword in text for keyword in keywords):
                return category
        return '其他'

    @classmethod
    def _parse_year(cls, text):
        """Return ``(western_year, raw_year, year_type)`` for one date label.

        All three values are ``None`` when no pattern yields a usable year.
        """
        for pattern, year_type in cls._YEAR_PATTERNS:
            match = pattern.search(text)
            if match is None:
                continue
            raw_year = match.group(1)
            if year_type != 'minguo':
                return raw_year, raw_year, year_type
            # ROC (Minguo) year + 1911 = Gregorian year.
            western_year = int(raw_year) + 1911
            if 1900 <= western_year <= 2100:
                return str(western_year), raw_year, year_type
        return None, None, None

    @classmethod
    def _parse_month(cls, text):
        """Return the zero-padded month in a date label, or the placeholder."""
        for pattern in cls._MONTH_PATTERNS:
            match = pattern.search(text)
            if match is not None:
                return match.group(1).zfill(2)
        return cls._MISSING_MONTH

    def preprocess_data(self, data):
        """Clean the raw table and derive year, month and category columns.

        Args:
            data: Raw DataFrame, or ``None``.

        Returns:
            ``None`` when ``data`` is ``None`` or empty; otherwise a cleaned
            copy in which

            * ``數值`` is numeric and rows with non-numeric values are dropped,
            * ``年份`` / ``原始年份`` / ``年份類型`` / ``月份`` are parsed row by
              row from ``日期`` (when that column exists),
            * ``統計類型`` is derived from ``統計項目`` and rows whose item
              matches none of the target keywords, or is exactly ``'總計'``,
              are dropped,
            * rows without a year are dropped.
        """
        if data is None or len(data) == 0:
            return None
        data = data.copy()

        if '數值' in data.columns:
            data['數值'] = pd.to_numeric(data['數值'], errors='coerce')
            data = data.dropna(subset=['數值'])

        if '日期' in data.columns:
            # Convert every label to text first so non-string columns (Excel
            # dates, plain integers) are parsed instead of raising.
            labels = [str(value) for value in data['日期'].tolist()]
            parsed_years = [self._parse_year(label) for label in labels]
            # Each row is parsed on its own, so a file mixing several date
            # formats keeps all of its rows and missing values stay missing
            # instead of turning into 'nan' / '2024.0' strings.
            data['年份'] = [parsed[0] for parsed in parsed_years]
            data['原始年份'] = [parsed[1] for parsed in parsed_years]
            data['年份類型'] = [parsed[2] for parsed in parsed_years]
            data['月份'] = [self._parse_month(label) for label in labels]

        if '統計項目' in data.columns:
            items = data['統計項目']
            data['統計類型'] = items.apply(self._categorize_stats)
            keyword_regex = '|'.join(self._TARGET_KEYWORDS)
            relevant = items.astype(str).str.contains(keyword_regex, na=False, regex=True)
            # A bare grand-total row is excluded from the kept rows.
            data = data[relevant & (items != '總計')].copy()

        if '年份' in data.columns:
            data = data[data['年份'].notna()].copy()
        return data

    def create_monthly_trend_chart(self, data):
        """Build a line chart of monthly totals per statistic category.

        Args:
            data: Preprocessed DataFrame with ``年份``, ``月份``, ``統計類型``
                and ``數值`` columns.

        Returns:
            A Plotly figure, or ``None`` when the data is empty, a required
            column is missing, or building the chart fails (the reason is
            reported through Streamlit).
        """
        if data is None or len(data) == 0:
            return None

        required_cols = ['年份', '月份', '統計類型', '數值']
        missing_cols = [col for col in required_cols if col not in data.columns]
        if missing_cols:
            st.error(f"缺少必要欄位: {missing_cols}")
            return None

        valid_data = data.dropna(subset=required_cols)
        if len(valid_data) == 0:
            st.warning("沒有有效的月份數據")
            return None

        try:
            monthly_agg = (
                valid_data.groupby(['年份', '月份', '統計類型'])['數值']
                .sum()
                .reset_index()
            )
            # Cast to text so pre-existing numeric year/month columns also work.
            year_text = monthly_agg['年份'].astype(str)
            month_text = monthly_agg['月份'].astype(str)
            monthly_agg['年月顯示'] = year_text + '年' + month_text + '月'
            monthly_agg['時間戳'] = year_text + '-' + month_text
            monthly_agg = monthly_agg.sort_values('時間戳')
            if len(monthly_agg) == 0:
                st.warning("聚合後沒有數據")
                return None
        except Exception as e:
            st.error(f"數據聚合失敗: {e}")
            return None

        try:
            fig = go.Figure()
            fallback_color = self.CATEGORY_COLORS['其他']
            for stats_type in monthly_agg['統計類型'].unique():
                type_data = monthly_agg[monthly_agg['統計類型'] == stats_type]
                if len(type_data) == 0:
                    continue
                fig.add_trace(go.Scatter(
                    x=type_data['年月顯示'],
                    y=type_data['數值'],
                    mode='lines+markers',
                    name=stats_type,
                    line=dict(
                        color=self.CATEGORY_COLORS.get(stats_type, fallback_color),
                        width=3,
                    ),
                    marker=dict(size=8),
                    hovertemplate='<b>%{fullData.name}</b><br>' +
                                  'Time: %{x}<br>' +
                                  'Value: %{y:,.0f}<br>' +
                                  '<extra></extra>',
                ))

            # Pin the category order to the chronological order; otherwise a
            # trace that lacks some months can scramble the shared x-axis.
            ordered_labels = list(dict.fromkeys(monthly_agg['年月顯示']))
            fig.update_layout(
                title={
                    'text': "Taiwan Traffic Accident Statistics - Monthly Trend Analysis",
                    'x': 0.5,
                    'font': {'size': 20},
                },
                xaxis_title="Year-Month",
                yaxis_title="Value",
                xaxis=dict(
                    tickangle=45,
                    type='category',
                    categoryorder='array',
                    categoryarray=ordered_labels,
                ),
                yaxis=dict(rangemode='tozero'),
                hovermode='x unified',
                height=600,
                showlegend=True,
                legend=dict(
                    orientation="h",
                    yanchor="bottom",
                    y=1.02,
                    xanchor="center",
                    x=0.5,
                ),
            )
            return fig
        except Exception as e:
            st.error(f"月份趨勢圖創建失敗: {e}")
            return None

    def _assign_category_colors(self, categories):
        """Return a ``{category: colour}`` mapping that is stable across frames.

        Known categories use ``CATEGORY_COLORS``; any other category gets a
        palette colour that is not already reserved for a known category.
        """
        reserved = set(self.CATEGORY_COLORS.values())
        spare = ([color for color in self.colors if color not in reserved]
                 or list(self.colors)
                 or [self.CATEGORY_COLORS['其他']])
        mapping = {}
        unknown_seen = 0
        for category in sorted(categories, key=str):
            if category in self.CATEGORY_COLORS:
                mapping[category] = self.CATEGORY_COLORS[category]
            else:
                mapping[category] = spare[unknown_seen % len(spare)]
                unknown_seen += 1
        return mapping

    def _build_pie_trace(self, year_data, color_by_type):
        """Build the donut trace for one year's aggregated rows."""
        labels = year_data['統計類型']
        return go.Pie(
            labels=labels,
            values=year_data['數值'],
            hole=0.4,
            hovertemplate='<b>%{label}</b><br>' +
                          'Value: %{value:,.0f}<br>' +
                          'Percentage: %{percent}<br>' +
                          '<extra></extra>',
            textinfo='label+percent',
            textposition='auto',
            marker=dict(colors=[color_by_type[label] for label in labels]),
        )

    def create_animated_pie_chart(self, data):
        """Build a donut chart animated over years, one frame per year.

        Args:
            data: Preprocessed DataFrame with ``年份``, ``統計類型`` and ``數值``
                columns.

        Returns:
            A Plotly figure with play/pause buttons and a year slider, or
            ``None`` when the data is empty, a required column is missing, or
            building the chart fails (the reason is reported through
            Streamlit).
        """
        if data is None or len(data) == 0:
            return None

        required_cols = ['年份', '統計類型', '數值']
        missing_cols = [col for col in required_cols if col not in data.columns]
        if missing_cols:
            st.error(f"缺少必要欄位: {missing_cols}")
            return None

        valid_data = data.dropna(subset=required_cols)
        if len(valid_data) == 0:
            st.warning("沒有完整的有效數據")
            return None

        try:
            agg_data = valid_data.groupby(['年份', '統計類型'])['數值'].sum().reset_index()
            if len(agg_data) == 0:
                st.warning("聚合後沒有數據")
                return None
        except Exception as e:
            st.error(f"數據聚合失敗: {e}")
            return None

        years = sorted(agg_data['年份'].unique())
        if len(years) == 0:
            st.warning("沒有有效的年份數據")
            return None

        try:
            # Colours are keyed by category rather than by position, so a
            # category keeps its colour even when another one is absent in
            # some year.
            color_by_type = self._assign_category_colors(agg_data['統計類型'].unique())

            frames = []
            for year in years:
                year_data = agg_data[agg_data['年份'] == year]
                if len(year_data) > 0:
                    frames.append(go.Frame(
                        data=[self._build_pie_trace(year_data, color_by_type)],
                        name=str(year),
                    ))
            if len(frames) == 0:
                st.warning("沒有創建任何動畫框架")
                return None

            # The figure starts on the first year.
            first_year_data = agg_data[agg_data['年份'] == years[0]]
            fig = go.Figure(
                data=[self._build_pie_trace(first_year_data, color_by_type)],
                frames=frames,
            )

            play_button = {
                'label': 'Play',
                'method': 'animate',
                'args': [None, {
                    'frame': {'duration': 1500, 'redraw': True},
                    'fromcurrent': True,
                    'transition': {'duration': 500},
                }],
            }
            pause_button = {
                'label': 'Pause',
                'method': 'animate',
                'args': [[None], {
                    'frame': {'duration': 0, 'redraw': False},
                    'mode': 'immediate',
                    'transition': {'duration': 0},
                }],
            }
            # Each slider step targets the frame named after its year.
            slider_steps = [
                {
                    'label': f"{year}",
                    'method': 'animate',
                    'args': [[str(year)], {
                        'frame': {'duration': 500, 'redraw': True},
                        'transition': {'duration': 300},
                    }],
                }
                for year in years
            ]
            fig.update_layout(
                title={
                    'text': "Taiwan Traffic Accident Statistics - Animated Pie Chart",
                    'x': 0.5,
                    'font': {'size': 20},
                },
                updatemenus=[{
                    'type': 'buttons',
                    'direction': 'left',
                    'x': 0.1,
                    'y': 0.1,
                    'showactive': False,
                    'buttons': [play_button, pause_button],
                }],
                sliders=[{
                    'currentvalue': {'prefix': 'Year: ', 'font': {'size': 16}},
                    'len': 0.8,
                    'x': 0.1,
                    'steps': slider_steps,
                }],
                height=600,
            )
            return fig
        except Exception as e:
            st.error(f"動態餅圖創建失敗: {e}")
            return None

    def create_bar_chart(self, data):
        """Build a bar chart of yearly totals per statistic category.

        Args:
            data: Preprocessed DataFrame with ``年份``, ``統計類型`` and ``數值``
                columns.

        Returns:
            A Plotly figure, or ``None`` when the data is empty, a required
            column is missing, or building the chart fails (the reason is
            reported through Streamlit).
        """
        if data is None or len(data) == 0:
            return None

        required_cols = ['年份', '統計類型', '數值']
        missing_cols = [col for col in required_cols if col not in data.columns]
        if missing_cols:
            st.error(f"缺少必要欄位: {missing_cols}")
            return None

        try:
            yearly_data = data.groupby(['年份', '統計類型'])['數值'].sum().reset_index()
            if len(yearly_data) == 0:
                return None

            fig = px.bar(
                yearly_data,
                x='年份',
                y='數值',
                color='統計類型',
                title="Taiwan Traffic Accident Statistics - Yearly Comparison",
                color_discrete_map=self.CATEGORY_COLORS,
            )
            fig.update_layout(
                xaxis_title="Year",
                yaxis_title="Value",
                height=500,
            )
            return fig
        except Exception as e:
            st.error(f"年度柱狀圖創建失敗: {e}")
            return None
# Markdown shown on the landing page while no data set is loaded.
_WELCOME_MARKDOWN = """
### 📋 功能特色:
- 📈 **月份趨勢分析**:顯示交通事故隨時間的變化趨勢
- 🥧 **動態餅圖**:按年份顯示各統計類型的比例分布
- 📊 **年度比較**:柱狀圖比較不同年份的數據
- 🔄 **智能數據處理**:自動識別民國年和西元年格式
- 📁 **多格式支援**:支援 CSV 和 Excel 檔案
### 🚀 開始使用:
1. 在左側選擇「上傳檔案」或「使用範例數據」
2. 如果選擇上傳檔案,請上傳包含交通事故數據的 CSV 或 Excel 檔案
3. 系統會自動處理數據並生成可視化圖表
### 📊 數據格式要求:
數據應包含以下欄位:
- **日期**:包含年份和月份信息(如:2024年01月)
- **統計項目**:事故類型描述
- **數值**:統計數值
### 💡 提示:
如果沒有數據檔案,可以點擊「載入範例數據」來體驗系統功能!
"""

# Encodings tried, in order, when reading an uploaded CSV file.
_CSV_ENCODINGS = ('utf-8-sig', 'cp950')


def _read_uploaded_table(uploaded_file):
    """Read an uploaded CSV or Excel file into a DataFrame.

    Files whose name ends in ``.csv`` (any letter case) are read as CSV, trying
    each encoding in ``_CSV_ENCODINGS`` in turn; everything else is passed to
    ``pandas.read_excel``.

    Raises:
        UnicodeDecodeError: when no CSV encoding can decode the file.
    """
    if not uploaded_file.name.lower().endswith('.csv'):
        return pd.read_excel(uploaded_file)

    last_error = None
    for encoding in _CSV_ENCODINGS:
        # Rewind before every attempt: a failed read leaves the cursor mid-file.
        uploaded_file.seek(0)
        try:
            # 'utf-8-sig' also reads plain UTF-8 and strips a leading BOM that
            # would otherwise end up inside the first column name.
            return pd.read_csv(uploaded_file, encoding=encoding)
        except UnicodeDecodeError as err:
            last_error = err
    raise last_error


def main():
    """Render the app: pick a data source, preprocess it and show the charts.

    The loaded table and its processed form are kept in ``st.session_state``
    (``data``, ``processed_data``, ``data_loaded``). While no data is loaded a
    welcome page is shown instead of the dashboard.
    """
    visualizer = StreamlitTrafficVisualizer()

    st.title("🚗 台灣交通事故數據可視化器")
    st.markdown("---")

    # Sidebar: choose where the data comes from.
    st.sidebar.header("📊 數據來源選擇")
    data_source = st.sidebar.radio(
        "選擇數據來源:",
        ["上傳檔案", "使用範例數據"],
    )

    if data_source == "上傳檔案":
        uploaded_file = st.sidebar.file_uploader(
            "上傳 CSV 或 Excel 檔案",
            type=['csv', 'xlsx', 'xls'],
            help="請上傳包含交通事故數據的檔案",
        )
        if uploaded_file is None:
            st.session_state.data_loaded = False
        else:
            try:
                data = _read_uploaded_table(uploaded_file)
            except Exception as e:
                st.sidebar.error(f"檔案讀取失敗: {e}")
                st.session_state.data_loaded = False
            else:
                st.session_state.data = data
                st.session_state.data_loaded = True
                st.sidebar.success("檔案上傳成功!")
    elif st.sidebar.button("載入範例數據"):
        st.session_state.data = visualizer.create_sample_data()
        st.session_state.data_loaded = True
        st.sidebar.success("範例數據載入成功!")

    if not (st.session_state.data_loaded and st.session_state.data is not None):
        # Nothing loaded yet: show the welcome page.
        st.header("🎯 歡迎使用台灣交通事故數據可視化器")
        st.markdown(_WELCOME_MARKDOWN)
        return

    with st.spinner("正在處理數據..."):
        processed_data = visualizer.preprocess_data(st.session_state.data)
        st.session_state.processed_data = processed_data

    if processed_data is None or len(processed_data) == 0:
        st.error("數據處理失敗,請檢查數據格式")
        return

    # Summary metrics; each one is shown only when its column exists.
    st.header("📈 數據摘要")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("總記錄數", len(processed_data))
    with col2:
        if '年份' in processed_data.columns:
            st.metric("涵蓋年份", processed_data['年份'].dropna().nunique())
    with col3:
        if '統計類型' in processed_data.columns:
            st.metric("統計類型", processed_data['統計類型'].nunique())
    with col4:
        if '數值' in processed_data.columns:
            total_value = processed_data['數值'].sum()
            st.metric("總數值", f"{total_value:,.0f}")

    with st.expander("📋 數據預覽", expanded=False):
        st.dataframe(processed_data.head(10))

    # Charts: one tab per chart, all following the same build-or-warn flow.
    st.header("📊 可視化圖表")
    chart_specs = [
        ("月份趨勢圖", "📈 月份趨勢分析",
         visualizer.create_monthly_trend_chart,
         "無法創建月份趨勢圖,可能是數據不足或格式不正確"),
        ("動態餅圖", "🥧 動態餅圖分析",
         visualizer.create_animated_pie_chart,
         "無法創建動態餅圖,可能是數據不足或格式不正確"),
        ("年度柱狀圖", "📊 年度柱狀圖比較",
         visualizer.create_bar_chart,
         "無法創建柱狀圖,可能是數據不足或格式不正確"),
    ]
    chart_tabs = st.tabs([spec[0] for spec in chart_specs])
    for tab, (_, heading, build_chart, failure_message) in zip(chart_tabs, chart_specs):
        with tab:
            st.subheader(heading)
            figure = build_chart(processed_data)
            # The builders signal failure with None, so test for that
            # explicitly rather than relying on the figure's truthiness.
            if figure is not None:
                st.plotly_chart(figure, use_container_width=True)
            else:
                st.warning(failure_message)

    # Per-category totals, means and record counts.
    st.header("📊 詳細統計")
    if '統計類型' in processed_data.columns and '數值' in processed_data.columns:
        stats_summary = (
            processed_data.groupby('統計類型')['數值']
            .agg(['sum', 'mean', 'count'])
            .round(2)
        )
        stats_summary.columns = ['總計', '平均值', '記錄數']
        st.dataframe(stats_summary)

    # Download of the processed table.
    st.header("💾 下載數據")
    # to_csv() returns a str when no path is given, so its encoding argument
    # never takes effect; encode explicitly so the download starts with a
    # UTF-8 BOM and spreadsheet tools detect the encoding of the Chinese text.
    csv_bytes = processed_data.to_csv(index=False).encode('utf-8-sig')
    st.download_button(
        label="下載處理後的數據 (CSV)",
        data=csv_bytes,
        file_name="processed_traffic_data.csv",
        mime="text/csv",
    )
# Script entry point: start the app only when this file is run directly.
if __name__ == "__main__":
    main()