Spaces:
Sleeping
Sleeping
feat: 第二题基本完成
Browse files
app.py
CHANGED
|
@@ -8,7 +8,7 @@ this_directory = this_file.parent
|
|
| 8 |
data_cangzhou_folder = this_directory / "data/Cangzhou"
|
| 9 |
data_static_folder = data_cangzhou_folder / "static"
|
| 10 |
|
| 11 |
-
#
|
| 12 |
data_folder = data_static_folder
|
| 13 |
|
| 14 |
# 然后
|
|
@@ -17,72 +17,133 @@ import pandas as pd
|
|
| 17 |
import os
|
| 18 |
from datetime import datetime
|
| 19 |
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
| 21 |
st.set_page_config(layout="wide")
|
| 22 |
|
| 23 |
-
#
|
| 24 |
-
st.title("
|
| 25 |
|
| 26 |
-
#
|
| 27 |
col1, col2, col3 = st.columns(3)
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
# 根据提供的CSV格式,第一列是时间戳但没有列名
|
| 33 |
df = pd.read_csv(file_path, header=0, names=['timestamp', 'pm2d5', 'lat', 'lon'])
|
| 34 |
-
|
| 35 |
-
# 确保时间戳列是datetime格式
|
| 36 |
df['timestamp'] = pd.to_datetime(df['timestamp'])
|
| 37 |
-
|
| 38 |
-
# 过滤2019-01-01 00:00:00到2019-01-01 12:00:00之间的数据
|
| 39 |
-
start_time = datetime(2019, 1, 1, 0, 0, 0)
|
| 40 |
-
end_time = datetime(2019, 1, 1, 12, 0, 0)
|
| 41 |
filtered_df = df[(df['timestamp'] >= start_time) & (df['timestamp'] <= end_time)]
|
| 42 |
-
|
| 43 |
return filtered_df
|
| 44 |
|
| 45 |
-
#
|
| 46 |
with col1:
|
| 47 |
-
st.header("PM2.5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
-
#
|
| 50 |
csv_files = [f for f in os.listdir(data_folder) if f.endswith('.csv')]
|
| 51 |
-
|
| 52 |
-
# 创建字典存储每个传感器的数据
|
| 53 |
sensor_data = {}
|
| 54 |
-
|
| 55 |
-
# 加载并过滤每个传感器的数据
|
| 56 |
for file in csv_files:
|
| 57 |
file_path = os.path.join(data_folder, file)
|
| 58 |
-
sensor_name = file.split('.')[0]
|
| 59 |
-
sensor_data[sensor_name] = load_and_filter_data(file_path)
|
| 60 |
|
| 61 |
-
#
|
| 62 |
-
# 创建一个以时间戳为索引,每个传感器的PM2.5值为列的数据框
|
| 63 |
chart_data = pd.DataFrame()
|
| 64 |
-
|
| 65 |
for sensor, data in sensor_data.items():
|
| 66 |
-
# 使用pm2d5列作为PM2.5数据
|
| 67 |
chart_data[sensor] = data.set_index('timestamp')['pm2d5']
|
| 68 |
|
| 69 |
-
#
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
y_label="PM2.5水平"
|
| 76 |
-
)
|
| 77 |
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
with col2:
|
| 80 |
-
st.header("
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
-
#
|
| 85 |
with col3:
|
| 86 |
-
st.header("第三个图表")
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
|
|
|
| 8 |
data_cangzhou_folder = this_directory / "data/Cangzhou"
|
| 9 |
data_static_folder = data_cangzhou_folder / "static"
|
| 10 |
|
| 11 |
+
# 数据文件夹路径(静态数据)
|
| 12 |
data_folder = data_static_folder
|
| 13 |
|
| 14 |
# 然后
|
|
|
|
| 17 |
import os
|
| 18 |
from datetime import datetime
|
| 19 |
|
| 20 |
+
import plotly.graph_objects as go
|
| 21 |
+
import plotly.express as px
|
| 22 |
+
|
| 23 |
+
# 设置页面配置为宽屏模式,以便能同时显示三个图表
|
| 24 |
st.set_page_config(layout="wide")
|
| 25 |
|
| 26 |
+
# 设置应用标题,参考数据集介绍,反映作业要求
|
| 27 |
+
st.title("S&M-HSTPM2d5数据集可视化——清华大学数据可视化课程作业1")
|
| 28 |
|
| 29 |
+
# 创建三个等宽的列,分别展示图(a)、图(b)和图(c)
|
| 30 |
col1, col2, col3 = st.columns(3)
|
| 31 |
|
| 32 |
+
@st.cache_data
def load_and_filter_data(file_path, start_time, end_time):
    """Load one sensor CSV and keep only the rows inside a time window.

    The file's own header row is replaced: the columns are named, in order,
    ``timestamp``, ``pm2d5``, ``lat`` and ``lon``.

    Args:
        file_path: Path of the CSV file, handed straight to ``pd.read_csv``.
        start_time: Lower bound of the window (inclusive).
        end_time: Upper bound of the window (inclusive).

    Returns:
        The rows whose parsed ``timestamp`` satisfies
        ``start_time <= timestamp <= end_time``.
    """
    column_names = ['timestamp', 'pm2d5', 'lat', 'lon']
    # header=0 consumes the first line of the file; `names` supplies the
    # labels used instead of whatever that line contained.
    readings = pd.read_csv(file_path, header=0, names=column_names)

    # Parse the timestamp column so it can be compared with datetime bounds.
    readings['timestamp'] = pd.to_datetime(readings['timestamp'])

    stamps = readings['timestamp']
    not_too_early = stamps >= start_time
    not_too_late = stamps <= end_time
    return readings[not_too_early & not_too_late]
|
| 41 |
|
| 42 |
+
# 图(a): PM2.5随时间变化折线图(静态传感器数据)
|
| 43 |
with col1:
    # Figure (a): PM2.5 level over time, one line per static sensor.
    st.header("(a)折线图:展示 PM2.5 浓度水平随时间的变化")
    with st.expander("绘制要求"):
        st.markdown("1. 使用 static 文件夹中的 .csv 文件,并筛选出时间戳在 2019-01-01 00:00:00 到 2019-01-01 12:00:00 之间的数据。")
        st.markdown("2. X 轴和 Y 轴分别表示时间和 PM2.5 浓度水平。")
        st.markdown("3. 为每个静态传感器绘制一条折线,并用不同颜色进行区分。")

    # Time window kept for this figure (both bounds inclusive).
    start_dt = datetime(2019, 1, 1, 0, 0, 0)
    end_dt = datetime(2019, 1, 1, 12, 0, 0)

    # os.listdir() returns names in arbitrary order; sorting keeps the legend
    # order and the colour assigned to each sensor stable between runs.
    csv_files = sorted(f for f in os.listdir(data_folder) if f.endswith('.csv'))
    sensor_data = {}
    for file in csv_files:
        file_path = os.path.join(data_folder, file)
        # splitext drops only the final extension, so file names that contain
        # extra dots still map to distinct sensor names.
        sensor_name = os.path.splitext(file)[0]
        sensor_data[sensor_name] = load_and_filter_data(file_path, start_dt, end_dt)

    # Wide table: timestamp index, one PM2.5 column per sensor.
    # Assigning columns one at a time to an empty DataFrame would align every
    # later sensor to the FIRST sensor's timestamps and silently drop readings
    # taken at any other time; an outer-join concat keeps all timestamps.
    pm_series = {
        sensor: data.set_index('timestamp')['pm2d5']
        for sensor, data in sensor_data.items()
    }
    if pm_series:
        chart_data = pd.concat(pm_series, axis=1).sort_index()
    else:
        chart_data = pd.DataFrame()

    if chart_data.empty:
        # No CSV files, or no rows inside the window: say so instead of
        # handing an empty frame to the plotting call.
        st.info("所选时间段内没有可用的静态传感器数据。")
    else:
        # The index is named "timestamp", so that key is mapped as well as
        # the generic "index" key to get a translated hover label.
        fig_line = px.line(chart_data,
                           title="PM2.5随时间变化折线图",
                           labels={"variable": "传感器", "value": "PM2.5",
                                   "index": "时间", "timestamp": "时间"})
        fig_line.update_layout(xaxis_title="时间", yaxis_title="PM2.5水平")
        st.plotly_chart(fig_line, use_container_width=True)

    with st.expander("详细说明"):
        st.markdown("**描述:** 本图展示了各静态传感器在2019年1月1日0:00至12:00期间的PM2.5浓度随时间变化的趋势。横轴表示时间,纵轴表示PM2.5数值,不同折线代表不同传感器的数据。")
        st.markdown("**解读:** 曲线波动反映了空气质量的时段变化,峰值可能预示短期污染事件,而持续低值表明空气较为清洁。传感器数据对比有助于区域污染差异的分析。")
|
| 77 |
+
|
| 78 |
+
# 图(b): 车辆移动散点图(移动传感器数据)
|
| 79 |
with col2:
|
| 80 |
+
st.header("(b) A Scatter Plot that shows how vehicles carrying mobile sensors move in the city.")
|
| 81 |
+
with st.expander("绘制要求"):
|
| 82 |
+
st.markdown("1. 使用 mobile 文件夹中的 .csv 文件,并筛选出时间戳在 2019-01-02 10:00:00 到 2019-01-02 10:20:00 之间的数据。")
|
| 83 |
+
st.markdown("2. X 轴和 Y 轴分别表示经度和纬度。")
|
| 84 |
+
st.markdown("3. 使用散点图展示车辆传感器的位置,用不同颜色区分各传感器,并通过调整透明度(早期数据更透明)表达时间演变。")
|
| 85 |
+
|
| 86 |
+
# 定义mobile数据文件夹路径
|
| 87 |
+
mobile_folder = str(this_directory / "data/Cangzhou/Mobile")
|
| 88 |
+
csv_files_mobile = [f for f in os.listdir(mobile_folder) if f.endswith('.csv')]
|
| 89 |
+
mobile_sensor_data = {}
|
| 90 |
+
start_mobile = datetime(2019, 1, 2, 10, 0, 0)
|
| 91 |
+
end_mobile = datetime(2019, 1, 2, 10, 20, 0)
|
| 92 |
+
|
| 93 |
+
for file in csv_files_mobile:
|
| 94 |
+
sensor_name = file.split('.')[0]
|
| 95 |
+
file_path = os.path.join(mobile_folder, file)
|
| 96 |
+
df = load_and_filter_data(file_path, start_mobile, end_mobile)
|
| 97 |
+
if not df.empty:
|
| 98 |
+
mobile_sensor_data[sensor_name] = df
|
| 99 |
+
|
| 100 |
+
# 使用Plotly绘制散点图并根据时间调整透明度
|
| 101 |
+
fig2 = go.Figure()
|
| 102 |
+
colors = px.colors.qualitative.Plotly
|
| 103 |
+
def hex_to_rgba(hex_color, alpha):
    """Convert a hexadecimal colour string into a CSS ``rgba(...)`` string.

    Args:
        hex_color: Colour such as ``"#636EFA"`` or ``"636EFA"``; leading
            ``#`` characters are optional. Three-digit shorthand
            (``"#0af"``) is expanded to six digits. Any characters after the
            sixth digit are ignored.
        alpha: Opacity, inserted verbatim as the fourth component.

    Returns:
        A string of the form ``"rgba(R, G, B, alpha)"`` where R, G and B are
        the decimal values of the three colour channels.

    Raises:
        ValueError: If fewer than six digits remain after shorthand
            expansion, or if a channel is not valid hexadecimal.
    """
    digits = hex_color.lstrip('#')
    if len(digits) == 3:
        # CSS shorthand: each digit stands for a doubled pair (#0af -> #00aaff).
        digits = ''.join(ch * 2 for ch in digits)
    if len(digits) < 6:
        raise ValueError(f"invalid hex colour: {hex_color!r}")
    r, g, b = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    return f"rgba({r}, {g}, {b}, {alpha})"
|
| 109 |
+
|
| 110 |
+
total_time = (end_mobile - start_mobile).total_seconds()
|
| 111 |
+
sensor_index = 0
|
| 112 |
+
for sensor, data in mobile_sensor_data.items():
|
| 113 |
+
base_color = colors[sensor_index % len(colors)]
|
| 114 |
+
sensor_index += 1
|
| 115 |
+
custom_colors = []
|
| 116 |
+
for ts in data['timestamp']:
|
| 117 |
+
dt_seconds = (ts - start_mobile).total_seconds()
|
| 118 |
+
normalized = dt_seconds / total_time if total_time > 0 else 0
|
| 119 |
+
# 透明度:早期数据较透明(alpha值较低),后期较不透明
|
| 120 |
+
alpha = 0.3 + 0.7 * normalized
|
| 121 |
+
custom_colors.append(hex_to_rgba(base_color, alpha))
|
| 122 |
+
fig2.add_trace(go.Scatter(
|
| 123 |
+
x = data['lon'],
|
| 124 |
+
y = data['lat'],
|
| 125 |
+
mode = 'markers',
|
| 126 |
+
marker = dict(color = custom_colors, size = 10),
|
| 127 |
+
name = sensor
|
| 128 |
+
))
|
| 129 |
+
|
| 130 |
+
fig2.update_layout(
|
| 131 |
+
xaxis_title="经度",
|
| 132 |
+
yaxis_title="纬度",
|
| 133 |
+
title="车辆移动散点图"
|
| 134 |
+
)
|
| 135 |
+
|
| 136 |
+
st.plotly_chart(fig2, use_container_width=True)
|
| 137 |
+
|
| 138 |
+
with st.expander("详细说明"):
|
| 139 |
+
st.markdown("**描述:** 本图利用Mobile文件夹中的CSV数据,在2019年1月2日10:00至10:20期间展示车辆传感器的位置分布。横轴表示经度,纵轴表示纬度,不同颜色代表不同传感器。")
|
| 140 |
+
st.markdown("**解读:** 通过调整散点透明度(早期数据较透明),图中显示了车辆移动的时间演变趋势,为探索城市中车辆行驶路径提供依据。")
|
| 141 |
+
|
| 142 |
|
| 143 |
+
# 图(c): 第三个图表(预留,用于展示其他分析)
|
| 144 |
with col3:
    # Figure (c): placeholder column; no chart is drawn yet.
    st.header("(c) [预留] 第三个图表")
    placeholder_notes = (
        "**描述:** 此图为预留图表,可用于展示PM2.5分布或其他数据分析结果。",
        "**解读:** 后续将根据数据分析补充图表内容及详细解释。",
    )
    with st.expander("详细说明"):
        # Emit the notes in their original order, one markdown call each.
        for note in placeholder_notes:
            st.markdown(note)
    st.write("在这里添加您的第三个图表")
|