Vincentran committed on
Commit
7c8ba90
·
verified ·
1 Parent(s): 787fce2

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +198 -173
src/streamlit_app.py CHANGED
@@ -1,184 +1,209 @@
1
  import streamlit as st
 
 
2
  import pandas as pd
3
  import plotly.express as px
4
- import requests
5
- import numpy as np
6
-
7
# Page chrome: wide layout with the dashboard title and a graduation-cap icon.
st.set_page_config(
    layout="wide",
    page_icon="🎓",
    page_title="Global Technology Education Dashboard",
)

# World Bank Data360 endpoint for the single EdStats indicator shown on this
# dashboard. Adjacent literals concatenate into one URL string.
API_URL = (
    "https://data360api.worldbank.org/data360/data"
    "?DATABASE_ID=WB_EDSTATS"
    "&INDICATOR=WB_EDSTATS_UIS_FOSGP_5T8_F500600700"
    "&skip=0"
)
14
-
15
@st.cache_data(ttl=86400)
def load_data():
    """Download the indicator from ``API_URL`` and return it as a DataFrame.

    The result is cached by Streamlit for 86400 seconds. ``OBS_VALUE`` and
    ``TIME_PERIOD`` are coerced to numbers, and rows where either of them is
    missing or non-numeric are dropped.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    reply = requests.get(API_URL, timeout=30)
    reply.raise_for_status()

    # The records live under the "value" key of the JSON payload.
    frame = pd.DataFrame(reply.json()["value"])

    numeric_columns = ["OBS_VALUE", "TIME_PERIOD"]
    for column in numeric_columns:
        frame[column] = pd.to_numeric(frame[column], errors="coerce")

    return frame.dropna(subset=numeric_columns)
30
-
31
# ---------------------------------------------------------------------------
# Dashboard body: load the data, build the sidebar filters, then render the
# KPIs, charts, data table and CSV export for the selected year range.
# ---------------------------------------------------------------------------
df = load_data()

st.title("🎓 Global Technology Education Dashboard")
st.caption("Source: World Bank Education Statistics (Auto Updated)")

# Every widget below derives its bounds from the data; with no rows,
# int(NaN) and idxmax() would raise, so stop early with a message instead.
if df.empty:
    st.warning("No data was returned by the World Bank API.")
    st.stop()

# Sidebar
st.sidebar.header("Filters")

countries = sorted(df["REF_AREA"].dropna().unique())

selected_countries = st.sidebar.multiselect(
    "Select Countries",
    countries,
    default=["VNM"] if "VNM" in countries else countries[:3],
)

min_year = int(df["TIME_PERIOD"].min())
max_year = int(df["TIME_PERIOD"].max())

if min_year < max_year:
    year_range = st.sidebar.slider(
        "Year Range",
        min_year,
        max_year,
        (min_year, max_year),
    )
else:
    # Only one year is available: a range slider needs two distinct bounds,
    # so skip the widget and use the single year directly.
    year_range = (min_year, max_year)

filtered_df = df[
    (df["TIME_PERIOD"] >= year_range[0])
    & (df["TIME_PERIOD"] <= year_range[1])
]

# Years can have gaps, so a narrow range may match no observation at all.
if filtered_df.empty:
    st.info("No observations in the selected year range.")
    st.stop()

# KPI Section
latest_year = filtered_df["TIME_PERIOD"].max()
latest_df = filtered_df[filtered_df["TIME_PERIOD"] == latest_year]

global_avg = latest_df["OBS_VALUE"].mean()
top_country = latest_df.loc[latest_df["OBS_VALUE"].idxmax()]

col1, col2, col3, col4 = st.columns(4)

col1.metric("Countries", latest_df["REF_AREA"].nunique())
col2.metric("Latest Year", int(latest_year))
col3.metric("Global Average", f"{global_avg:.2f}")
col4.metric("Top Country", top_country["REF_AREA"])

st.divider()

# Global Trend: mean indicator value per year across all areas.
st.subheader("📈 Global Trend")

global_trend = (
    filtered_df
    .groupby("TIME_PERIOD")["OBS_VALUE"]
    .mean()
    .reset_index()
)

fig_trend = px.line(
    global_trend,
    x="TIME_PERIOD",
    y="OBS_VALUE",
    markers=True,
    title="Average Technology Education Indicator Over Time",
)

st.plotly_chart(fig_trend, use_container_width=True)

# Country Comparison: one line per selected area.
st.subheader("🌎 Country Comparison")

compare_df = filtered_df[filtered_df["REF_AREA"].isin(selected_countries)]

if compare_df.empty:
    # Nothing selected (or no data for the selection): explain the blank.
    st.info("Select at least one country with data in this year range.")
else:
    fig_compare = px.line(
        compare_df,
        x="TIME_PERIOD",
        y="OBS_VALUE",
        color="REF_AREA",
        markers=True,
    )
    st.plotly_chart(fig_compare, use_container_width=True)

# Top Countries in the latest year of the selected range.
st.subheader("🏆 Top 20 Countries")

top20 = latest_df.sort_values("OBS_VALUE", ascending=False).head(20)

fig_top = px.bar(
    top20,
    x="OBS_VALUE",
    y="REF_AREA",
    orientation="h",
)

st.plotly_chart(fig_top, use_container_width=True)

# Distribution of the latest-year values.
st.subheader("📊 Distribution")

fig_hist = px.histogram(
    latest_df,
    x="OBS_VALUE",
    nbins=30,
)

st.plotly_chart(fig_hist, use_container_width=True)

# Data Explorer: newest year first, areas alphabetical within a year.
st.subheader("📋 Data Explorer")

st.dataframe(
    filtered_df.sort_values(
        ["TIME_PERIOD", "REF_AREA"],
        ascending=[False, True],
    ),
    use_container_width=True,
)

# Download CSV
csv = filtered_df.to_csv(index=False)

st.download_button(
    label="⬇ Download CSV",
    data=csv,
    file_name="technology_education_dashboard.csv",
    mime="text/csv",
)
 
1
  import streamlit as st
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
  import pandas as pd
5
  import plotly.express as px
6
+ import plotly.graph_objects as go
7
+ from datetime import datetime
8
+ import time
9
+
10
+ st.set_page_config(page_title="Market Price Monitor", layout="wide")
11
+
12
+ # ==================== WEB SCRAPING FUNCTIONS ====================
13
+
14
def scrape_coinmarketcap():
    """Scrape cryptocurrency prices from the CoinMarketCap home page.

    At most the first 15 table rows are read; rows with fewer than 7 cells
    are skipped. All values are returned as the raw text found on the page.

    Returns:
        tuple: ``(df, error)``.

        * On success ``df`` has the columns ``Name``, ``Symbol``, ``Price``,
          ``24h Change`` and ``Market Cap`` and ``error`` is ``None``.
        * On failure ``df`` is an empty DataFrame and ``error`` is a message
          string. The function never raises.
    """
    url = "https://coinmarketcap.com/"
    headers = {"User-Agent": "Mozilla/5.0"}
    columns = ["Name", "Symbol", "Price", "24h Change", "Market Cap"]

    def text_of(tag):
        # Guard on the very tag that is dereferenced, so a missing element
        # yields "N/A" instead of an AttributeError on None.
        return tag.text.strip() if tag is not None else "N/A"

    try:
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            return pd.DataFrame(), "Error fetching data"

        soup = BeautifulSoup(response.content, "html.parser")

        data = []
        for row in soup.select("tbody tr")[:15]:
            cols = row.find_all("td")
            if len(cols) < 7:
                continue

            name_cell = cols[2]
            data.append({
                "Name": text_of(name_cell.find("p", class_=True)),
                "Symbol": text_of(
                    name_cell.find("p", class_="coin-item-symbol")
                ),
                "Price": cols[3].text.strip(),
                "24h Change": cols[4].text.strip(),
                "Market Cap": cols[6].text.strip(),
            })
    except Exception as e:
        # Deliberate catch-all: callers unpack (df, error) without a try
        # block, so every failure is reported through the error slot.
        return pd.DataFrame(), str(e)

    if not data:
        # An empty frame has none of the expected columns; report it as an
        # error so callers do not index into missing columns.
        return pd.DataFrame(), "No cryptocurrency rows found on the page"

    return pd.DataFrame(data, columns=columns), None
49
+
50
def scrape_product_prices(product_urls):
    """Scrape product title and price from each URL (selectors are customizable).

    Args:
        product_urls: iterable of product page URLs.

    Returns:
        pandas.DataFrame: one row per URL with the columns ``URL``,
        ``Product``, ``Price`` and ``Scraped At``. A URL that cannot be
        fetched yields a row with ``Product == "Error"`` and the error text
        in ``Price``. The columns are present even when no URL is given.
    """
    columns = ["URL", "Product", "Price", "Scraped At"]
    headers = {"User-Agent": "Mozilla/5.0"}
    data = []

    # NOTE(security): these URLs come straight from user input and are
    # fetched server-side; restrict the allowed hosts before exposing this
    # app to untrusted users.
    for url in product_urls:
        scraped_at = datetime.now().strftime("%Y-%m-%d %H:%M")
        try:
            response = requests.get(url, headers=headers, timeout=10)
            # Blocked / error pages (403, 503, ...) must be reported as
            # errors rather than parsed as if they were product pages.
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")

            # Customize the selectors per website (e.g. Amazon, Shopee).
            title = (
                soup.find("span", class_="a-size-medium")
                or soup.find("h1")
            )
            price = (
                soup.find("span", class_="a-price-whole")
                or soup.find("div", class_="price")
            )

            product_text = title.text.strip() if title else "Unknown"
            price_text = price.text.strip() if price else "N/A"
        except Exception as e:
            # Deliberate per-URL best effort: one bad URL must not abort
            # the whole batch, so the failure is recorded in its row.
            product_text = "Error"
            price_text = f"Error: {str(e)}"

        data.append({
            "URL": url,
            "Product": product_text,
            "Price": price_text,
            "Scraped At": scraped_at,
        })

    return pd.DataFrame(data, columns=columns)
79
+
80
+ # ==================== STREAMLIT DASHBOARD ====================
81
+
82
# ---------------------------------------------------------------------------
# Dashboard body: sidebar settings, cryptocurrency section, e-commerce
# section, footer and optional auto-refresh.
# ---------------------------------------------------------------------------
st.title("📊 Market Price Monitor Dashboard")
st.markdown("Theo dõi giá thị trường thời gian thực - Web Scraping tự động")

# Sidebar
st.sidebar.header("⚙️ Cài đặt")

# Market selection
market_type = st.sidebar.radio(
    "Chọn thị trường:",
    ["Cryptocurrency", "Sản phẩm E-commerce", "Cả hai"],
)

auto_refresh = st.sidebar.checkbox("Tự động làm mới (30s)", value=False)
refresh_interval = st.sidebar.slider("Tần suất (giây)", 10, 120, 30)

# Multipliers for compact money notation such as "$1.32T".
_SUFFIX_SCALE = {"K": 1e3, "M": 1e6, "B": 1e9, "T": 1e12}


def _money_to_number(series, scaled=False):
    """Parse scraped money text (e.g. "$67,123.45") into floats.

    The first number in each cell is used; unparseable cells become NaN.
    With ``scaled=True`` a trailing K/M/B/T suffix is applied as a
    multiplier. NOTE(review): the exact cell format depends on the scraped
    page; confirm against live data.
    """
    parts = series.astype(str).str.extract(r"([\d,]*\.?\d+)\s*([KMBTkmbt])?")
    amount = pd.to_numeric(
        parts[0].str.replace(",", "", regex=False), errors="coerce"
    )
    if scaled:
        amount = amount * parts[1].str.upper().map(_SUFFIX_SCALE).fillna(1.0)
    return amount


def _percent_to_number(series):
    """Parse scraped percentage text (e.g. "-1.23%") into floats (NaN if bad)."""
    cleaned = series.astype(str).str.replace(r"[^\d.-]", "", regex=True)
    return pd.to_numeric(cleaned, errors="coerce")


# ==================== CRYPTOCURRENCY SECTION ====================
if market_type in ["Cryptocurrency", "Cả hai"]:
    st.header("🪙 Cryptocurrency Prices")

    # Scrape whenever this section is shown, including the "Cả hai" choice.
    df_crypto, error = scrape_coinmarketcap()

    if error:
        st.error(f"❌ Lỗi: {error}")
    elif df_crypto.empty:
        # An empty frame has no "Price"/"24h Change" columns to work with.
        st.warning("⚠️ Không có dữ liệu")
    else:
        # Convert the scraped text once and reuse it for metrics and charts.
        prices = _money_to_number(df_crypto["Price"])
        changes = _percent_to_number(df_crypto["24h Change"])
        market_caps = _money_to_number(df_crypto["Market Cap"], scaled=True)

        # Metrics
        col1, col2, col3 = st.columns(3)

        with col1:
            st.metric("Total Cryptos", len(df_crypto))
        with col2:
            avg_price = prices.mean()
            # NaN is truthy, so test it explicitly instead of `if avg_price`.
            st.metric(
                "Avg Price",
                f"${avg_price:.2f}" if pd.notna(avg_price) else "N/A",
            )
        with col3:
            # "+" is a regex metacharacter, so match it literally.
            is_gain = df_crypto["24h Change"].astype(str).str.contains(
                "+", regex=False
            )
            gains = changes[is_gain].dropna()
            if not gains.empty:
                # Largest positive change among the rows marked with "+".
                top_row = df_crypto.loc[gains.idxmax()]
                st.metric(
                    "Top Gainer",
                    f"{top_row['Name']} ({top_row['24h Change']})",
                )
            else:
                st.metric("Top Gainer", "N/A")

        # Data table
        st.subheader("📋 Dữ liệu chi tiết")
        st.dataframe(
            df_crypto,
            use_container_width=True,
            hide_index=True,
        )

        # Numeric copy used by the charts; assigning whole columns avoids
        # chained in-place replacement on a column selection.
        df_crypto_clean = df_crypto.copy()
        df_crypto_clean["Price"] = prices
        df_crypto_clean["24h Change"] = changes

        # Charts
        col1, col2 = st.columns(2)

        with col1:
            st.subheader("📈 Top 10 by Price")
            df_top10 = df_crypto_clean.nlargest(10, "Price")

            fig_bar = px.bar(
                df_top10,
                x="Symbol",
                y="Price",
                color="Name",
                title="Top 10 Crypto Prices",
                labels={"Price": "Price (USD)"},
            )
            st.plotly_chart(fig_bar, use_container_width=True)

        with col2:
            st.subheader("🥧 Market Cap Distribution")
            # Pie slices need numeric values, not the raw "$..." text.
            df_caps = df_crypto.head(10).copy()
            df_caps["Market Cap"] = market_caps
            df_caps = df_caps.dropna(subset=["Market Cap"])

            fig_pie = px.pie(
                df_caps,
                names="Name",
                values="Market Cap",
                title="Top 10 Market Cap",
            )
            st.plotly_chart(fig_pie, use_container_width=True)

        # 24h Change chart
        st.subheader("📊 24h Change (%)")

        fig_change = px.bar(
            df_crypto_clean.head(15),
            x="Name",
            y="24h Change",
            color="24h Change",
            color_continuous_scale="RdYlGn",
            title="24h Price Change (%)",
        )
        st.plotly_chart(fig_change, use_container_width=True)

# ==================== E-COMMERCE SECTION ====================
if market_type in ["Sản phẩm E-commerce", "Cả hai"]:
    st.header("🛒 E-commerce Product Prices")

    # Input URLs
    st.subheader("🔗 Thêm URL sản phẩm")
    url_input = st.text_area(
        "Nhập URLs (mỗi dòng 1 URL):",
        placeholder="https://amazon.com/product1\nhttps://shopee.vn/product2",
        height=150,
    )

    if st.button("🔍 Scrape Prices", type="primary"):
        urls = [line.strip() for line in url_input.splitlines() if line.strip()]
        if urls:
            with st.spinner("Đang scrape dữ liệu..."):
                # Keep the result in session state: the button is True only
                # for the rerun right after the click, so results rendered
                # inside this branch would vanish on the next rerun
                # (download click, auto-refresh, any widget change).
                st.session_state["df_products"] = scrape_product_prices(urls)
        else:
            st.warning("⚠️ Vui lòng nhập ít nhất 1 URL")

    df_products = st.session_state.get("df_products")
    if df_products is not None:
        st.subheader("📋 Kết quả")
        st.dataframe(df_products, use_container_width=True, hide_index=True)

        # Download CSV
        csv = df_products.to_csv(index=False).encode("utf-8")
        st.download_button(
            "📥 Download CSV",
            csv,
            "market_prices.csv",
            "text/csv",
        )

# ==================== FOOTER ====================
st.markdown("---")
st.markdown(
    f"""
<div style='text-align: center; color: gray;'>
🔄 Cập nhật: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} |
Data scraped từ CoinMarketCap & E-commerce sites
</div>
""",
    unsafe_allow_html=True,
)

# Auto-refresh: wait for the chosen interval, then rerun the whole script.
if auto_refresh:
    time.sleep(refresh_interval)
    st.rerun()