# Spaces: Paused
| import streamlit as st | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from datetime import datetime | |
| import time | |
# Set the browser tab title and use the full-width page layout.
st.set_page_config(
    page_title="Market Price Monitor",
    layout="wide",
)


# ==================== WEB SCRAPING FUNCTIONS ====================
def scrape_coinmarketcap():
    """Scrape cryptocurrency prices from the CoinMarketCap home page.

    Fetches the page, looks at no more than the first 15 ``tbody tr`` rows
    and keeps those that have at least seven cells.

    Returns:
        tuple: ``(df, error)``. On success ``df`` holds the columns
        ``Name``, ``Symbol``, ``Price``, ``24h Change`` and ``Market Cap``
        (raw, stripped cell text) and ``error`` is ``None``. On failure
        ``df`` is an empty DataFrame and ``error`` is a message string.
    """
    url = "https://coinmarketcap.com/"
    headers = {"User-Agent": "Mozilla/5.0"}
    columns = ["Name", "Symbol", "Price", "24h Change", "Market Cap"]

    def _text_or_na(node):
        # A missing tag becomes "N/A" instead of raising AttributeError.
        return node.text.strip() if node is not None else "N/A"

    try:
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            return pd.DataFrame(), "Error fetching data"

        soup = BeautifulSoup(response.content, "html.parser")
        data = []
        for row in soup.select("tbody tr")[:15]:
            cols = row.find_all("td")
            if len(cols) < 7:
                continue

            name_cell = cols[2]
            # Prefer a <p> that carries a class, but fall back to any <p>.
            # The guard and the dereference must use the same lookup,
            # otherwise a class-less <p> aborts the whole scrape.
            name_tag = name_cell.find("p", class_=True)
            if name_tag is None:
                name_tag = name_cell.find("p")
            symbol_tag = name_cell.find("p", class_="coin-item-symbol")

            data.append({
                "Name": _text_or_na(name_tag),
                "Symbol": _text_or_na(symbol_tag),
                "Price": cols[3].text.strip(),
                "24h Change": cols[4].text.strip(),
                "Market Cap": cols[6].text.strip(),
            })

        # Passing explicit columns keeps the schema stable even when no row
        # matched, so callers can index df["Price"] without a KeyError.
        return pd.DataFrame(data, columns=columns), None
    except Exception as e:
        # Deliberately broad: callers expect failures to come back as the
        # error string of the returned tuple, not as an exception.
        return pd.DataFrame(), str(e)
def scrape_product_prices(product_urls):
    """Scrape product title and price from each URL (customizable selectors).

    Args:
        product_urls: Iterable of product page URLs.

    Returns:
        pandas.DataFrame with the columns ``URL``, ``Product``, ``Price``
        and ``Scraped At``, one row per input URL. A URL that cannot be
        fetched or parsed yields a row with ``Product`` set to ``"Error"``
        and the error message in ``Price``. Empty input yields an empty
        frame that still carries these columns.
    """
    columns = ["URL", "Product", "Price", "Scraped At"]
    headers = {"User-Agent": "Mozilla/5.0"}  # loop-invariant, built once
    data = []
    for url in product_urls:
        try:
            response = requests.get(url, headers=headers, timeout=10)
            # Surface HTTP errors instead of parsing an error page as if it
            # were a product page.
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")
            # Customize the selectors per website (e.g. Amazon, Shopee).
            title = soup.find("span", class_="a-size-medium") or soup.find("h1")
            price = (
                soup.find("span", class_="a-price-whole")
                or soup.find("div", class_="price")
            )
            row = {
                "URL": url,
                "Product": title.text.strip() if title else "Unknown",
                "Price": price.text.strip() if price else "N/A",
            }
        except Exception as e:
            # Best effort: one failing URL must not abort the whole batch.
            row = {
                "URL": url,
                "Product": "Error",
                "Price": f"Error: {e}",
            }
        row["Scraped At"] = datetime.now().strftime("%Y-%m-%d %H:%M")
        data.append(row)
    return pd.DataFrame(data, columns=columns)
# ==================== STREAMLIT DASHBOARD ====================
st.title("📊 Market Price Monitor Dashboard")
st.markdown("Theo dõi giá thị trường thời gian thực - Web Scraping tự động")

# Sidebar: market selection plus the auto-refresh toggle and interval.
with st.sidebar:
    st.header("⚙️ Cài đặt")
    # Which market section(s) to render below.
    market_type = st.radio(
        "Chọn thị trường:",
        ["Cryptocurrency", "Sản phẩm E-commerce", "Cả hai"],
    )
    auto_refresh = st.checkbox("Tự động làm mới (30s)", value=False)
    # Seconds to wait before rerunning: min 10, max 120, default 30.
    refresh_interval = st.slider("Tần suất (giây)", 10, 120, 30)
# ==================== CRYPTOCURRENCY SECTION ====================
def _to_number(series):
    """Convert scraped text cells to floats (NaN where no number is found).

    Takes the first numeric token of each cell, ignoring thousands
    separators, and scales it by an immediately following K/M/B/T suffix.
    NOTE(review): the suffix handling assumes cells may be abbreviated like
    "$1.2T" — confirm against the actual page markup.
    """
    suffix_scale = {"K": 1e3, "M": 1e6, "B": 1e9, "T": 1e12}
    parts = (
        series.astype(str)
        .str.replace(",", "", regex=False)
        .str.extract(r"([-+]?\d*\.?\d+)\s*([KMBT])?")
    )
    values = pd.to_numeric(parts[0], errors="coerce")
    return values * parts[1].map(suffix_scale).fillna(1.0)


if market_type in ["Cryptocurrency", "Cả hai"]:
    st.header("🪙 Cryptocurrency Prices")
    col1, col2, col3 = st.columns(3)

    # Always scrape when this section is shown. Scraping only for the
    # "Cryptocurrency" choice left df_crypto/error unset for "Cả hai".
    df_crypto, error = scrape_coinmarketcap()

    if error:
        st.error(f"❌ Lỗi: {error}")
    elif df_crypto.empty:
        # Nothing was parsed; skip the metrics and charts, which need rows.
        st.warning("⚠️ Không có dữ liệu")
    else:
        # Numeric copy used for metrics and charts. The raw text frame is
        # kept for display. Columns are assigned back explicitly because a
        # chained ``df[col].replace(..., inplace=True)`` does not reliably
        # modify the frame.
        df_crypto_clean = df_crypto.copy()
        for column in ("Price", "24h Change", "Market Cap"):
            df_crypto_clean[column] = _to_number(df_crypto[column])

        # Metrics
        with col1:
            st.metric("Total Cryptos", len(df_crypto))
        with col2:
            avg_price = df_crypto_clean["Price"].mean()
            # NaN is truthy, so test for it explicitly.
            st.metric(
                "Avg Price",
                f"${avg_price:.2f}" if pd.notna(avg_price) else "N/A",
            )
        with col3:
            # Top gainer is the largest positive 24h change.
            # NOTE(review): relies on the scraped text carrying a "-" sign
            # for losses — verify against the page markup.
            changes = df_crypto_clean["24h Change"]
            gainers = changes[changes > 0]
            if not gainers.empty:
                top_gainer = df_crypto.loc[gainers.idxmax()]
                st.metric(
                    "Top Gainer",
                    f"{top_gainer['Name']} ({top_gainer['24h Change']})",
                )
            else:
                st.metric("Top Gainer", "N/A")

        # Data table
        st.subheader("📋 Dữ liệu chi tiết")
        st.dataframe(
            df_crypto,
            use_container_width=True,
            hide_index=True,
        )

        # Charts
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("📈 Top 10 by Price")
            df_top10 = df_crypto_clean.nlargest(10, "Price")
            fig_bar = px.bar(
                df_top10,
                x="Symbol",
                y="Price",
                color="Name",
                title="Top 10 Crypto Prices",
                labels={"Price": "Price (USD)"},
            )
            st.plotly_chart(fig_bar, use_container_width=True)
        with col2:
            st.subheader("🥧 Market Cap Distribution")
            # Pie slices need numeric values, so use the parsed column and
            # drop rows whose market cap could not be parsed.
            df_caps = df_crypto_clean.head(10).dropna(subset=["Market Cap"])
            fig_pie = px.pie(
                df_caps,
                names="Name",
                values="Market Cap",
                title="Top 10 Market Cap",
            )
            st.plotly_chart(fig_pie, use_container_width=True)

        # 24h Change chart
        st.subheader("📊 24h Change (%)")
        fig_change = px.bar(
            df_crypto_clean.head(15),
            x="Name",
            y="24h Change",
            color="24h Change",
            color_continuous_scale="RdYlGn",
            title="24h Price Change (%)",
        )
        st.plotly_chart(fig_change, use_container_width=True)
# ==================== E-COMMERCE SECTION ====================
if market_type in ["Sản phẩm E-commerce", "Cả hai"]:
    st.header("🛒 E-commerce Product Prices")

    # Input URLs
    st.subheader("🔗 Thêm URL sản phẩm")
    url_input = st.text_area(
        "Nhập URLs (mỗi dòng 1 URL):",
        placeholder="https://amazon.com/product1\nhttps://shopee.vn/product2",
        height=150,
    )

    if st.button("🔍 Scrape Prices", type="primary"):
        urls = [line.strip() for line in url_input.splitlines() if line.strip()]
        if urls:
            with st.spinner("Đang scrape dữ liệu..."):
                # Keep the result in session state. st.button is only True
                # on the run triggered by the click, so anything rendered
                # solely inside this branch disappears on the next rerun
                # (for example after pressing the download button).
                st.session_state["df_products"] = scrape_product_prices(urls)
        else:
            st.warning("⚠️ Vui lòng nhập ít nhất 1 URL")

    # Render the most recent scrape result, if there is one.
    if "df_products" in st.session_state:
        df_products = st.session_state["df_products"]
        st.subheader("📋 Kết quả")
        st.dataframe(df_products, use_container_width=True, hide_index=True)

        # Download CSV
        csv = df_products.to_csv(index=False).encode("utf-8")
        st.download_button(
            "📥 Download CSV",
            csv,
            "market_prices.csv",
            "text/csv",
        )
# ==================== FOOTER ====================
st.markdown("---")

# Timestamp of this script run, shown in the centered footer.
last_updated = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
footer_html = f"""
<div style='text-align: center; color: gray;'>
🔄 Cập nhật: {last_updated} |
Data scraped từ CoinMarketCap & E-commerce sites
</div>
"""
st.markdown(footer_html, unsafe_allow_html=True)

# Auto-refresh: wait for the configured interval, then rerun the script.
if auto_refresh:
    time.sleep(refresh_interval)
    st.rerun()