Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import requests
|
| 3 |
+
from bs4 import BeautifulSoup
|
| 4 |
+
import pandas as pd
|
| 5 |
+
|
| 6 |
+
def fetch_article(url, timeout=10):
    """Download an article page and extract its headline and body text.

    Args:
        url: Address of the article page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the app indefinitely.

    Returns:
        A ``(title, content)`` tuple of whitespace-stripped strings taken
        from the ``<h1 id="caas-lead-header-undefined">`` element and the
        ``<div class="caas-body">`` element respectively.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status (4xx/5xx).
        ValueError: If the page does not contain the expected title or body
            element.
    """
    response = requests.get(url, timeout=timeout)
    # Fail early on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    title_tag = soup.find('h1', id='caas-lead-header-undefined')
    body_tag = soup.find('div', class_='caas-body')

    # find() returns None when nothing matches; report that explicitly rather
    # than letting ``.text`` raise an opaque AttributeError.
    if title_tag is None or body_tag is None:
        raise ValueError("找不到文章標題或內文,請確認網址是否為Yahoo新聞的文章頁面")

    return title_tag.text.strip(), body_tag.text.strip()
|
| 12 |
+
|
| 13 |
+
def main():
    """Render the Streamlit page: URL input, scrape button, results, CSV download.

    When the scrape button is pressed, the URL is passed to
    ``fetch_article`` and the resulting ``(title, content)`` pair is stored
    in ``st.session_state["article"]``. The results are rendered from that
    stored value rather than from inside the button branch: Streamlit reruns
    the script on every widget interaction, and ``st.button`` is only True
    on the run triggered by its own click, so rendering inside the branch
    would make the results vanish as soon as the download button is pressed.
    """
    st.set_page_config(page_title="Yahoo新聞爬蟲", page_icon="📰", layout="wide")

    st.title("Yahoo新聞爬蟲 📰")
    st.markdown("這個應用程式可以爬取Yahoo新聞的標題和內容。")

    url = st.text_input(
        "請輸入Yahoo新聞的URL:",
        "https://tw.news.yahoo.com/%E5%8F%B0%E8%82%A1%E8%A1%80%E6%B4%971807%E9%BB%9E-%E8%AD%89%E4%BA%A4%E6%89%80%E7%B8%BD%E5%BA%A7%E6%8F%AD-%E6%9A%B4%E8%B7%8C%E5%8E%9F%E5%9B%A0-%E5%BF%85%E8%A6%81%E6%99%82%E6%8E%A1%E5%8F%96%E7%A9%A9%E5%AE%9A%E5%B8%82%E5%A0%B4%E6%8E%AA%E6%96%BD-071522182.html",
    )

    if st.button("爬取新聞"):
        with st.spinner("正在爬取新聞..."):
            try:
                # Strip stray whitespace that often comes along with a pasted URL.
                title, content = fetch_article(url.strip())
            except Exception as e:
                # UI boundary: show any failure to the user instead of crashing,
                # and drop a previously stored article so stale results are not
                # displayed underneath the error.
                st.session_state["article"] = None
                st.error(f"爬取失敗:{str(e)}")
            else:
                st.session_state["article"] = (title, content)
                st.success("爬取成功!")

    article = st.session_state.get("article")
    if article is None:
        # Nothing scraped yet (or the last attempt failed): nothing to show.
        return

    title, content = article
    df = pd.DataFrame({
        '標題': [title],
        '內容': [content],
    })

    st.subheader("新聞標題")
    st.write(title)

    st.subheader("新聞內容")
    st.write(content)

    st.subheader("DataFrame 預覽")
    st.dataframe(df)

    csv = df.to_csv(index=False).encode('utf-8')
    st.download_button(
        label="下載 CSV 檔案",
        data=csv,
        file_name="yahoo_news.csv",
        mime="text/csv",
    )
|
| 53 |
+
|
| 54 |
+
# Script entry point: build the page only when this file is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    main()
|