JERNGOC committed on
Commit
93dd59c
·
verified ·
1 Parent(s): ff789b4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import pandas as pd
5
+
6
def fetch_article(url, timeout=10):
    """Download a Yahoo News article page and extract its title and body text.

    Args:
        url: Address of the article page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(title, content)`` tuple of whitespace-stripped strings.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page lacks the expected headline or body element.
    """
    # Bound the wait so an unresponsive server cannot hang the app forever.
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')

    # find() returns None when nothing matches; report that explicitly rather
    # than letting a bare AttributeError on ``.text`` reach the user.
    title_tag = soup.find('h1', id='caas-lead-header-undefined')
    if title_tag is None:
        raise ValueError(
            "article title not found (expected <h1 id='caas-lead-header-undefined'>)"
        )

    body_tag = soup.find('div', class_='caas-body')
    if body_tag is None:
        raise ValueError(
            "article body not found (expected <div class='caas-body'>)"
        )

    return title_tag.text.strip(), body_tag.text.strip()
12
+
13
def main():
    """Render the Streamlit page: take a URL, scrape it, display and export it.

    The page shows a title, a short description and a URL text box pre-filled
    with a sample article. When the scrape button is pressed, the article is
    fetched via ``fetch_article`` and its title and content are shown as text
    and as a one-row DataFrame, with a button to download that frame as CSV.
    Any exception raised while doing so is reported with ``st.error``.
    """
    default_url = "https://tw.news.yahoo.com/%E5%8F%B0%E8%82%A1%E8%A1%80%E6%B4%971807%E9%BB%9E-%E8%AD%89%E4%BA%A4%E6%89%80%E7%B8%BD%E5%BA%A7%E6%8F%AD-%E6%9A%B4%E8%B7%8C%E5%8E%9F%E5%9B%A0-%E5%BF%85%E8%A6%81%E6%99%82%E6%8E%A1%E5%8F%96%E7%A9%A9%E5%AE%9A%E5%B8%82%E5%A0%B4%E6%8E%AA%E6%96%BD-071522182.html"

    st.set_page_config(page_title="Yahoo新聞爬蟲", page_icon="📰", layout="wide")
    st.title("Yahoo新聞爬蟲 📰")
    st.markdown("這個應用程式可以爬取Yahoo新聞的標題和內容。")

    url = st.text_input("請輸入Yahoo新聞的URL:", default_url)

    # Nothing more to render until the user presses the scrape button.
    if not st.button("爬取新聞"):
        return

    with st.spinner("正在爬取新聞..."):
        try:
            title, content = fetch_article(url)
            frame = pd.DataFrame({'標題': [title], '內容': [content]})

            st.success("爬取成功!")

            # Title and content share the same heading-then-text layout.
            for heading, text in (("新聞標題", title), ("新聞內容", content)):
                st.subheader(heading)
                st.write(text)

            st.subheader("DataFrame 預覽")
            st.dataframe(frame)

            st.download_button(
                label="下載 CSV 檔案",
                data=frame.to_csv(index=False).encode('utf-8'),
                file_name="yahoo_news.csv",
                mime="text/csv",
            )
        except Exception as e:
            # UI boundary: surface any failure to the user instead of crashing.
            st.error(f"爬取失敗:{str(e)}")
53
+
54
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()