Spaces:
Sleeping
Sleeping
| import requests | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
| import streamlit as st | |
| import random | |
| import time # time ๋ชจ๋์ ์ํฌํธ | |
# Naver mobile news ranking page that this script downloads.
url = "https://m.news.naver.com/rankingList"

# HTTP headers sent with the request: a desktop Chrome User-Agent string and a
# Referer pointing at the Naver mobile news home page.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36",
    "Referer": "https://m.news.naver.com/",
}
def random_delay(min_delay=1, max_delay=3):
    """Block the current thread for a randomly chosen duration.

    The duration is drawn with ``random.uniform(min_delay, max_delay)`` and
    handed straight to ``time.sleep``.

    Args:
        min_delay: One bound of the sleep duration, in seconds.
        max_delay: The other bound of the sleep duration, in seconds.

    Returns:
        None.
    """
    time.sleep(random.uniform(min_delay, max_delay))
# Request the ranking page once and parse it.
# The timeout keeps the app from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of silently parsing an error
# page as if it were the ranking list.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# One dict per ranked article, in the order the articles appear on the page.
news_list = []

# Each 'div.rankingnews_box' groups the ranked articles of a single press outlet.
for news_box in soup.select('div.rankingnews_box'):
    # Press (publisher) name; guarded like the other lookups so that a box
    # without the name tag yields a placeholder instead of an AttributeError.
    press_tag = news_box.find('strong', class_='rankingnews_name')
    press_name = press_tag.text.strip() if press_tag is not None else 'No Press'

    # No delay inside this loop: the HTML is already downloaded and no further
    # request is made per item, so sleeping here would only slow every rerun.
    for news_item in news_box.select('ul.rankingnews_list li'):
        # Rank number
        rank_tag = news_item.find('em', class_='list_ranking_num')
        rank = rank_tag.text.strip() if rank_tag is not None else 'No Rank'

        # Headline
        title_tag = news_item.find('strong', class_='list_title')
        title = title_tag.text.strip() if title_tag is not None else 'No Title'

        # Article link; a single lookup, and an anchor without an href falls
        # back to '#' instead of raising KeyError.
        link_tag = news_item.find('a')
        link = link_tag.get('href', '#') if link_tag is not None else '#'

        # Time text shown next to the article
        time_tag = news_item.find('span', class_='list_time')
        time_info = time_tag.text.strip() if time_tag is not None else 'No Time'

        # Thumbnail URL taken from the first <img> 'src' attribute, if any
        img_tag = news_item.find('img')
        if img_tag is not None:
            image_url = img_tag.get('src', 'No Image Available')
        else:
            image_url = 'No Image Available'

        # Collect the extracted fields for this article
        news_list.append({
            'Press': press_name,
            'Rank': rank,
            'Title': title,
            'Link': link,
            'Time': time_info,
            'Image URL': image_url,
        })
# Turn the scraped records into a DataFrame (one row per article).
df = pd.DataFrame(news_list)

# Page heading
st.title("Naver Mobile Ranking News Scraper")

# Articles are laid out as a grid with this many cards per row.
columns_per_row = 3
total_items = len(df)

# Walk the DataFrame one grid row at a time.
for row_start in range(0, total_items, columns_per_row):
    # A fresh set of columns is created for every grid row.
    grid = st.columns(columns_per_row)
    # The last grid row may be only partially filled; clamp to the data length.
    row_end = min(row_start + columns_per_row, total_items)

    for cell, position in zip(grid, range(row_start, row_end)):
        row = df.iloc[position]
        with cell:
            # Show the thumbnail unless the placeholder value is present.
            # NOTE(review): newer Streamlit releases deprecate use_column_width
            # in favour of use_container_width - confirm against the deployed version.
            thumbnail = row['Image URL']
            if thumbnail != 'No Image Available':
                st.image(thumbnail, use_column_width=True)
            # Headline rendered as a bold markdown hyperlink
            st.markdown(f"**[{row['Title']}]({row['Link']})**")
            # Press name and time information
            st.write(f"Press: {row['Press']} | Time: {row['Time']}")