unnastyle's picture
Update app.py
c44d20a verified
import html
import re

import gradio as gr
import pandas as pd
import requests
# Debugging: log each step of the scraping process so it can be inspected.
def debug_print(*args):
    """Print the given values on one line, prefixed with the ``[DEBUG]`` tag."""
    tagged = ("[DEBUG]",) + args
    print(*tagged)
# Patterns are compiled once at import time instead of once per table row.
_TABLE_PATTERN = re.compile(
    r'<table[^>]*class=["\']type_2["\'][^>]*>(.*?)</table>',
    re.DOTALL | re.IGNORECASE,
)
_ROW_PATTERN = re.compile(r'<tr[^>]*>(.*?)</tr>', re.DOTALL | re.IGNORECASE)
_CELL_PATTERN = re.compile(r'<td[^>]*>(.*?)</td>', re.DOTALL | re.IGNORECASE)
_TAG_PATTERN = re.compile(r'<.*?>', re.DOTALL)

# Output dictionary keys, in the same order as the first twelve <td> cells
# of a data row (rank, name, current price, change, change rate, volume,
# bid, ask, total bid quantity, total ask quantity, PER, ROE).
_COLUMN_KEYS = (
    "์ˆœ์œ„",
    "์ข…๋ชฉ๋ช…",
    "ํ˜„์žฌ๊ฐ€",
    "์ „์ผ๋น„",
    "๋“ฑ๋ฝ๋ฅ ",
    "๊ฑฐ๋ž˜๋Ÿ‰",
    "๋งค์ˆ˜ํ˜ธ๊ฐ€",
    "๋งค๋„ํ˜ธ๊ฐ€",
    "๋งค์ˆ˜์ด์ž”๋Ÿ‰",
    "๋งค๋„์ด์ž”๋Ÿ‰",
    "PER",
    "ROE",
)

# Seconds to wait for the server; without a timeout requests.get() can
# block forever and freeze the UI callback.
_REQUEST_TIMEOUT = 10


def _clean_html(raw_html):
    """Return the cell text with tags removed, entities decoded and whitespace trimmed."""
    without_tags = _TAG_PATTERN.sub('', raw_html)
    # Decode entities such as &amp; / &nbsp; so names are not shown escaped.
    return html.unescape(without_tags).strip()


def scrape_data(market_type: str) -> list:
    """
    Scrape the "rising stocks" page of Naver Finance for one market.

    Only regular expressions are used (no BeautifulSoup / lxml).

    Args:
        market_type: '0' for KOSPI, '1' for KOSDAQ; appended to the page
            URL as the ``sosok`` query value.

    Returns:
        A list with one dict per data row, keyed by the twelve column
        names in ``_COLUMN_KEYS``; every value is the cleaned cell text.
        An empty list is returned when the request fails, the server
        answers with an error status, or the target table is not found.
    """
    base_url = "https://finance.naver.com/sise/sise_rise.naver?sosok="
    url = f"{base_url}{market_type}"
    debug_print("Requesting URL:", url)

    # Network failures are reported the same way as a missing table (empty
    # result) so callers need only one failure path.
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        debug_print("Error: request failed:", exc)
        return []
    debug_print("Status Code:", response.status_code)
    if not response.ok:
        debug_print("Error: unexpected HTTP status.")
        return []

    # 1) Extract the <table class="type_2"> ... </table> section.
    match_table = _TABLE_PATTERN.search(response.text)
    if not match_table:
        debug_print("Error: Target table not found.")
        return []

    # 2) Split the table body into <tr> blocks.
    rows = _ROW_PATTERN.findall(match_table.group(1))
    debug_print(f"Found total {len(rows)} <tr> blocks in table.")

    data_list = []
    expected = len(_COLUMN_KEYS)
    for row_html in rows:
        # 3) Extract the <td> cells; rows with fewer cells than expected
        #    (spacer / header rows) carry no data and are skipped.
        cols = _CELL_PATTERN.findall(row_html)
        if len(cols) < expected:
            continue

        values = [_clean_html(cell) for cell in cols[:expected]]
        rank, name, current = values[:3]
        debug_print(f"[Row {len(data_list)}] rank={rank}, name={name}, current_price={current}")
        data_list.append(dict(zip(_COLUMN_KEYS, values)))
    return data_list
def make_table(market_choice):
    """
    Scrape the selected market and return the result as a DataFrame.

    market_choice (str): the dropdown label. The KOSPI label maps to
    market code '0'; any other value maps to '1' (KOSDAQ).

    Returns a DataFrame built from the scraped rows, or a one-cell
    DataFrame holding a failure message when nothing was retrieved.
    """
    debug_print(f"Scraping data for market_choice={market_choice}...")

    # Translate the dropdown label into the code scrape_data() expects.
    if market_choice == "์ฝ”์Šคํ”ผ":
        market_code = "0"
    else:
        market_code = "1"

    records = scrape_data(market_code)
    if records:
        debug_print("Scraping done. Converting to DataFrame.")
        return pd.DataFrame(records)

    debug_print("No data retrieved or table not found.")
    failure_notice = ["๋ฐ์ดํ„ฐ๋ฅผ ๊ฐ€์ ธ์˜ค์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค."]
    return pd.DataFrame(failure_notice)
def main():
    """
    Build the Gradio interface and launch it.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# ๋„ค์ด๋ฒ„ ์ฆ๊ถŒ ์Šคํฌ๋ž˜ํ•‘ : ์ฝ”์Šคํ”ผ / ์ฝ”์Šค๋‹ฅ ์„ ํƒ")

        # Dropdown for choosing the market (KOSPI / KOSDAQ).
        market_dropdown = gr.Dropdown(
            choices=["์ฝ”์Šคํ”ผ", "์ฝ”์Šค๋‹ฅ"],
            value="์ฝ”์Šคํ”ผ",  # default selection
            label="์ข…๋ชฉ ์„ ํƒ",
        )

        # Trigger button and the table that displays the scraped rows.
        fetch_button = gr.Button("๋ฐ์ดํ„ฐ ๊ฐ€์ ธ์˜ค๊ธฐ")
        result_table = gr.DataFrame(label="์Šคํฌ๋ž˜ํ•‘ ๊ฒฐ๊ณผ")

        # Clicking the button runs make_table() on the current dropdown
        # value and shows the returned DataFrame.
        fetch_button.click(make_table, market_dropdown, result_table)

    demo.launch()
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()