import html
import io
import logging
import tempfile

import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from openpyxl import load_workbook
from openpyxl.styles import Font
|
|
| |
# Configure the root logger at DEBUG so every scraping step logged below
# (URL, row counts, per-row values, export status) is emitted.
logging.basicConfig(level=logging.DEBUG)
|
|
def format_change_web(text):
    """Format a change / change-rate cell as colored HTML for the web view.

    Rising values are rendered in red, falling values in blue.

    Args:
        text: Cell text. Either a direction keyword followed by an amount
            ("상한가", "상승", "하한가", "하락"), or a signed value that
            starts with "+" or "-".

    Returns:
        A ``<span>`` with an arrow icon and color when a direction is
        recognized; otherwise ``text`` unchanged.
    """
    # Checked in order; the first keyword found in the text wins.
    keyword_styles = (
        ("상한가", "red", "↑"),   # upper price limit
        ("상승", "red", "▲"),     # rise
        ("하한가", "blue", "↓"),  # lower price limit
        ("하락", "blue", "▼"),    # fall
    )
    for keyword, color, arrow in keyword_styles:
        if keyword in text:
            # Keep digits, minus sign and decimal point; thousands separators
            # and the keyword itself are dropped.
            number = "".join(ch for ch in text if ch.isdigit() or ch in "-.")
            return f'<span style="color:{color};">{arrow}{number}</span>'

    if text.startswith("+"):
        return f'<span style="color:red;">▲{text}</span>'
    if text.startswith("-"):
        return f'<span style="color:blue;">▼{text}</span>'
    return text
|
|
def format_change_excel(text):
    """Format a change / change-rate cell as plain text with an arrow icon.

    Rising values get "▲" or "↑", falling values get "▼" or "↓". The result
    contains no HTML tags, so it can be written to a spreadsheet cell as is.

    Args:
        text: Cell text. Either a direction keyword followed by an amount
            ("상한가", "상승", "하한가", "하락"), or a signed value that
            starts with "+" or "-".

    Returns:
        The arrow-prefixed text when a direction is recognized; otherwise
        ``text`` unchanged.
    """
    # Checked in order; the first keyword found in the text wins.
    keyword_arrows = (
        ("상한가", "↑"),  # upper price limit
        ("상승", "▲"),    # rise
        ("하한가", "↓"),  # lower price limit
        ("하락", "▼"),    # fall
    )
    for keyword, arrow in keyword_arrows:
        if keyword in text:
            # Keep digits, minus sign and decimal point; thousands separators
            # and the keyword itself are dropped.
            number = "".join(ch for ch in text if ch.isdigit() or ch in "-.")
            return f"{arrow}{number}"

    if text.startswith("+"):
        return f"▲{text}"
    if text.startswith("-"):
        return f"▼{text}"
    return text
|
|
# Market label (as offered by the UI) -> Naver Finance "top risers" page.
_MARKET_URLS = {
    "코스닥": "https://finance.naver.com/sise/sise_rise.naver?sosok=1",
    "코스피": "https://finance.naver.com/sise/sise_rise.naver?sosok=0",
}

# Column labels shared by the HTML table and the Excel sheet.
_TABLE_HEADERS = [
    "N", "종목명", "현재가", "전일비", "등락률", "거래량",
    "매수호가", "매도호가", "매수총잔량", "매도총잔량", "PER", "ROE",
]

_TABLE_STYLE = """
<style>
    table {
        width: 100%;
        border-collapse: collapse;
    }
    th, td {
        border: 1px solid #dddddd;
        text-align: center;
        padding: 8px;
    }
    th {
        background-color: #f2f2f2;
    }
</style>
"""

# Prefixes produced by format_change_excel for rising / falling values.
_RISE_MARKS = ("▲", "↑")
_FALL_MARKS = ("▼", "↓")

# Without a timeout requests.get() can block forever on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 10


def _escape_cell(value):
    """Escape scraped text for use as HTML element content."""
    # quote=False: the text is never placed inside an attribute, and numeric
    # quote entities would add digits that format_change_web would pick up.
    return html.escape(value, quote=False)


def _parse_stock_row(cols):
    """Turn one row's <td> cells into (display_row, excel_row) lists."""
    texts = [col.get_text(strip=True) for col in cols[:12]]
    rank = texts[0]
    # The stock name is the link text; a row without a link raises
    # AttributeError, which the caller logs before skipping the row.
    name = cols[1].find("a").get_text(strip=True)
    price = texts[2].replace(",", "")
    change_raw, rate_raw = texts[3], texts[4]
    # Volume, bid, ask, total bid quantity, total ask quantity.
    quantities = [text.replace(",", "") for text in texts[5:10]]
    per, roe = texts[10], texts[11]

    change_excel = format_change_excel(change_raw)
    rate_excel = format_change_excel(rate_raw)

    logging.debug(
        "추출 데이터 - N: %s, 종목명: %s, 현재가: %s, 전일비: %s -> %s, "
        "등락률: %s -> %s, 거래량: %s, 매수호가: %s, 매도호가: %s, "
        "매수총잔량: %s, 매도총잔량: %s, PER: %s, ROE: %s",
        rank, name, price, change_raw, change_excel, rate_raw, rate_excel,
        *quantities, per, roe,
    )

    display_row = [
        _escape_cell(rank),
        _escape_cell(name),
        _escape_cell(price),
        # Escape first: the formatter wraps the text in intentional markup.
        format_change_web(_escape_cell(change_raw)),
        format_change_web(_escape_cell(rate_raw)),
        *(_escape_cell(quantity) for quantity in quantities),
        _escape_cell(per),
        _escape_cell(roe),
    ]
    excel_row = [rank, name, price, change_excel, rate_excel,
                 *quantities, per, roe]
    return display_row, excel_row


def _render_html_table(headers, rows):
    """Render the table as HTML; row cells must already be HTML-safe."""
    head_cells = "".join(f"<th>{_escape_cell(label)}</th>" for label in headers)
    body_rows = "".join(
        "<tr>" + "".join(f"<td>{cell}</td>" for cell in row) + "</tr>\n"
        for row in rows
    )
    return f"{_TABLE_STYLE}<table>\n<tr>{head_cells}</tr>\n{body_rows}</table>"


def _color_change_cell(cell):
    """Color a change cell red (rise) or blue (fall) based on its arrow prefix."""
    value = cell.value
    # Empty cells are read back as None, and numeric-looking ones may not be
    # str; neither carries an arrow prefix.
    if not isinstance(value, str):
        return
    if value.startswith(_RISE_MARKS):
        cell.font = Font(color="FF0000")
    elif value.startswith(_FALL_MARKS):
        cell.font = Font(color="0000FF")


def _write_excel_file(df):
    """Write *df* to a temporary .xlsx file with colored change cells.

    Returns the path of the created file.
    """
    raw_buffer = io.BytesIO()
    df.to_excel(raw_buffer, index=False, engine="openpyxl")
    raw_buffer.seek(0)

    workbook = load_workbook(raw_buffer)
    sheet = workbook.active
    # Columns 4-5 hold the change and change-rate values; row 1 is the header.
    for row in sheet.iter_rows(min_row=2, min_col=4, max_col=5,
                               max_row=sheet.max_row):
        for cell in row:
            _color_change_cell(cell)

    # Serialize fully in memory first so a failed save leaves no temp file.
    styled_buffer = io.BytesIO()
    workbook.save(styled_buffer)

    # delete=False: the caller hands the path to the UI for download.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
        tmp.write(styled_buffer.getvalue())
    return tmp.name


def scrape_market(market):
    """Scrape the Naver Finance top-risers table for the given market.

    Args:
        market: Market label, either "코스닥" or "코스피".

    Returns:
        A ``(html, excel_path)`` tuple. ``html`` is the rendered table, or an
        error message when the market is unknown, the request fails, the
        table is missing, or no rows could be extracted. ``excel_path`` is
        the path of a temporary .xlsx file, or ``None`` on any of those
        errors or when the Excel export itself fails.
    """
    url = _MARKET_URLS.get(market)
    if url is None:
        logging.error("잘못된 시장 선택")
        return "잘못된 시장을 선택하셨습니다.", None

    logging.debug("요청할 URL: %s", url)

    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        logging.error("웹 페이지 요청 실패: %s", e)
        # The message is rendered as HTML, so escape the exception text.
        return (
            f"데이터를 가져오는 중 오류가 발생했습니다: {_escape_cell(str(e))}",
            None,
        )
    logging.debug("웹 페이지 요청 성공")

    soup = BeautifulSoup(response.text, "html.parser")
    table = soup.find("table", {"class": "type_2"})
    if not table:
        logging.error("테이블을 찾을 수 없습니다.")
        return "테이블을 찾을 수 없습니다.", None

    rows = table.find_all("tr")
    logging.debug("총 행 수: %s", len(rows))

    data_display = []
    data_excel = []
    # The first two rows are skipped before the data rows are read.
    for row in rows[2:]:
        cols = row.find_all("td")
        if len(cols) < 12:
            logging.debug("데이터가 부족한 행을 건너뜁니다.")
            continue
        try:
            display_row, excel_row = _parse_stock_row(cols)
        except Exception as e:
            # Best effort: one malformed row must not abort the whole scrape.
            logging.error("데이터 추출 중 오류 발생: %s", e)
            continue
        data_display.append(display_row)
        data_excel.append(excel_row)

    if not data_display:
        logging.error("추출된 데이터가 없습니다.")
        return "추출된 데이터가 없습니다.", None

    html_table = _render_html_table(_TABLE_HEADERS, data_display)
    logging.debug("HTML 테이블 생성 완료")

    df_excel = pd.DataFrame(data_excel, columns=_TABLE_HEADERS)
    logging.debug("엑셀용 데이터프레임 생성 완료")

    try:
        excel_path = _write_excel_file(df_excel)
        logging.debug("엑셀 파일 생성 및 저장 완료")
    except Exception as e:
        # The table is still useful without the download, so only log here.
        logging.exception("엑셀 파일 생성 중 오류 발생: %s", e)
        excel_path = None

    return html_table, excel_path
|
|
# Gradio UI: pick a market, press the button, then view the scraped table and
# download it as an Excel file.
with gr.Blocks() as demo:
    gr.Markdown("### 네이버 증권 코스닥/코스피 종목 정보 스크래핑")

    with gr.Row():
        # The choice values are passed verbatim to scrape_market.
        market_choice = gr.Radio(
            choices=["코스닥", "코스피"],
            label="시장 선택",
            value="코스닥",
        )

    btn = gr.Button("데이터 가져오기")

    with gr.Row():
        output_html = gr.HTML()
        output_file = gr.File(label="엑셀 파일 다운로드")

    # scrape_market returns (html, excel_path), matching the two outputs.
    btn.click(scrape_market, inputs=market_choice, outputs=[output_html, output_file])
|
|
# Start the Gradio server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
|