Update app.py
Browse files
app.py
CHANGED
|
@@ -3,26 +3,27 @@ import requests
|
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
import re
|
| 5 |
|
| 6 |
-
def
|
| 7 |
url = "https://finance.naver.com/research/company_list.naver"
|
| 8 |
response = requests.get(url)
|
| 9 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 10 |
|
| 11 |
-
# 모든 PDF
|
| 12 |
-
pdf_links = soup.find_all('a', href=re.compile("
|
| 13 |
-
|
| 14 |
for link in pdf_links:
|
| 15 |
-
|
|
|
|
| 16 |
# 다운로드 가능한 링크 형태로 저장
|
| 17 |
-
|
| 18 |
-
return
|
| 19 |
|
| 20 |
# Gradio 인터페이스
|
| 21 |
with gr.Blocks() as app:
|
| 22 |
-
btn_fetch = gr.Button("PDF ๋งํฌ ์กฐํ")
|
| 23 |
-
output_links = gr.Dataframe(headers=["PDF ๋งํฌ"], interactive=False)
|
| 24 |
btn_fetch.click(
|
| 25 |
-
fn=
|
| 26 |
outputs=output_links
|
| 27 |
)
|
| 28 |
|
|
|
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
import re
|
| 5 |
|
| 6 |
+
def fetch_pdf_links_and_titles():
    """Scrape the Naver Finance company-research list for PDF report links.

    Downloads the listing page, selects every ``<a>`` whose ``href`` is a
    ``.pdf`` under ``https://ssl.pstatic.net/imgstock/upload/research/company/``
    and pairs the anchor's stripped text with an HTML download link.

    Returns:
        list[list[str]]: One ``[title, anchor_html]`` row per matching anchor.
        ``anchor_html`` is an ``<a href=... download=...>`` element whose
        ``download`` attribute is the last path segment of the PDF URL.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Function-scope import: only this function needs it, and the file's
    # top-level import block is left untouched.
    from html import escape

    url = "https://finance.naver.com/research/company_list.naver"
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection and the UI callback would never return.
    response = requests.get(url, timeout=10)
    # Surface 4xx/5xx responses instead of parsing an error page into an
    # empty (and misleading) result.
    response.raise_for_status()
    # NOTE(review): response.text uses the charset requests infers from the
    # response headers; if titles come back garbled, confirm the page encoding
    # and consider handing response.content to BeautifulSoup instead.
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all PDF links together with their titles. The pattern is a raw
    # string so `\.` reaches the regex engine unchanged, and the dots in the
    # host name are escaped so they only match a literal ".".
    pdf_pattern = re.compile(
        r"^https://ssl\.pstatic\.net/imgstock/upload/research/company/.*\.pdf$"
    )
    pdf_links = soup.find_all('a', href=pdf_pattern)

    links_and_titles = []
    for link in pdf_links:
        # The anchor text is used as the title.
        title = link.text.strip()
        full_url = link['href']
        file_name = full_url.rsplit('/', 1)[-1]
        # The href comes from a scraped page, so escape it (quotes included)
        # before embedding it in attribute values and element text.
        safe_url = escape(full_url, quote=True)
        safe_name = escape(file_name, quote=True)
        # Store the URL as a downloadable link.
        links_and_titles.append(
            [title, f"<a href='{safe_url}' download='{safe_name}'>{safe_url}</a>"]
        )
    return links_and_titles
|
| 20 |
|
| 21 |
# Gradio interface: a button that runs the scraper and a read-only table that
# displays the rows it returns.
with gr.Blocks() as app:
    # Labels restored from mis-encoded text (UTF-8 bytes that had been decoded
    # with a Thai code page): "PDF 링크 및 정보 조회" / "PDF 링크".
    btn_fetch = gr.Button("PDF 링크 및 정보 조회")
    # NOTE(review): the second column receives <a> markup from
    # fetch_pdf_links_and_titles; confirm whether the Dataframe needs an
    # explicit datatype for that column to render it as a link rather than
    # as literal text.
    output_links = gr.Dataframe(headers=["Title", "PDF 링크"], interactive=False)
    # No inputs are wired: the handler is called without arguments and its
    # return value fills the table.
    btn_fetch.click(
        fn=fetch_pdf_links_and_titles,
        outputs=output_links,
    )
|
| 29 |
|