Update app.py
Browse files
app.py
CHANGED
|
@@ -8,14 +8,17 @@ def fetch_pdf_links_and_titles():
|
|
| 8 |
response = requests.get(url)
|
| 9 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 10 |
|
|
|
|
|
|
|
| 11 |
# Find all PDF links and their titles.
|
| 12 |
pdf_links = soup.find_all('a', href=re.compile("^https://ssl.pstatic.net/imgstock/upload/research/company/.*\.pdf$"))
|
| 13 |
-
links_and_titles = []
|
| 14 |
for link in pdf_links:
|
| 15 |
title = link.text.strip() # Extract the title from the link text
|
| 16 |
full_url = link['href']
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
return links_and_titles
|
| 20 |
|
| 21 |
# Gradio interface
|
|
|
|
| 8 |
response = requests.get(url)
|
| 9 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 10 |
|
| 11 |
+
seen_urls = set()
|
| 12 |
+
links_and_titles = []
|
| 13 |
# Find all PDF links and their titles.
|
| 14 |
pdf_links = soup.find_all('a', href=re.compile("^https://ssl.pstatic.net/imgstock/upload/research/company/.*\.pdf$"))
|
|
|
|
| 15 |
for link in pdf_links:
|
| 16 |
title = link.text.strip() # Extract the title from the link text
|
| 17 |
full_url = link['href']
|
| 18 |
+
if full_url not in seen_urls:
|
| 19 |
+
seen_urls.add(full_url)
|
| 20 |
+
# After de-duplication, store each entry as a downloadable link
|
| 21 |
+
links_and_titles.append([title, f"<a href='{full_url}' download='{full_url.split('/')[-1]}'>{full_url}</a>"])
|
| 22 |
return links_and_titles
|
| 23 |
|
| 24 |
# Gradio interface
|