import re
from html import escape
from urllib.parse import urljoin

import gradio as gr
import requests
from bs4 import BeautifulSoup
def extract_pdf_links(url):
    """Fetch a web page and return the PDF links found in its anchors.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of at most 100 absolute URLs, in document order, built from
        the ``href`` of every ``<a>`` tag whose ``href`` contains ``.pdf``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Without an explicit timeout, requests waits forever on a silent host.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    hrefs = (anchor['href'] for anchor in soup.find_all('a', href=True))
    # Resolve against the final (post-redirect) URL so relative hrefs become
    # links that still work outside the page they were scraped from.
    pdf_links = [
        urljoin(response.url, href)
        for href in hrefs
        if re.search(r'\.pdf', href)
    ]
    return pdf_links[:100]
def generate_html(pdf_links):
    """Render links as a sequence of HTML download anchors.

    Args:
        pdf_links: Iterable of link values; each is converted with ``str``.

    Returns:
        A string holding one ``<a ... download>`` element followed by
        ``<br/>`` per link, or an empty string when there are no links.
    """
    anchors = []
    for link in pdf_links:
        # Links may come from untrusted pages: escape them so quotes or angle
        # brackets cannot break out of the attribute or inject markup.
        safe_link = escape(str(link), quote=True)
        anchors.append(
            f'<a href="{safe_link}" target="_blank" download>{safe_link}</a><br/>'
        )
    return "".join(anchors)
# Heading shown on the interface; the text includes the Naver Finance
# research listing URL (finance.naver.com/research/company_list.naver).
title = "네이버 증권 리서치 링크- https://finance.naver.com/research/company_list.naver"

# One text input (passed to extract_pdf_links) and one text output
# (whatever extract_pdf_links returns).
iface = gr.Interface(
    fn=extract_pdf_links,
    inputs="text",
    outputs="text",
    title=title,
)

# Launch the interface as soon as the module is executed.
iface.launch()