# File size: 1,317 Bytes
# 908d904 7aef82d 908d904 7aef82d f03ed3b 7aef82d f03ed3b 7aef82d 908d904 7aef82d 908d904 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import gradio as gr
import trafilatura
import requests
from markdownify import markdownify as md
def extract(url):
    """Fetch a web page and return its main content as Markdown.

    The page is downloaded with ``requests`` (browser-like User-Agent,
    10-second timeout), the main body is isolated with ``trafilatura`` as
    HTML (tables included, recall favored), and that HTML is converted to
    Markdown with ATX-style (``#``) headings.

    Args:
        url: Address of the page to fetch.

    Returns:
        The extracted content as a Markdown string. When nothing could be
        extracted, or the request or processing failed, a user-facing
        Korean message is returned instead; exceptions are caught here and
        reported through the return value rather than propagated.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # When the server declares no charset, requests decodes text/*
        # bodies as ISO-8859-1, which garbles non-Latin pages. In that case
        # fall back to the encoding detected from the body itself.
        content_type = response.headers.get("Content-Type", "")
        if "charset" not in content_type.lower():
            response.encoding = response.apparent_encoding

        # Extract the main content as HTML.
        html_content = trafilatura.extract(
            response.text,
            output_format="html",
            include_tables=True,
            favor_recall=True,
        )
        if html_content:
            # Convert HTML -> Markdown.
            return md(html_content, heading_style="ATX")
        # "Could not extract the main content."
        return "본문을 추출할 수 없습니다."
    except requests.exceptions.Timeout:
        # "The request timed out."
        return "요청이 시간 초과되었습니다."
    except requests.exceptions.RequestException as e:
        # "Request failed: ..."
        return f"요청 실패: {e}"
    except Exception as e:
        # UI boundary: report any other failure as text instead of crashing.
        # "An error occurred: ..."
        return f"에러 발생: {e}"
iface = gr.Interface(
fn=extract,
inputs=gr.Textbox(label="URL ์
๋ ฅ", placeholder="https://example.com"),
outputs=gr.Markdown(label="์ถ์ถ๋ ๋ณธ๋ฌธ"),
title="๋ณธ๋ฌธ ์ถ์ถ๊ธฐ (๋งํฌ๋ค์ด ์ง์)",
description="์นํ์ด์ง URL์ ์
๋ ฅํ๋ฉด ๋ฆฌ๋๋ชจ๋์ฒ๋ผ ๊น๋ํ๊ฒ ๋งํฌ๋ค์ด์ผ๋ก ์ถ์ถํฉ๋๋ค."
)
# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()