Spaces:
Sleeping
Sleeping
File size: 2,189 Bytes
00e61ba 5f01545 30f4683 00e61ba 5f01545 3c8bf47 00e61ba 629d00c 00e61ba 3c8bf47 00e61ba 30f4683 00e61ba 30f4683 00e61ba 30f4683 5f01545 00e61ba 5f01545 00e61ba 5f01545 00e61ba 5f01545 00e61ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import nltk
# Download the NLTK tokenizer resources automatically at import time.
for _resource in ("punkt", "punkt_tab"):
    nltk.download(_resource)
import gradio as gr
import trafilatura
import requests
from markdownify import markdownify as md
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.text_rank import TextRankSummarizer
def summarize_text(text, sentence_count=3):
    """Return an extractive TextRank summary of *text*.

    The text is parsed with sumy's ``PlaintextParser`` and ranked with
    ``TextRankSummarizer``; the selected sentences are joined with newlines.

    Args:
        text: The text to summarize.
        sentence_count: Number of sentences requested from the summarizer.

    Returns:
        The selected sentences as one string, one sentence per line.
    """
    # The "english" tokenizer is used deliberately so that Korean text is
    # split into sentences as well.
    tokenizer = Tokenizer("english")
    document = PlaintextParser.from_string(text, tokenizer).document
    rank = TextRankSummarizer()
    picked = [str(sentence) for sentence in rank(document, sentence_count)]
    return "\n".join(picked)
def extract_and_summarize(url):
    """Fetch a web page, extract its main content, and summarize it.

    The page is downloaded with ``requests``, its main content is extracted
    as HTML by ``trafilatura``, converted to Markdown with ``markdownify``,
    and finally summarized to three sentences by ``summarize_text``.

    Args:
        url: Address of the page to process. Surrounding whitespace is
            ignored.

    Returns:
        A ``(markdown_text, summary)`` tuple. On any failure the first item
        is a Korean error message and the second item is an empty string;
        no exception propagates to the caller.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(
            (url or "").strip(), headers=headers, timeout=10
        )
        response.raise_for_status()

        # When the server declares no charset, requests decodes text/*
        # bodies as ISO-8859-1, which garbles non-Latin pages (e.g. Korean).
        # Fall back to the encoding detected from the body in that case.
        content_type = response.headers.get("Content-Type", "")
        if "charset" not in content_type.lower():
            response.encoding = response.apparent_encoding

        # Extract the main content as HTML so structure (tables, headings)
        # survives the conversion to Markdown.
        html_content = trafilatura.extract(
            response.text,
            output_format="html",
            include_tables=True,
            favor_recall=True,
        )
        if not html_content:
            return "본문을 추출할 수 없습니다.", ""

        # HTML -> Markdown conversion.
        markdown_text = md(html_content, heading_style="ATX")

        # Build the summary.
        summary = summarize_text(markdown_text, sentence_count=3)
        return markdown_text, summary
    except requests.exceptions.Timeout:
        return "요청이 시간 초과되었습니다.", ""
    except requests.exceptions.RequestException as e:
        return f"요청 실패: {e}", ""
    except Exception as e:
        # UI boundary: report any other failure as a message instead of
        # letting it crash the request handler.
        return f"에러 발생: {e}", ""
iface = gr.Interface(
fn=extract_and_summarize,
inputs=gr.Textbox(label="URL ์
๋ ฅ", placeholder="https://example.com"),
outputs=[
gr.Markdown(label="์ถ์ถ๋ ๋ณธ๋ฌธ"),
gr.Textbox(label="์๋ ์์ฝ", lines=5)
],
title="๋ณธ๋ฌธ ์ถ์ถ๊ธฐ + ์๋ ์์ฝ",
description="์นํ์ด์ง URL์ ์
๋ ฅํ๋ฉด ๋ณธ๋ฌธ์ ์ถ์ถํ๊ณ , TextRank ์๊ณ ๋ฆฌ์ฆ์ผ๋ก 3๋ฌธ์ฅ ์์ฝ์ ์ ๊ณตํฉ๋๋ค."
)
if __name__ == "__main__":
iface.launch()
|