Spaces:
Sleeping
Sleeping
| import nltk | |
| # Automatically download the NLTK tokenizer resources | |
| nltk.download("punkt") | |
| nltk.download("punkt_tab") | |
| import gradio as gr | |
| import trafilatura | |
| import requests | |
| from markdownify import markdownify as md | |
| from sumy.parsers.plaintext import PlaintextParser | |
| from sumy.nlp.tokenizers import Tokenizer | |
| from sumy.summarizers.text_rank import TextRankSummarizer | |
def summarize_text(text, sentence_count=3):
    """Return a TextRank extractive summary of *text*.

    Args:
        text: The text to summarize.
        sentence_count: Sentence count handed to the summarizer (default 3).

    Returns:
        The sentences chosen by the summarizer, joined with newlines.
    """
    # The "english" tokenizer is used deliberately so that Korean text is
    # also split into sentence units.
    tokenizer = Tokenizer("english")
    parsed = PlaintextParser.from_string(text, tokenizer)
    rank = TextRankSummarizer()
    return "\n".join(map(str, rank(parsed.document, sentence_count)))
def extract_and_summarize(url):
    """Fetch a web page, extract its main content as Markdown, and summarize it.

    Args:
        url: Address of the page to fetch. Surrounding whitespace (common
            when a URL is pasted into the UI) is ignored.

    Returns:
        A ``(markdown_text, summary)`` tuple. On any failure the first
        element is a user-facing error message and the second is ``""``;
        no exception is propagated to the caller.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    # NOTE(review): the URL is user-supplied and fetched server-side; consider
    # restricting reachable hosts if this app is exposed publicly.
    if isinstance(url, str):
        url = url.strip()
    try:
        r = requests.get(url, headers=headers, timeout=10)
        r.raise_for_status()
        # When the Content-Type header carries no charset, requests falls back
        # to ISO-8859-1 for text responses, which garbles non-Latin pages
        # (e.g. Korean UTF-8). Use the encoding detected from the body instead.
        if (r.encoding or "").lower() == "iso-8859-1":
            r.encoding = r.apparent_encoding
        # Extract the main content as HTML.
        html_content = trafilatura.extract(
            r.text,
            output_format="html",
            include_tables=True,
            favor_recall=True,
        )
        if not html_content:
            return "๋ณธ๋ฌธ์ ์ถ์ถํ ์ ์์ต๋๋ค.", ""
        # Convert HTML to Markdown.
        markdown_text = md(html_content, heading_style="ATX")
        # Build the summary.
        summary = summarize_text(markdown_text, sentence_count=3)
        return markdown_text, summary
    except requests.exceptions.Timeout:
        return "์์ฒญ์ด ์๊ฐ ์ด๊ณผ๋์์ต๋๋ค.", ""
    except requests.exceptions.RequestException as e:
        return f"์์ฒญ ์คํจ: {e}", ""
    except Exception as e:
        # Top-level UI boundary: report the error to the user instead of
        # crashing the Gradio handler.
        return f"์๋ฌ ๋ฐ์: {e}", ""
# Gradio UI wiring: one URL text input, two outputs (the extracted Markdown
# body and the generated summary).
_url_input = gr.Textbox(label="URL ์ ๋ ฅ", placeholder="https://example.com")
_result_outputs = [
    gr.Markdown(label="์ถ์ถ๋ ๋ณธ๋ฌธ"),
    gr.Textbox(label="์๋ ์์ฝ", lines=5),
]

iface = gr.Interface(
    fn=extract_and_summarize,
    inputs=_url_input,
    outputs=_result_outputs,
    title="๋ณธ๋ฌธ ์ถ์ถ๊ธฐ + ์๋ ์์ฝ",
    description="์นํ์ด์ง URL์ ์ ๋ ฅํ๋ฉด ๋ณธ๋ฌธ์ ์ถ์ถํ๊ณ , TextRank ์๊ณ ๋ฆฌ์ฆ์ผ๋ก 3๋ฌธ์ฅ ์์ฝ์ ์ ๊ณตํฉ๋๋ค.",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()