Spaces:
Sleeping
Sleeping
| import nltk | |
| nltk.download("punkt") | |
| nltk.download("punkt_tab") | |
| import gradio as gr | |
| import trafilatura | |
| import requests | |
| from markdownify import markdownify as md | |
| from sumy.parsers.plaintext import PlaintextParser | |
| from sumy.nlp.tokenizers import Tokenizer | |
| from sumy.summarizers.text_rank import TextRankSummarizer | |
| import re | |
def summarize_text(text, sentence_count=3):
    """Summarise ``text`` with sumy's TextRank summariser.

    Args:
        text: Text to summarise. It is parsed as plain text using sumy's
            "english" tokenizer.
        sentence_count: Number of sentences requested from the summariser.

    Returns:
        A list containing each selected sentence converted to ``str``.
    """
    parsed = PlaintextParser.from_string(text, Tokenizer("english"))
    ranker = TextRankSummarizer()
    return list(map(str, ranker(parsed.document, sentence_count)))
def paraphrase_text(sentences):
    """Apply a simple rule-based paraphrase to each sentence.

    This is a local, LLM-free paraphrase of basic quality: a fixed table of
    substring substitutions is applied first, then the word order is nudged
    by swapping the text around the first occurrence of a marker string.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        A new list with one rewritten string per input sentence.
    """
    # NOTE(review): the literals below look mis-encoded (UTF-8 text that was
    # decoded with the wrong code page) -- confirm against the original file.
    # They are runtime data, so they are kept exactly as found.
    substitutions = {
        "๋ ธ์กฐ": "๋ ธ๋์กฐํฉ",
        "์ฑ๊ณผ๊ธ": "์ฑ๊ณผ ๋ณด์๊ธ",
        "์๊ตฌ": "์์ฒญ",
        "๋ถ๋ง": "์ด์ ์ ๊ธฐ",
        "ํฉ์์": "ํ์ ๊ฒฐ๊ณผ์",
    }
    marker = "๋"

    def _rewrite(sentence):
        # Synonym-style substitution; keys are plain substrings, so a literal
        # replace is all that is needed.
        for old, new in substitutions.items():
            sentence = sentence.replace(old, new)
        if marker not in sentence:
            return sentence
        # Example of a slight word-order change: move everything after the
        # first marker to the front and append the leading part behind it.
        head, _, tail = sentence.partition(marker)
        return f"{tail.strip()} โ {head.strip()}๋"

    return [_rewrite(sentence) for sentence in sentences]
def extract_summarize_paraphrase(url):
    """Download ``url``, extract its main content, then summarise and paraphrase it.

    The page is fetched with ``requests``, its main content is extracted as
    HTML by ``trafilatura`` and converted to Markdown, and that Markdown is
    passed through ``summarize_text`` and ``paraphrase_text``.

    Args:
        url: Address of the web page to process.

    Returns:
        A 3-tuple of strings ``(markdown, summary, paraphrase)`` where the
        summary and paraphrase are newline-joined sentences. If nothing could
        be extracted, or any exception is raised along the way, the first
        element holds a message and the other two are empty strings.
    """
    # NOTE(review): the user-facing messages below look mis-encoded (UTF-8
    # text decoded with the wrong code page) -- confirm against the original
    # file. They are kept exactly as found.
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        r = requests.get(url, headers=headers, timeout=10)
        r.raise_for_status()

        # When a text/* response declares no charset, requests decodes
        # ``r.text`` as ISO-8859-1, which garbles UTF-8 (e.g. Korean) pages.
        # In that case use the encoding detected from the body instead.
        content_type = r.headers.get("Content-Type", "")
        if "charset" not in content_type.lower():
            r.encoding = r.apparent_encoding

        html_content = trafilatura.extract(
            r.text,
            output_format="html",
            include_tables=True,
            favor_recall=True,
        )
        if not html_content:
            return "๋ณธ๋ฌธ์ ์ถ์ถํ ์ ์์ต๋๋ค.", "", ""

        markdown_text = md(html_content, heading_style="ATX")
        summary_sentences = summarize_text(markdown_text, sentence_count=3)
        paraphrased_sentences = paraphrase_text(summary_sentences)
        return (
            markdown_text,
            "\n".join(summary_sentences),
            "\n".join(paraphrased_sentences),
        )
    except Exception as e:
        # UI boundary: report any failure in the first output instead of
        # letting the exception propagate to the caller.
        return f"์๋ฌ ๋ฐ์: {e}", "", ""
# Gradio UI wiring: a single URL textbox feeds extract_summarize_paraphrase,
# whose three returned strings are shown as a Markdown view and two textboxes.
# NOTE(review): the labels/title/description look mis-encoded (UTF-8 text
# decoded with the wrong code page) -- confirm against the original file.
_url_box = gr.Textbox(label="URL ์ ๋ ฅ", placeholder="https://example.com")
_result_views = [
    gr.Markdown(label="์ถ์ถ๋ ๋ณธ๋ฌธ"),
    gr.Textbox(label="์๋ ์์ฝ", lines=5),
    gr.Textbox(label="์๋ ์ฌ์์ฑ", lines=5),
]

iface = gr.Interface(
    fn=extract_summarize_paraphrase,
    inputs=_url_box,
    outputs=_result_views,
    title="๋ณธ๋ฌธ ์ถ์ถ๊ธฐ + ์๋ ์์ฝ + ์๋ ์ฌ์์ฑ",
    description="์นํ์ด์ง URL์ ์ ๋ ฅํ๋ฉด ๋ณธ๋ฌธ์ ์ถ์ถํ๊ณ , 3๋ฌธ์ฅ ์์ฝ๊ณผ ์ฌ์์ฑ(Paraphrasing) ๊ฒฐ๊ณผ๋ฅผ ์ ๊ณตํฉ๋๋ค.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()