"""Gradio app: extract a page's main text, summarize it, and rewrite the summary."""

import re

import gradio as gr
import nltk
import requests
import trafilatura
from markdownify import markdownify as md
from sumy.nlp.tokenizers import Tokenizer
from sumy.parsers.plaintext import PlaintextParser
from sumy.summarizers.text_rank import TextRankSummarizer

# NLTK tokenizer data is fetched at import time so it is already present
# before the first request is handled.
nltk.download("punkt")
nltk.download("punkt_tab")

# Sent with every page fetch; the timeout keeps one slow site from hanging
# the UI indefinitely.
_REQUEST_HEADERS = {"User-Agent": "Mozilla/5.0"}
_REQUEST_TIMEOUT_SECONDS = 10

# Simple local paraphrasing table: each key is replaced by its value.
# No LLM is involved, so the quality is basic.
_REPLACEMENTS = {
    "노조": "노동조합",
    "성과급": "성과 보상금",
    "요구": "요청",
    "불만": "이의 제기",
    "합의안": "협상 결과안",
}

# One escaped alternation so that the keys are matched literally and the
# sentence is scanned once. Longer keys come first so a key that is a prefix
# of another cannot shadow it.
_REPLACEMENT_PATTERN = re.compile(
    "|".join(
        re.escape(term)
        for term in sorted(_REPLACEMENTS, key=len, reverse=True)
    )
)

# Particle used as the pivot for the word-order swap in paraphrase_text.
_TOPIC_MARKER = "는"


def summarize_text(text: str, sentence_count: int = 3) -> list:
    """Return up to ``sentence_count`` key sentences of ``text`` via TextRank.

    Args:
        text: Plain text (or Markdown) to summarize.
        sentence_count: Number of sentences requested from the summarizer.

    Returns:
        A list of sentence strings; empty when ``text`` is empty or
        whitespace-only.
    """
    if not text or not text.strip():
        return []

    # NOTE(review): the tokenizer is fixed to "english" although the
    # replacement table in this file targets Korean text -- confirm that
    # sentence splitting is adequate for the pages this app is used on.
    parser = PlaintextParser.from_string(text, Tokenizer("english"))
    summarizer = TextRankSummarizer()
    summary_sentences = summarizer(parser.document, sentence_count)
    return [str(sentence) for sentence in summary_sentences]


def paraphrase_text(sentences) -> list:
    """Rewrite each sentence with term substitution and a word-order swap.

    Two steps are applied per sentence:

    1. Every term in the replacement table is swapped for its alternative,
       in a single pass over the sentence.
    2. If the sentence contains "는" with text on both sides, it is split
       at the first occurrence and emitted as ``"<after> — <before>는"``.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        A list with one rewritten string per input sentence, in order.
    """
    rewritten = []
    for sentence in sentences:
        swapped = _REPLACEMENT_PATTERN.sub(
            lambda match: _REPLACEMENTS[match.group(0)], sentence
        )

        # Slight word-order change (illustrative only).
        head, marker, tail = swapped.partition(_TOPIC_MARKER)
        head, tail = head.strip(), tail.strip()
        # Skip the swap when either side is empty: reordering would produce
        # a dangling fragment such as " — X는".
        if marker and head and tail:
            swapped = f"{tail} — {head}{_TOPIC_MARKER}"

        rewritten.append(swapped)
    return rewritten


def extract_summarize_paraphrase(url: str) -> tuple:
    """Fetch ``url`` and return its main text, a summary, and a rewrite.

    Args:
        url: Address of the web page to process.

    Returns:
        A 3-tuple ``(markdown_text, summary, paraphrase)``. The summary and
        paraphrase are newline-joined sentences. On failure the first
        element carries a user-facing message and the other two are empty
        strings; this function does not raise.
    """
    try:
        response = requests.get(
            (url or "").strip(),
            headers=_REQUEST_HEADERS,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()

        # Without a declared charset, requests decodes text/* bodies as
        # ISO-8859-1, which garbles non-Latin pages. Fall back to the
        # encoding detected from the body in that case.
        content_type = response.headers.get("Content-Type", "")
        if "charset" not in content_type.lower():
            response.encoding = response.apparent_encoding

        html_content = trafilatura.extract(
            response.text,
            output_format="html",
            include_tables=True,
            favor_recall=True,
        )
        if not html_content:
            return "본문을 추출할 수 없습니다.", "", ""

        markdown_text = md(html_content, heading_style="ATX")
        summary_sentences = summarize_text(markdown_text, sentence_count=3)
        paraphrased_sentences = paraphrase_text(summary_sentences)
        return (
            markdown_text,
            "\n".join(summary_sentences),
            "\n".join(paraphrased_sentences),
        )
    except Exception as e:
        # UI boundary: any failure (network, HTTP status, parsing) is shown
        # to the user in the first output instead of crashing the handler.
        return f"에러 발생: {e}", "", ""


iface = gr.Interface(
    fn=extract_summarize_paraphrase,
    inputs=gr.Textbox(label="URL 입력", placeholder="https://example.com"),
    outputs=[
        gr.Markdown(label="추출된 본문"),
        gr.Textbox(label="자동 요약", lines=5),
        gr.Textbox(label="자동 재작성", lines=5),
    ],
    title="본문 추출기 + 자동 요약 + 자동 재작성",
    # Adjacent literals: a raw line break inside a quoted string is a
    # syntax error, so the long description is split across two literals.
    description=(
        "웹페이지 URL을 입력하면 본문을 추출하고, 3문장 요약과 "
        "재작성(Paraphrasing) 결과를 제공합니다."
    ),
)

if __name__ == "__main__":
    iface.launch()