Spaces:

orgoflu
/

moro_text_2

Sleeping

App Files Community

orgoflu committed on Sep 11, 2025

Commit

276cd92

verified ·

1 Parent(s): dfdac42

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -13

app.py CHANGED Viewed

@@ -12,24 +12,24 @@ import re
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-# ===== 비교용 모델 목록 =====
 MODEL_OPTIONS = {
-    "Qwen2.5-1.5B-Instruct (기본)": "Qwen/Qwen2.5-1.5B-Instruct",
-    "DeepSeek-R1-Distill-Qwen-1.5B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
-    "SOLAR-1.5B-Instruct": "upstage/SOLAR-1.5B-Instruct",
-    "Gemma-2-2B-it": "google/gemma-2-2b-it"
 }
 # ===== 모델 로드 =====
 def load_model(model_name):
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
-        torch_dtype=torch.float32
     ).to("cpu")
     return pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)
-# ===== 유틸 =====
 def clean_text(text: str) -> str:
     return re.sub(r'\s+', ' ', text).strip()
@@ -42,7 +42,7 @@ def remove_duplicates(sentences):
             result.append(s_clean)
     return result
-# ===== 자동요약 =====
 def summarize_text(text):
     text = clean_text(text)
     length = len(text)
@@ -87,7 +87,7 @@ def rewrite_with_llm(sentences, model_choice):
 문장:
 {joined_text}
 """
-    result = llm_pipeline(prompt, max_new_tokens=180, do_sample=False, temperature=0)
     return result[0]["generated_text"].replace(prompt, "").strip()
 # ===== 전체 파이프라인 =====
@@ -129,15 +129,15 @@ iface = gr.Interface(
     fn=extract_summarize_paraphrase,
     inputs=[
         gr.Textbox(label="URL 입력", placeholder="https://example.com"),
-        gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), value="Qwen2.5-1.5B-Instruct (기본)", label="재작성 모델 선택")
     ],
     outputs=[
         gr.Markdown(label="추출된 본문"),
         gr.Textbox(label="자동 요약", lines=5),
         gr.Textbox(label="자동 재작성 (LLM)", lines=5)
     ],
-    title="한국어 본문 추출 + 자동 요약 + LLM 재작성 (모델 비교)",
-    description="본문은 TextRank로 요약하고, 재작성은 선택한 Hugging Face Hub LLM으로 처리합니다."
 )
 if __name__ == "__main__":

 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+# ===== 사용할 모델 3개 =====
 MODEL_OPTIONS = {
+    "Qwen2.5-1.5B-Instruct": "Qwen/Qwen2.5-1.5B-Instruct",
+    "Gemma-3-4B-it": "google/gemma-3-4b-it",
+    "HyperCLOVA-X-Seed-3B": "naver-clova/HyperCLOVA-X-Seed-3B"
 }
 # ===== 모델 로드 =====
 def load_model(model_name):
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
+        torch_dtype=torch.float32,
+        trust_remote_code=True
     ).to("cpu")
     return pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)
+# ===== 텍스트 전처리 =====
 def clean_text(text: str) -> str:
     return re.sub(r'\s+', ' ', text).strip()
             result.append(s_clean)
     return result
+# ===== 자동 요약 =====
 def summarize_text(text):
     text = clean_text(text)
     length = len(text)
 문장:
 {joined_text}
 """
+    result = llm_pipeline(prompt, max_new_tokens=150, do_sample=False, temperature=0)
     return result[0]["generated_text"].replace(prompt, "").strip()
 # ===== 전체 파이프라인 =====
     fn=extract_summarize_paraphrase,
     inputs=[
         gr.Textbox(label="URL 입력", placeholder="https://example.com"),
+        gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), value="Qwen2.5-1.5B-Instruct", label="재작성 모델 선택")
     ],
     outputs=[
         gr.Markdown(label="추출된 본문"),
         gr.Textbox(label="자동 요약", lines=5),
         gr.Textbox(label="자동 재작성 (LLM)", lines=5)
     ],
+    title="한국어 본문 추출 + 자동 요약 + LLM 재작성",
+    description="Qwen 1.5B, Gemma 3 E4B, HyperCLOVA-X-Seed-3B 중 선택하여 재작성"
 )
 if __name__ == "__main__":