Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,24 +12,24 @@ import re
|
|
| 12 |
import torch
|
| 13 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 14 |
|
| 15 |
-
# =====
|
| 16 |
MODEL_OPTIONS = {
|
| 17 |
-
"Qwen2.5-1.5B-Instruct
|
| 18 |
-
"
|
| 19 |
-
"
|
| 20 |
-
"Gemma-2-2B-it": "google/gemma-2-2b-it"
|
| 21 |
}
|
| 22 |
|
| 23 |
# ===== 모델 로드 =====
|
| 24 |
def load_model(model_name):
|
| 25 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 26 |
model = AutoModelForCausalLM.from_pretrained(
|
| 27 |
model_name,
|
| 28 |
-
torch_dtype=torch.float32
|
|
|
|
| 29 |
).to("cpu")
|
| 30 |
return pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)
|
| 31 |
|
| 32 |
-
# =====
|
| 33 |
def clean_text(text: str) -> str:
|
| 34 |
return re.sub(r'\s+', ' ', text).strip()
|
| 35 |
|
|
@@ -42,7 +42,7 @@ def remove_duplicates(sentences):
|
|
| 42 |
result.append(s_clean)
|
| 43 |
return result
|
| 44 |
|
| 45 |
-
# =====
|
| 46 |
def summarize_text(text):
|
| 47 |
text = clean_text(text)
|
| 48 |
length = len(text)
|
|
@@ -87,7 +87,7 @@ def rewrite_with_llm(sentences, model_choice):
|
|
| 87 |
๋ฌธ์ฅ:
|
| 88 |
{joined_text}
|
| 89 |
"""
|
| 90 |
-
result = llm_pipeline(prompt, max_new_tokens=
|
| 91 |
return result[0]["generated_text"].replace(prompt, "").strip()
|
| 92 |
|
| 93 |
# ===== 전체 파이프라인 =====
|
|
@@ -129,15 +129,15 @@ iface = gr.Interface(
|
|
| 129 |
fn=extract_summarize_paraphrase,
|
| 130 |
inputs=[
|
| 131 |
gr.Textbox(label="URL ์
๋ ฅ", placeholder="https://example.com"),
|
| 132 |
-
gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), value="Qwen2.5-1.5B-Instruct
|
| 133 |
],
|
| 134 |
outputs=[
|
| 135 |
gr.Markdown(label="์ถ์ถ๋ ๋ณธ๋ฌธ"),
|
| 136 |
gr.Textbox(label="์๋ ์์ฝ", lines=5),
|
| 137 |
gr.Textbox(label="์๋ ์ฌ์์ฑ (LLM)", lines=5)
|
| 138 |
],
|
| 139 |
-
title="ํ๊ตญ์ด ๋ณธ๋ฌธ ์ถ์ถ + ์๋ ์์ฝ + LLM ์ฌ์์ฑ
|
| 140 |
-
description="
|
| 141 |
)
|
| 142 |
|
| 143 |
if __name__ == "__main__":
|
|
|
|
| 12 |
import torch
|
| 13 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 14 |
|
| 15 |
+
# ===== 사용할 모델 3개 =====
|
| 16 |
MODEL_OPTIONS = {
|
| 17 |
+
"Qwen2.5-1.5B-Instruct": "Qwen/Qwen2.5-1.5B-Instruct",
|
| 18 |
+
"Gemma-3-4B-it": "google/gemma-3-4b-it",
|
| 19 |
+
"HyperCLOVA-X-Seed-3B": "naver-clova/HyperCLOVA-X-Seed-3B"
|
|
|
|
| 20 |
}
|
| 21 |
|
| 22 |
# ===== 모델 로드 =====
|
| 23 |
def load_model(model_name):
|
| 24 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
| 25 |
model = AutoModelForCausalLM.from_pretrained(
|
| 26 |
model_name,
|
| 27 |
+
torch_dtype=torch.float32,
|
| 28 |
+
trust_remote_code=True
|
| 29 |
).to("cpu")
|
| 30 |
return pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)
|
| 31 |
|
| 32 |
+
# ===== 텍스트 전처리 =====
|
| 33 |
def clean_text(text: str) -> str:
|
| 34 |
return re.sub(r'\s+', ' ', text).strip()
|
| 35 |
|
|
|
|
| 42 |
result.append(s_clean)
|
| 43 |
return result
|
| 44 |
|
| 45 |
+
# ===== 자동 요약 =====
|
| 46 |
def summarize_text(text):
|
| 47 |
text = clean_text(text)
|
| 48 |
length = len(text)
|
|
|
|
| 87 |
๋ฌธ์ฅ:
|
| 88 |
{joined_text}
|
| 89 |
"""
|
| 90 |
+
result = llm_pipeline(prompt, max_new_tokens=150, do_sample=False, temperature=0)
|
| 91 |
return result[0]["generated_text"].replace(prompt, "").strip()
|
| 92 |
|
| 93 |
# ===== 전체 파이프라인 =====
|
|
|
|
| 129 |
fn=extract_summarize_paraphrase,
|
| 130 |
inputs=[
|
| 131 |
gr.Textbox(label="URL ์
๋ ฅ", placeholder="https://example.com"),
|
| 132 |
+
gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), value="Qwen2.5-1.5B-Instruct", label="์ฌ์์ฑ ๋ชจ๋ธ ์ ํ")
|
| 133 |
],
|
| 134 |
outputs=[
|
| 135 |
gr.Markdown(label="์ถ์ถ๋ ๋ณธ๋ฌธ"),
|
| 136 |
gr.Textbox(label="์๋ ์์ฝ", lines=5),
|
| 137 |
gr.Textbox(label="์๋ ์ฌ์์ฑ (LLM)", lines=5)
|
| 138 |
],
|
| 139 |
+
title="ํ๊ตญ์ด ๋ณธ๋ฌธ ์ถ์ถ + ์๋ ์์ฝ + LLM ์ฌ์์ฑ",
|
| 140 |
+
description="Qwen 1.5B, Gemma 3 E4B, HyperCLOVA-X-Seed-3B ์ค ์ ํํ์ฌ ์ฌ์์ฑ"
|
| 141 |
)
|
| 142 |
|
| 143 |
if __name__ == "__main__":
|