Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,15 +12,15 @@ import re
|
|
| 12 |
import torch
|
| 13 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 14 |
|
| 15 |
-
# =====
|
| 16 |
MODEL_OPTIONS = {
|
| 17 |
-
"Qwen2.5-1.5B-Instruct (
|
| 18 |
-
"
|
| 19 |
-
"
|
| 20 |
-
"
|
| 21 |
}
|
| 22 |
|
| 23 |
-
# ===== 모델 로드
|
| 24 |
def load_model(model_name):
|
| 25 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 26 |
model = AutoModelForCausalLM.from_pretrained(
|
|
@@ -42,7 +42,7 @@ def remove_duplicates(sentences):
|
|
| 42 |
result.append(s_clean)
|
| 43 |
return result
|
| 44 |
|
| 45 |
-
# ===== 자동요약
|
| 46 |
def summarize_text(text):
|
| 47 |
text = clean_text(text)
|
| 48 |
length = len(text)
|
|
@@ -75,7 +75,7 @@ def summarize_text(text):
|
|
| 75 |
summary_list.sort(key=lambda s: text.find(s))
|
| 76 |
return summary_list
|
| 77 |
|
| 78 |
-
# ===== LLM
|
| 79 |
def rewrite_with_llm(sentences, model_choice):
|
| 80 |
model_name = MODEL_OPTIONS[model_choice]
|
| 81 |
llm_pipeline = load_model(model_name)
|
|
@@ -129,14 +129,14 @@ iface = gr.Interface(
|
|
| 129 |
fn=extract_summarize_paraphrase,
|
| 130 |
inputs=[
|
| 131 |
gr.Textbox(label="URL 입력", placeholder="https://example.com"),
|
| 132 |
-
gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), value="Qwen2.5-
|
| 133 |
],
|
| 134 |
outputs=[
|
| 135 |
gr.Markdown(label="추출된 본문"),
|
| 136 |
gr.Textbox(label="자동 요약", lines=5),
|
| 137 |
gr.Textbox(label="자동 재작성 (LLM)", lines=5)
|
| 138 |
],
|
| 139 |
-
title="한국어 본문 추출 + 자동 요약 + LLM 재작성 (모델
|
| 140 |
description="본문을 TextRank로 요약하고, 재작성은 선택한 Hugging Face Hub LLM으로 처리합니다."
|
| 141 |
)
|
| 142 |
|
|
|
|
| 12 |
import torch
|
| 13 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 14 |
|
| 15 |
+
# ===== 비교용 모델 목록 =====
|
| 16 |
MODEL_OPTIONS = {
|
| 17 |
+
"Qwen2.5-1.5B-Instruct (기본)": "Qwen/Qwen2.5-1.5B-Instruct",
|
| 18 |
+
"DeepSeek-R1-Distill-Qwen-1.5B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
| 19 |
+
"SOLAR-1.5B-Instruct": "upstage/SOLAR-1.5B-Instruct",
|
| 20 |
+
"Gemma-2-2B-it": "google/gemma-2-2b-it"
|
| 21 |
}
|
| 22 |
|
| 23 |
+
# ===== 모델 로드 =====
|
| 24 |
def load_model(model_name):
|
| 25 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 26 |
model = AutoModelForCausalLM.from_pretrained(
|
|
|
|
| 42 |
result.append(s_clean)
|
| 43 |
return result
|
| 44 |
|
| 45 |
+
# ===== 자동요약 =====
|
| 46 |
def summarize_text(text):
|
| 47 |
text = clean_text(text)
|
| 48 |
length = len(text)
|
|
|
|
| 75 |
summary_list.sort(key=lambda s: text.find(s))
|
| 76 |
return summary_list
|
| 77 |
|
| 78 |
+
# ===== LLM 재작성 =====
|
| 79 |
def rewrite_with_llm(sentences, model_choice):
|
| 80 |
model_name = MODEL_OPTIONS[model_choice]
|
| 81 |
llm_pipeline = load_model(model_name)
|
|
|
|
| 129 |
fn=extract_summarize_paraphrase,
|
| 130 |
inputs=[
|
| 131 |
gr.Textbox(label="URL 입력", placeholder="https://example.com"),
|
| 132 |
+
gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), value="Qwen2.5-1.5B-Instruct (기본)", label="재작성 모델 선택")
|
| 133 |
],
|
| 134 |
outputs=[
|
| 135 |
gr.Markdown(label="추출된 본문"),
|
| 136 |
gr.Textbox(label="자동 요약", lines=5),
|
| 137 |
gr.Textbox(label="자동 재작성 (LLM)", lines=5)
|
| 138 |
],
|
| 139 |
+
title="한국어 본문 추출 + 자동 요약 + LLM 재작성 (모델 비교)",
|
| 140 |
description="본문을 TextRank로 요약하고, 재작성은 선택한 Hugging Face Hub LLM으로 처리합니다."
|
| 141 |
)
|
| 142 |
|