orgoflu commited on
Commit
276cd92
·
verified ·
1 Parent(s): dfdac42

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -13
app.py CHANGED
@@ -12,24 +12,24 @@ import re
12
  import torch
13
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
14
 
15
- # ===== 비교용 모델 목록 =====
16
  MODEL_OPTIONS = {
17
- "Qwen2.5-1.5B-Instruct (기본)": "Qwen/Qwen2.5-1.5B-Instruct",
18
- "DeepSeek-R1-Distill-Qwen-1.5B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
19
- "SOLAR-1.5B-Instruct": "upstage/SOLAR-1.5B-Instruct",
20
- "Gemma-2-2B-it": "google/gemma-2-2b-it"
21
  }
22
 
23
  # ===== 모델 로드 =====
24
  def load_model(model_name):
25
- tokenizer = AutoTokenizer.from_pretrained(model_name)
26
  model = AutoModelForCausalLM.from_pretrained(
27
  model_name,
28
- torch_dtype=torch.float32
 
29
  ).to("cpu")
30
  return pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)
31
 
32
- # ===== 유틸 =====
33
  def clean_text(text: str) -> str:
34
  return re.sub(r'\s+', ' ', text).strip()
35
 
@@ -42,7 +42,7 @@ def remove_duplicates(sentences):
42
  result.append(s_clean)
43
  return result
44
 
45
- # ===== 자동요약 =====
46
  def summarize_text(text):
47
  text = clean_text(text)
48
  length = len(text)
@@ -87,7 +87,7 @@ def rewrite_with_llm(sentences, model_choice):
87
문장:
88
  {joined_text}
89
  """
90
- result = llm_pipeline(prompt, max_new_tokens=180, do_sample=False, temperature=0)
91
  return result[0]["generated_text"].replace(prompt, "").strip()
92
 
93
  # ===== 전체 파이프라인 =====
@@ -129,15 +129,15 @@ iface = gr.Interface(
129
  fn=extract_summarize_paraphrase,
130
  inputs=[
131
  gr.Textbox(label="URL 입력", placeholder="https://example.com"),
132
- gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), value="Qwen2.5-1.5B-Instruct (기본)", label="재작성 모델 선택")
133
  ],
134
  outputs=[
135
  gr.Markdown(label="추출된 본문"),
136
  gr.Textbox(label="자동 요약", lines=5),
137
  gr.Textbox(label="자동 재작성 (LLM)", lines=5)
138
  ],
139
- title="한국어 본문 추출 + 자동 요약 + LLM 재작성 (모델 비교)",
140
- description="본문은 TextRank로 요약하고, 재작성은 선택한 Hugging Face Hub LLM으로 처리합니다."
141
  )
142
 
143
  if __name__ == "__main__":
 
12
  import torch
13
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
14
 
15
+ # ===== 사용할 모델 3개 =====
16
  MODEL_OPTIONS = {
17
+ "Qwen2.5-1.5B-Instruct": "Qwen/Qwen2.5-1.5B-Instruct",
18
+ "Gemma-3-4B-it": "google/gemma-3-4b-it",
19
+ "HyperCLOVA-X-Seed-3B": "naver-clova/HyperCLOVA-X-Seed-3B"
 
20
  }
21
 
22
  # ===== 모델 로드 =====
23
  def load_model(model_name):
24
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
25
  model = AutoModelForCausalLM.from_pretrained(
26
  model_name,
27
+ torch_dtype=torch.float32,
28
+ trust_remote_code=True
29
  ).to("cpu")
30
  return pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)
31
 
32
+ # ===== 텍스트 전처리 =====
33
  def clean_text(text: str) -> str:
34
  return re.sub(r'\s+', ' ', text).strip()
35
 
 
42
  result.append(s_clean)
43
  return result
44
 
45
+ # ===== 자동 요약 =====
46
  def summarize_text(text):
47
  text = clean_text(text)
48
  length = len(text)
 
87
문장:
88
  {joined_text}
89
  """
90
+ result = llm_pipeline(prompt, max_new_tokens=150, do_sample=False, temperature=0)
91
  return result[0]["generated_text"].replace(prompt, "").strip()
92
 
93
  # ===== 전체 파이프라인 =====
 
129
  fn=extract_summarize_paraphrase,
130
  inputs=[
131
  gr.Textbox(label="URL 입력", placeholder="https://example.com"),
132
+ gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), value="Qwen2.5-1.5B-Instruct", label="재작성 모델 선택")
133
  ],
134
  outputs=[
135
  gr.Markdown(label="추출된 본문"),
136
  gr.Textbox(label="자동 요약", lines=5),
137
  gr.Textbox(label="자동 재작성 (LLM)", lines=5)
138
  ],
139
+ title="한국어 본문 추출 + 자동 요약 + LLM 재작성",
140
+ description="Qwen 1.5B, Gemma 3 E4B, HyperCLOVA-X-Seed-3B 중 선택하여 재작성"
141
  )
142
 
143
  if __name__ == "__main__":