epinfomax committed
Commit f88ea6a · verified · 1 Parent(s): 7ef137c

Update app.py

Files changed (1):
  1. app.py  +20 -55
app.py CHANGED
@@ -1,66 +1,31 @@
  import gradio as gr
  import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM
- from peft import PeftModel

- # 1. Set the model IDs
- base_id = "Qwen/Qwen2.5-7B-Instruct"
- adapter_id = "epinfomax/BizFlow-Summarizer-Ko"

- # 2. Auto-configure hardware (fall back to CPU when no GPU is available)
- device = "cuda" if torch.cuda.is_available() else "cpu"
- dtype = torch.float16 if device == "cuda" else torch.float32
-
- print(f"🚀 Loading the model... (Device: {device})")
-
- # 3. Load the model and tokenizer
- tokenizer = AutoTokenizer.from_pretrained(base_id)
- model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=dtype)
- model = PeftModel.from_pretrained(model, adapter_id)
- model.to(device)
- model.eval()

  def summarize(text):
-     # Fix for the part that raised an error: define the messages list explicitly
-     messages =
-
-     # Apply the Qwen chat template
-     input_text = tokenizer.apply_chat_template(
-         messages,
-         tokenize=False,
-         add_generation_prompt=True
-     )
-
-     # Tokenize the input
-     inputs = tokenizer([input_text], return_tensors="pt").to(device)
-
-     # Inference (generate the summary)
-     with torch.no_grad():
-         outputs = model.generate(
-             **inputs,
-             max_new_tokens=512,       # maximum number of new tokens to generate
-             temperature=0.3,          # lower values give more factual summaries
-             repetition_penalty=1.1
-         )
-
-     # Decode the result (drop the input prompt, keep only the summary)
-     summary = tokenizer.decode(outputs[inputs.input_ids.shape[1]:], skip_special_tokens=True)
-     return summary

- # 4. Build the web interface (Gradio)
- iface = gr.Interface(
      fn=summarize,
-     inputs=gr.Textbox(
-         lines=15,
-         placeholder="Paste a news article or meeting minutes to summarize here...",
-         label="Input document"
-     ),
-     outputs=gr.Textbox(label="Summary"),
-     title="BizFlow Document Summarization Agent",
-     description="A Korean-language summarizer built by fine-tuning the Qwen2.5-7B model.",
-     examples=["Samsung Electronics announced its finalized fourth-quarter results for last year on a conference call today. Consolidated revenue came to 67.78 trillion won, down 3.8% year over year, while operating profit reached 2.82 trillion won..."]
  )

- # Run the app
- if __name__ == "__main__":
-     iface.launch()
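The removed version's decode step also hides a shape bug: `model.generate` returns a 2-D tensor of shape `[batch_size, prompt_len + new_tokens]`, so `outputs[inputs.input_ids.shape[1]:]` slices away batch rows rather than prompt tokens, and with a batch of one it yields an empty `[0, N]` tensor instead of the generated ids. A minimal sketch of the difference, using plain tensors so it runs without loading the model:

import torch

# Stand-in for a model.generate result: one sequence,
# 3 prompt token ids followed by 2 newly generated ids.
outputs = torch.tensor([[11, 12, 13, 104, 105]])
prompt_len = 3

wrong = outputs[prompt_len:]     # slices the batch dimension: shape [0, 5]
right = outputs[0][prompt_len:]  # slices row 0's tokens: tensor([104, 105])

print(wrong.shape)     # torch.Size([0, 5]) -> nothing left to decode
print(right.tolist())  # [104, 105] -> only the newly generated tokens

The rewritten version below adopts the `outputs[0][...]` indexing.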
 
  import gradio as gr
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
  import torch

+ model_id = "epinfomax/BizFlow-Summarizer-Ko"  # fine-tuned summarization model

+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     torch_dtype=torch.float16,
+     device_map="auto"  # place layers on available GPU(s)
+ )

  def summarize(text):
+     prompt = f"다음 글을 요약해주세요:\n\n{text}"  # "Please summarize the following text:"
+     messages = [{"role": "user", "content": prompt}]

+     input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
+     outputs = model.generate(input_ids, max_new_tokens=512, do_sample=True, temperature=0.7)
+     response = tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)  # new tokens only
+     return response

+ demo = gr.Interface(
      fn=summarize,
+     inputs=gr.Textbox(lines=10, label="Source text", placeholder="Enter the text to summarize..."),
+     outputs=gr.Textbox(lines=5, label="Summary"),
+     title="BizFlow Summarizer Ko",
+     description="A Korean news/document summarization model"
  )

+ demo.launch()
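One behavioral difference from the removed version: the old script picked `float16` only when CUDA was available and fell back to `float32` on CPU, while the new one always requests `torch.float16`, which is slow or unsupported for CPU-only inference. A minimal sketch of restoring that guard while keeping the new single-repo loading (an assumption that the app may run without a GPU):

import torch
from transformers import AutoModelForCausalLM

model_id = "epinfomax/BizFlow-Summarizer-Ko"

# Pick the dtype by hardware, as the removed version did: fp16 on GPU,
# fp32 on CPU, where half precision is generally slow or unsupported.
dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=dtype,
    device_map="auto",  # resolves to GPU(s) when present, otherwise CPU
)

For exercising the running app remotely, a short sketch with `gradio_client`, assuming the app is deployed as a public Hugging Face Space under the same name (both the Space id and the default `/predict` endpoint are assumptions):

from gradio_client import Client

client = Client("epinfomax/BizFlow-Summarizer-Ko")  # assumed Space id
summary = client.predict("(Korean source text to summarize)", api_name="/predict")
print(summary)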