epinfomax committed on
Commit
d7aaea6
·
verified ·
1 Parent(s): 998c682

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -25
app.py CHANGED
@@ -2,43 +2,30 @@ import gradio as gr
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  from peft import PeftModel
5
- import spaces # ★ 1. spaces 임포트 추가
6
 
7
  # 1. ๋ชจ๋ธ ID ์„ค์ •
8
  base_id = "Qwen/Qwen2.5-7B-Instruct"
9
  adapter_id = "epinfomax/BizFlow-Summarizer-Ko"
10
 
11
- # 2. ๋ชจ๋ธ ๋กœ๋“œ (์ดˆ๊ธฐ ๋กœ๋”ฉ์€ CPU๋‚˜ ๋ฉ”ํƒ€ ์žฅ์น˜์—์„œ ์ˆ˜ํ–‰๋จ)
12
  print(f"๐Ÿš€ ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘...")
13
-
14
  tokenizer = AutoTokenizer.from_pretrained(base_id)
15
- model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.float16) # ZeroGPU에서는 float16 권장
16
  model = PeftModel.from_pretrained(model, adapter_id)
17
 
18
- # 3. 추론 함수 정의
19
- @spaces.GPU # ★ 2. 이 데코레이터가 있어야 GPU가 할당됩니다.
20
  def summarize(text):
21
- # GPU 할당 시 모델을 자동으로 CUDA로 이동시킴 (또는 명시적 이동)
22
- device = "cuda"
23
- model.to(device)
24
 
25
  messages = [
26
- {
27
- "role": "system",
28
- "content": "๋‹น์‹ ์€ ๋น„์ฆˆ๋‹ˆ์Šค ๋ฌธ์„œ๋ฅผ ์ „๋ฌธ์ ์œผ๋กœ ์š”์•ฝํ•˜๋Š” AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค. ํ•ต์‹ฌ ๋‚ด์šฉ์„ ๋ช…ํ™•ํ•˜๊ฒŒ ์š”์•ฝํ•ด ์ฃผ์„ธ์š”."
29
- },
30
- {
31
- "role": "user",
32
- "content": text
33
- }
34
  ]
35
 
36
- input_text = tokenizer.apply_chat_template(
37
- messages,
38
- tokenize=False,
39
- add_generation_prompt=True
40
- )
41
-
42
  inputs = tokenizer([input_text], return_tensors="pt").to(device)
43
 
44
  with torch.no_grad():
@@ -54,6 +41,21 @@ def summarize(text):
54
 
55
  return result[0]
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  # 4. 웹 인터페이스 정의
58
  iface = gr.Interface(
59
  fn=summarize,
@@ -62,10 +64,14 @@ iface = gr.Interface(
62
  placeholder="์š”์•ฝํ•  ๋ฌธ์„œ๋ฅผ ์—ฌ๊ธฐ์— ๋ถ™์—ฌ๋„ฃ์œผ์„ธ์š”...",
63
  label="์ž…๋ ฅ ๋ฌธ์„œ"
64
  ),
65
- outputs=gr.Textbox(label="요약 결과"),
 
 
 
 
66
  title="BizFlow ๋ฌธ์„œ ์š”์•ฝ๊ธฐ",
67
  description="Qwen2.5-7B + ํŒŒ์ธํŠœ๋‹(LoRA) ๋ชจ๋ธ ํ…Œ์ŠคํŠธ ๋ฐ๋ชจ์ž…๋‹ˆ๋‹ค. (ZeroGPU)",
68
- examples=[["์‚ผ์„ฑ์ „์ž๊ฐ€ ์˜ค๋Š˜ ์ปจํผ๋Ÿฐ์Šค์ฝœ์„ ํ†ตํ•ด ์ง€๋‚œํ•ด 4๋ถ„๊ธฐ ํ™•์ • ์‹ค์ ์„ ๋ฐœํ‘œํ–ˆ๋‹ค. ์—ฐ๊ฒฐ ๊ธฐ์ค€ ๋งค์ถœ์€ 67์กฐ 7800์–ต ์›์œผ๋กœ ์ „๋…„ ๋™๊ธฐ ๋Œ€๋น„ 3.8% ๊ฐ์†Œํ–ˆ์œผ๋‚˜, ์˜์—…์ด์ต์€ 2์กฐ 8200์–ต ์›์œผ๋กœ..."]]
69
  )
70
 
71
  if __name__ == "__main__":
 
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  from peft import PeftModel
5
+ import spaces
6
 
7
  # 1. ๋ชจ๋ธ ID ์„ค์ •
8
  base_id = "Qwen/Qwen2.5-7B-Instruct"
9
  adapter_id = "epinfomax/BizFlow-Summarizer-Ko"
10
 
11
+ # 2. ๋ชจ๋ธ ๋กœ๋“œ
12
  print(f"๐Ÿš€ ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘...")
 
13
  tokenizer = AutoTokenizer.from_pretrained(base_id)
14
+ model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.float16)
15
  model = PeftModel.from_pretrained(model, adapter_id)
16
 
17
+ # 3. 추론 함수
18
+ @spaces.GPU
19
  def summarize(text):
20
+ device = "cuda"
21
+ model.to(device)
 
22
 
23
  messages = [
24
+ {"role": "system", "content": "๋‹น์‹ ์€ ๋น„์ฆˆ๋‹ˆ์Šค ๋ฌธ์„œ๋ฅผ ์ „๋ฌธ์ ์œผ๋กœ ์š”์•ฝํ•˜๋Š” AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค. ํ•ต์‹ฌ ๋‚ด์šฉ์„ ๋ช…ํ™•ํ•˜๊ฒŒ ์š”์•ฝํ•ด ์ฃผ์„ธ์š”."},
25
+ {"role": "user", "content": text}
 
 
 
 
 
 
26
  ]
27
 
28
+ input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
 
 
 
 
29
  inputs = tokenizer([input_text], return_tensors="pt").to(device)
30
 
31
  with torch.no_grad():
 
41
 
42
  return result[0]
43
 
44
+ # ★ 예시용 긴 문장 데이터 (실제 기사 스타일)
45
+ example_text = """삼성전자가 2024년 4분기 연결 기준 매출 67조 7800억 원, 영업이익 2조 8200억 원을 기록했다고 31일 확정 공시했다.
46
+
47
+ 전년 동기 대비 매출은 3.81% 감소했고, 영업이익은 34.4% 줄었다. 메모리 반도체 시황 회복으로 전분기 대비 매출과 영업이익은 개선됐다.
48
+ 부문별로 보면 반도체(DS) 부문은 매출 21조 7000억 원, 영업손실 2조 1800억 원을 기록했다.
49
+ D램은 재고 수준이 큰 폭으로 개선된 가운데 흑자 전환에 성공했다. 파운드리는 시장 수요 침체로 실적 부진이 지속됐다.
50
+
51
+ 모바일(MX)과 네트워크 부문은 매출 25조 400억 원, 영업이익 2조 7300억 원을 달성했다.
52
+ 신제품 출시 효과가 둔화되며 스마트폰 판매량은 줄었지만, 태블릿과 웨어러블 제품 판매가 견조했다.
53
+ 영상디스플레이(VD)와 가전 부문은 매출 14조 2200억 원, 영업손실 500억 원을 기록했다.
54
+ 글로벌 TV 수요 정체에도 프리미엄 제품 중심으로 판매 구조를 개선했다.
55
+
56
+ 삼성전자는 "올해 1분기에는 IT 시황 회복세가 기대되는 만큼, 고부가가치 제품 판매를 확대해 수익성 개선에 주력할 계획"이라고 밝혔다.
57
+ 특히 생성형 AI용 고대역폭메모리(HBM) 등 선단 공정 제품 판매를 대폭 늘릴 방침이다."""
58
+
59
  # 4. 웹 인터페이스 정의
60
  iface = gr.Interface(
61
  fn=summarize,
 
64
  placeholder="์š”์•ฝํ•  ๋ฌธ์„œ๋ฅผ ์—ฌ๊ธฐ์— ๋ถ™์—ฌ๋„ฃ์œผ์„ธ์š”...",
65
  label="์ž…๋ ฅ ๋ฌธ์„œ"
66
  ),
67
+ outputs=gr.Textbox(
68
+ lines=10, # ★ 결과창 높이를 10줄로 고정하여 시원하게 만듦
69
+ label="요약 결과",
70
+ show_copy_button=True # ★ 복사 버튼 추가 (편의성)
71
+ ),
72
  title="BizFlow ๋ฌธ์„œ ์š”์•ฝ๊ธฐ",
73
  description="Qwen2.5-7B + ํŒŒ์ธํŠœ๋‹(LoRA) ๋ชจ๋ธ ํ…Œ์ŠคํŠธ ๋ฐ๋ชจ์ž…๋‹ˆ๋‹ค. (ZeroGPU)",
74
+ examples=[[example_text]] # โ˜… ์œ„์—์„œ ์ •์˜ํ•œ ๊ธด ๋ฌธ์žฅ์„ ์˜ˆ์‹œ๋กœ ๋„ฃ์Œ
75
  )
76
 
77
  if __name__ == "__main__":