# summarytest / app.py
# Author: epinfomax — commit 2fdbd68 (verified), "Update app.py"
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import spaces
# 1. ๋ชจ๋ธ ID ์„ค์ •
base_id = "Qwen/Qwen2.5-7B-Instruct"
adapter_id = "epinfomax/BizFlow-Summarizer-Ko"
# 2. ๋ชจ๋ธ ๋กœ๋“œ
print(f"๐Ÿš€ ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘...")
tokenizer = AutoTokenizer.from_pretrained(base_id)
model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.float16)
model = PeftModel.from_pretrained(model, adapter_id)
# 3. Inference function
@spaces.GPU
def summarize(text):
    """Summarize a business document with the LoRA-tuned Qwen model.

    Args:
        text: Raw document text pasted by the user.

    Returns:
        The decoded summary string (prompt tokens stripped, special
        tokens removed).
    """
    # ZeroGPU provides CUDA inside the @spaces.GPU context; fall back to
    # CPU so the function still runs (slowly) elsewhere instead of crashing.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    messages = [
        {"role": "system", "content": "๋‹น์‹ ์€ ๋น„์ฆˆ๋‹ˆ์Šค ๋ฌธ์„œ๋ฅผ ์ „๋ฌธ์ ์œผ๋กœ ์š”์•ฝํ•˜๋Š” AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค. ํ•ต์‹ฌ ๋‚ด์šฉ์„ ๋ช…ํ™•ํ•˜๊ฒŒ ์š”์•ฝํ•ด ์ฃผ์„ธ์š”."},
        {"role": "user", "content": text},
    ]
    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer([input_text], return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            # BUG FIX: without do_sample=True, generate() decodes greedily
            # and silently ignores temperature (transformers warns about
            # sampling flags passed while do_sample=False).
            do_sample=True,
            temperature=0.3,
            repetition_penalty=1.1,
        )
    # Keep only the newly generated tokens (drop the echoed prompt).
    generated_tokens = outputs[:, inputs.input_ids.shape[1]:]
    result = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return result[0]
# โ˜… ์˜ˆ์‹œ์šฉ ๊ธด ๋ฌธ์žฅ ๋ฐ์ดํ„ฐ (์‹ค์ œ ๊ธฐ์‚ฌ ์Šคํƒ€์ผ)
example_text = """์‚ผ์„ฑ์ „์ž๊ฐ€ 2024๋…„ 4๋ถ„๊ธฐ ์—ฐ๊ฒฐ ๊ธฐ์ค€ ๋งค์ถœ 67์กฐ 7800์–ต ์›, ์˜์—…์ด์ต 2์กฐ 8200์–ต ์›์„ ๊ธฐ๋กํ–ˆ๋‹ค๊ณ  31์ผ ํ™•์ • ๊ณต์‹œํ–ˆ๋‹ค.
์ „๋…„ ๋™๊ธฐ ๋Œ€๋น„ ๋งค์ถœ์€ 3.81% ๊ฐ์†Œํ–ˆ๊ณ , ์˜์—…์ด์ต์€ 34.4% ์ค„์—ˆ๋‹ค. ๋ฉ”๋ชจ๋ฆฌ ๋ฐ˜๋„์ฒด ์‹œํ™ฉ ํšŒ๋ณต์œผ๋กœ ์ „๋ถ„๊ธฐ ๋Œ€๋น„ ๋งค์ถœ๊ณผ ์˜์—…์ด์ต์€ ๊ฐœ์„ ๋๋‹ค.
๋ถ€๋ฌธ๋ณ„๋กœ ๋ณด๋ฉด ๋ฐ˜๋„์ฒด(DS) ๋ถ€๋ฌธ์€ ๋งค์ถœ 21์กฐ 7000์–ต ์›, ์˜์—…์†์‹ค 2์กฐ 1800์–ต ์›์„ ๊ธฐ๋กํ–ˆ๋‹ค.
D๋žจ์€ ์žฌ๊ณ  ์ˆ˜์ค€์ด ํฐ ํญ์œผ๋กœ ๊ฐœ์„ ๋œ ๊ฐ€์šด๋ฐ ํ‘์ž ์ „ํ™˜์— ์„ฑ๊ณตํ–ˆ๋‹ค. ํŒŒ์šด๋“œ๋ฆฌ๋Š” ์‹œ์žฅ ์ˆ˜์š” ์นจ์ฒด๋กœ ์‹ค์  ๋ถ€์ง„์ด ์ง€์†๋๋‹ค.
๋ชจ๋ฐ”์ผ(MX)๊ณผ ๋„คํŠธ์›Œํฌ ๋ถ€๋ฌธ์€ ๋งค์ถœ 25์กฐ 400์–ต ์›, ์˜์—…์ด์ต 2์กฐ 7300์–ต ์›์„ ๋‹ฌ์„ฑํ–ˆ๋‹ค.
์‹ ์ œํ’ˆ ์ถœ์‹œ ํšจ๊ณผ๊ฐ€ ๋‘”ํ™”๋˜๋ฉฐ ์Šค๋งˆํŠธํฐ ํŒ๋งค๋Ÿ‰์€ ์ค„์—ˆ์ง€๋งŒ, ํƒœ๋ธ”๋ฆฟ๊ณผ ์›จ์–ด๋Ÿฌ๋ธ” ์ œํ’ˆ ํŒ๋งค๊ฐ€ ๊ฒฌ์กฐํ–ˆ๋‹ค.
์˜์ƒ๋””์Šคํ”Œ๋ ˆ์ด(VD)์™€ ๊ฐ€์ „ ๋ถ€๋ฌธ์€ ๋งค์ถœ 14์กฐ 2200์–ต ์›, ์˜์—…์†์‹ค 500์–ต ์›์„ ๊ธฐ๋กํ–ˆ๋‹ค.
๊ธ€๋กœ๋ฒŒ TV ์ˆ˜์š” ์ •์ฒด์—๋„ ํ”„๋ฆฌ๋ฏธ์—„ ์ œํ’ˆ ์ค‘์‹ฌ์œผ๋กœ ํŒ๋งค ๊ตฌ์กฐ๋ฅผ ๊ฐœ์„ ํ–ˆ๋‹ค.
์‚ผ์„ฑ์ „์ž๋Š” "์˜ฌํ•ด 1๋ถ„๊ธฐ์—๋Š” IT ์‹œํ™ฉ ํšŒ๋ณต์„ธ๊ฐ€ ๊ธฐ๋Œ€๋˜๋Š” ๋งŒํผ, ๊ณ ๋ถ€๊ฐ€๊ฐ€์น˜ ์ œํ’ˆ ํŒ๋งค๋ฅผ ํ™•๋Œ€ํ•ด ์ˆ˜์ต์„ฑ ๊ฐœ์„ ์— ์ฃผ๋ ฅํ•  ๊ณ„ํš"์ด๋ผ๊ณ  ๋ฐํ˜”๋‹ค.
ํŠนํžˆ ์ƒ์„ฑํ˜• AI์šฉ ๊ณ ๋Œ€์—ญํญ๋ฉ”๋ชจ๋ฆฌ(HBM) ๋“ฑ ์„ ๋‹จ ๊ณต์ • ์ œํ’ˆ ํŒ๋งค๋ฅผ ๋Œ€ํญ ๋Š˜๋ฆด ๋ฐฉ์นจ์ด๋‹ค."""
# 4. ์›น ์ธํ„ฐํŽ˜์ด์Šค ์ •์˜
iface = gr.Interface(
fn=summarize,
inputs=gr.Textbox(
lines=15,
placeholder="์š”์•ฝํ•  ๋ฌธ์„œ๋ฅผ ์—ฌ๊ธฐ์— ๋ถ™์—ฌ๋„ฃ์œผ์„ธ์š”...",
label="์ž…๋ ฅ ๋ฌธ์„œ"
),
outputs=gr.Textbox(
lines=10, # โ˜… ๊ฒฐ๊ณผ์ฐฝ ๋†’์ด๋ฅผ 10์ค„๋กœ ๊ณ ์ •ํ•˜์—ฌ ์‹œ์›ํ•˜๊ฒŒ ๋งŒ๋“ฆ
label="์š”์•ฝ ๊ฒฐ๊ณผ",
#show_copy_button=True # โ˜… ๋ณต์‚ฌ ๋ฒ„ํŠผ ์ถ”๊ฐ€ (ํŽธ์˜์„ฑ)
),
title="BizFlow ๋ฌธ์„œ ์š”์•ฝ๊ธฐ",
description="Qwen2.5-7B + ํŒŒ์ธํŠœ๋‹(LoRA) ๋ชจ๋ธ ํ…Œ์ŠคํŠธ ๋ฐ๋ชจ์ž…๋‹ˆ๋‹ค. (ZeroGPU)",
examples=[[example_text]] # โ˜… ์œ„์—์„œ ์ •์˜ํ•œ ๊ธด ๋ฌธ์žฅ์„ ์˜ˆ์‹œ๋กœ ๋„ฃ์Œ
)
if __name__ == "__main__":
iface.launch()