epinfomax committed on
Commit
6fabab2
·
verified ·
1 Parent(s): 6e61e2a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -0
app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Gradio app: serve a Korean business-document summarizer built from the
# Qwen2.5-7B-Instruct base model with a LoRA/PEFT adapter applied on top.
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Hub IDs: frozen base model and the fine-tuned PEFT adapter weights.
base_id = "Qwen/Qwen2.5-7B-Instruct"
adapter_id = "epinfomax/BizFlow-Summarizer-Ko"

# On a CPU (free-tier) environment use float32; on GPU use float16/bfloat16.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

# Load the tokenizer and base weights, then wrap the base model with the
# adapter (PeftModel applies the LoRA deltas at forward time) and move it
# to the selected device.
tokenizer = AutoTokenizer.from_pretrained(base_id)
model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=dtype)
model = PeftModel.from_pretrained(model, adapter_id)
model.to(device)
def summarize(text):
    """Summarize *text* with the chat model and return only the new tokens.

    Args:
        text: The user-provided document to summarize.

    Returns:
        The generated summary string, with special tokens stripped.
    """
    # NOTE(review): the original line was truncated to "messages =" (the list
    # literal was likely eaten by HTML escaping). A single user turn is the
    # minimal valid reconstruction — confirm whether a system prompt with
    # summarization instructions was intended.
    messages = [{"role": "user", "content": text}]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # do_sample=True so temperature actually takes effect; under the default
    # greedy decoding, temperature is silently ignored.
    outputs = model.generate(
        **inputs, max_new_tokens=512, temperature=0.3, do_sample=True
    )
    # outputs is (batch, seq). Slice row 0 past the prompt length so only the
    # newly generated tokens are decoded — the original sliced the batch
    # dimension (outputs[prompt_len:]) by mistake, which decodes nothing.
    generated = outputs[0][inputs.input_ids.shape[1]:]
    return tokenizer.decode(generated, skip_special_tokens=True)
# Minimal text-in/text-out UI around summarize(); launch() blocks and serves
# the app (on Spaces this binds the public endpoint).
iface = gr.Interface(fn=summarize, inputs="text", outputs="text", title="BizFlow 요약기")
iface.launch()