Update app.py
app.py CHANGED
@@ -1,31 +1,79 @@
 import gradio as gr
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from peft import PeftModel

+# 1. Set the model IDs
+base_id = "Qwen/Qwen2.5-7B-Instruct"
+adapter_id = "epinfomax/BizFlow-Summarizer-Ko"

+# 2. Hardware setup (auto-detect GPU/CPU)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+dtype = torch.float16 if device == "cuda" else torch.float32
+
+print(f"Loading model... (Device: {device})")
+
+# 3. Load the model
+tokenizer = AutoTokenizer.from_pretrained(base_id)
+model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=dtype)
+model = PeftModel.from_pretrained(model, adapter_id)
+model.to(device)
+model.eval()

 def summarize(text):
+    # ✅ Fixed: structure the prompt as a messages list.
+    # Put the system prompt (instructions) and the user input (text)
+    # into a list of dictionaries.
+    messages = [
+        {
+            "role": "system",
+            "content": "You are an AI assistant that specializes in summarizing business documents. Summarize the key points clearly."
+        },
+        {
+            "role": "user",
+            "content": text
+        }
+    ]
+
+    # Format the input text (apply the chat template)
+    input_text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+
+    # Tokenize and move to the target device
+    inputs = tokenizer([input_text], return_tensors="pt").to(device)
+
+    # Inference
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=512,
+            temperature=0.3,
+            repetition_penalty=1.1
+        )
+
-    response = tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)
-    return response
+    # Decode the result (excluding the input prompt)
+    generated_tokens = outputs[:, inputs.input_ids.shape[1]:]
+    result = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

+    # batch_decode returns a list, so return only the first element
+    return result[0]

+# 4. Define the web interface
+iface = gr.Interface(
     fn=summarize,
-    inputs=gr.Textbox(
+    inputs=gr.Textbox(
+        lines=15,
+        placeholder="Paste the document to summarize here...",
+        label="Input document"
+    ),
+    outputs=gr.Textbox(label="Summary"),
+    title="BizFlow Document Summarizer",
+    description="A test demo of the Qwen2.5-7B model fine-tuned with LoRA.",
+    examples=[["Samsung Electronics announced its confirmed fourth-quarter results in a conference call today. Consolidated revenue came to 67.78 trillion won, down 3.8% year over year, while operating profit was 2.82 trillion won..."]]
 )

+# Run the app
+if __name__ == "__main__":
+    iface.launch()
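For reference, a minimal sketch of how the updated demo could be queried programmatically once the Space is running. This is an illustration, not part of the commit: the Space ID passed to Client is an assumption (this page only names the adapter repo epinfomax/BizFlow-Summarizer-Ko), while api_name="/predict" is the default endpoint Gradio exposes for a single gr.Interface.

# Hypothetical client-side usage sketch; not part of this commit.
from gradio_client import Client

client = Client("epinfomax/BizFlow-Summarizer-Ko")  # assumption: the actual Space ID may differ
summary = client.predict(
    "Samsung Electronics announced its confirmed fourth-quarter results...",  # document to summarize
    api_name="/predict",  # default endpoint for a single gr.Interface
)
print(summary)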