Spaces:

epinfomax
/

summarytest

Sleeping

App Files Files Community

epinfomax committed on Dec 30, 2025

Commit

7ef137c

verified ·

1 Parent(s): 6fabab2

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -7

app.py CHANGED Viewed

@@ -3,25 +3,64 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 base_id = "Qwen/Qwen2.5-7B-Instruct"
 adapter_id = "epinfomax/BizFlow-Summarizer-Ko"
-# CPU 환경(무료)이면 float32, GPU 환경이면 float16/bfloat16 사용
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.float16 if device == "cuda" else torch.float32
 tokenizer = AutoTokenizer.from_pretrained(base_id)
 model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=dtype)
 model = PeftModel.from_pretrained(model, adapter_id)
 model.to(device)
 def summarize(text):
     messages =
-    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-    outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.3)
-    return tokenizer.decode(outputs[inputs.input_ids.shape[1]:], skip_special_tokens=True)
-iface = gr.Interface(fn=summarize, inputs="text", outputs="text", title="BizFlow 요약기")
-iface.launch()

 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
+# 1. Model identifiers: the base checkpoint and the adapter loaded on top of it.
 base_id = "Qwen/Qwen2.5-7B-Instruct"
 adapter_id = "epinfomax/BizFlow-Summarizer-Ko"
+# 2. Automatic hardware selection (falls back to CPU when no GPU is available).
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# Half precision is used only on CUDA; the CPU path uses float32.
 dtype = torch.float16 if device == "cuda" else torch.float32
+print(f"🚀 모델을 로딩 중입니다... (Device: {device})")
+# 3. Load the tokenizer and the model.
 tokenizer = AutoTokenizer.from_pretrained(base_id)
 model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=dtype)
+# Wrap the base model with the adapter weights referenced by adapter_id.
 model = PeftModel.from_pretrained(model, adapter_id)
 model.to(device)
+# Put the model in evaluation mode; this app only runs generation.
+model.eval()
 def summarize(text):
+    # 오류가 났던 부분 수정: messages 리스트를 명확히 정의
     messages =
+    # Qwen 채팅 템플릿 적용
+    input_text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    # 입력 토큰화
+    inputs = tokenizer([input_text], return_tensors="pt").to(device)
+    # 추론 (요약 생성)
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=512,  # 생성할 최대 길이
+            temperature=0.3,     # 값이 낮을수록 사실적인 요약
+            repetition_penalty=1.1
+        )
+    # 결과 디코딩 (입력 프롬프트 제외하고 순수 요약문만 추출)
+    summary = tokenizer.decode(outputs[inputs.input_ids.shape[1]:], skip_special_tokens=True)
+    return summary
+# 4. Build the web interface (Gradio): one multi-line text input wired to
+# summarize(), one text output for the result.
+iface = gr.Interface(
+    fn=summarize,
+    inputs=gr.Textbox(
+        lines=15,
+        placeholder="여기에 요약할 뉴스 기사나 회의록을 붙여넣으세요...",
+        label="입력 문서"
+    ),
+    outputs=gr.Textbox(label="요약 결과"),
+    title="BizFlow 문서 요약 에이전트",
+    description="Qwen2.5-7B 모델을 파인튜닝하여 만든 한국어 전문 요약기입니다.",
+    examples=["삼성전자가 오늘 컨퍼런스콜을 통해 지난해 4분기 확정 실적을 발표했다. 연결 기준 매출은 67조 7800억 원으로 전년 동기 대비 3.8% 감소했으나, 영업이익은 2조 8200억 원으로..."]
+)
+# Launch the app only when this file is run as a script, not when imported.
+if __name__ == "__main__":
+    iface.launch()