import os
import subprocess
import sys
# νμν ν¨ν€μ§ μλ μ€μΉ
def install_packages():
    """Install the runtime dependencies with pip.

    Runs a single quiet ``pip install`` covering all packages at once,
    which is faster than one subprocess per package and lets pip resolve
    the requirement set jointly.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    packages = [
        "transformers==4.45.0",
        "torch",
        "accelerate",
        "sentencepiece",
    ]
    # One pip invocation for the whole list avoids repeated interpreter
    # startup and repeated dependency resolution.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", *packages])
# Install dependencies up front so the third-party imports below succeed
# on a fresh Space. (Original status messages were mojibake-garbled Korean;
# reconstructed in English.)
print("Installing required packages...")
install_packages()
print("Package installation complete!")
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# --- Model loading -----------------------------------------------------------
print("Loading model...")
model_name = "microsoft/Phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # fp16 halves memory on GPU; fall back to fp32 on CPU where fp16 is slow.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",  # let accelerate place the model on the available device(s)
    trust_remote_code=True,
)
print("Model loading complete!")
def format_chat_prompt(message, chat_history):
    """Render the chat history plus the new user message as a plain-text prompt.

    Args:
        message: The latest user message.
        chat_history: Sequence of ``(user_msg, assistant_msg)`` pairs; falsy
            entries (``None`` / empty string) are skipped.

    Returns:
        A "User:"/"Assistant:" transcript ending with ``Assistant:`` so the
        model continues in the assistant role.
    """
    parts = []
    for user_turn, bot_turn in chat_history:
        if user_turn:
            parts.append(f"User: {user_turn}\n")
        if bot_turn:
            parts.append(f"Assistant: {bot_turn}\n")
    parts.append(f"User: {message}\nAssistant:")
    return "".join(parts)
def chat(message, history):
    """Generate an assistant reply for ``message`` given the prior ``history``.

    Args:
        message: Latest user message.
        history: List of ``(user_msg, assistant_msg)`` pairs from the UI.

    Returns:
        The model's reply text, truncated at any hallucinated "User:" turn.
    """
    prompt = format_chat_prompt(message, history)
    # Truncate long conversations so the prompt fits the model's 4k context
    # while leaving room for up to 512 generated tokens.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
    if torch.cuda.is_available():
        inputs = inputs.to("cuda")
    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            # Fix: pass the attention mask explicitly — omitting it triggers a
            # transformers warning and can mis-mask when pad == eos token.
            attention_mask=inputs.attention_mask,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens (skip the prompt portion).
    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
    # The model sometimes continues with a fabricated "User:" turn — cut it off.
    response = response.split("User:")[0].strip()
    return response
# --- Gradio interface --------------------------------------------------------
# NOTE(review): the original UI strings were Korean but arrived mojibake-garbled
# beyond recovery; they are reconstructed here in English with the same widget
# structure and event wiring.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🤖 AI Chat Service
        ### Conversational AI based on the Phi-3-mini model
        Feel free to ask questions and have a conversation!
        """
    )
    chatbot = gr.Chatbot(
        height=500,
        bubble_full_width=False,
        avatar_images=(None, "🤖"),
    )
    with gr.Row():
        msg = gr.Textbox(
            label="Message",
            placeholder="Type your message...",
            scale=4,
            container=False,
        )
        submit = gr.Button("Send", scale=1, variant="primary")
    with gr.Row():
        clear = gr.Button("Clear conversation")
    gr.Examples(
        examples=[
            "Hello! Please introduce yourself.",
            "Show me how to build a simple calculator in Python.",
            "Share a famous quote for today.",
            "Tell me a joke to cheer me up.",
        ],
        inputs=msg,
        label="Example questions",
    )

    # Event handler: generate a reply, append the exchange to the history,
    # and clear the textbox by returning "" for it.
    def respond(message, chat_history):
        bot_message = chat(message, chat_history)
        chat_history.append((message, bot_message))
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    submit.click(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)
# Launch the app; queue() enables request queuing so concurrent users are served in order.
if __name__ == "__main__":
    demo.queue().launch()