import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch

# Change this to any Llama / Mistral style causal LM you like.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
# MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"  # <- use this to switch to Llama

# Turn markers used by the plain-text prompt format below.
_USER_TAG = "[ユーザー]:"
_ASSISTANT_TAG = "[アシスタント]:"

# Load the tokenizer and the model once at start-up.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)


def _build_prompt(message, history):
    """Render completed (user, assistant) turns plus the new message as a prompt."""
    turns = [
        f"{_USER_TAG} {user}\n{_ASSISTANT_TAG} {assistant}\n"
        for user, assistant in history
    ]
    turns.append(f"{_USER_TAG} {message}\n{_ASSISTANT_TAG}")
    return "".join(turns)


def _trim_reply(text):
    """Keep only the assistant's own turn from freshly generated text."""
    # The model may keep going and invent the next user turn; cut it off there.
    return text.partition(_USER_TAG)[0].strip()


def chat_fn(message, history):
    """Generate the assistant reply to ``message`` given the prior turns.

    Args:
        message: The new user message.
        history: List of completed ``(user, assistant)`` pairs. The new
            ``(message, response)`` pair is appended to it in place.

    Returns:
        A ``(response, history)`` tuple, where ``history`` is the same list
        object that was passed in, now including the new turn.
    """
    prompt = _build_prompt(message, history)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    output_ids = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        # Avoids the "pad_token_id not set" warning for tokenizers without a pad token.
        pad_token_id=tokenizer.eos_token_id,
    )

    # For decoder-only models the output starts with the prompt tokens; decode
    # only what was newly generated instead of splitting the full text on a
    # marker (which breaks if the marker appears in user text or is repeated).
    prompt_length = inputs["input_ids"].shape[-1]
    generated = tokenizer.decode(
        output_ids[0][prompt_length:], skip_special_tokens=True
    )
    response = _trim_reply(generated)

    history.append((message, response))
    return response, history


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🦙💬 Simple Llama / Mistral Chatbot")
    # NOTE(review): the handlers below use the [user, assistant] pair history
    # format; confirm the installed Gradio version still supports it.
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Message")

    def user_send(user_message, chat_history):
        """Clear the textbox and show the user's message with a pending reply."""
        return "", chat_history + [[user_message, None]]

    def bot_reply(chat_history):
        """Fill in the assistant reply for the last (pending) chat turn."""
        if not chat_history:
            return chat_history
        # The textbox has already been cleared by ``user_send``, so the message
        # is read back from the pending turn rather than from the textbox.
        user_message = chat_history[-1][0]
        # Pass only completed turns so the pending ``None`` reply never ends
        # up in the prompt; a copy keeps ``chat_fn``'s append off the UI list.
        response, _ = chat_fn(user_message, list(chat_history[:-1]))
        chat_history[-1] = [user_message, response]
        return chat_history

    msg.submit(user_send, [msg, chatbot], [msg, chatbot]).then(
        bot_reply, chatbot, chatbot
    )

demo.launch()