import os
import gradio as gr
from llama_cpp import Llama
import requests
from tqdm import tqdm
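# Runtime dependencies: pip install llama-cpp-python gradio requests tqdm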

MODEL_URL = "https://huggingface.co/mradermacher/Ultiima-78B-v2-GGUF/resolve/main/Ultiima-78B-v2.Q2_K.gguf"
MODEL_PATH = "models/Ultiima-78B-v2.Q2_K.gguf"
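# NOTE (rough estimate): the Q2_K quantization of a 78B model is on the order of 30 GB
# on disk, and loading it needs a comparable amount of free RAM (or VRAM if offloaded).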

# The system prompt stays in Japanese because the app targets Japanese chat; it translates to:
# "You are a polite and knowledgeable Japanese AI assistant. Please answer the user's questions clearly."
SYSTEM_PROMPT = "あなたは丁寧で知的な日本語AIアシスタントです。ユーザーの質問にわかりやすく答えてください。"

def download_model(url=MODEL_URL, path=MODEL_PATH):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    if os.path.exists(path):
        print("Model file already exists.")
        return
    print(f"Downloading model: {url}")
    response = requests.get(url, stream=True)
    response.raise_for_status()  # fail early on HTTP errors instead of writing an error page to disk
    total = int(response.headers.get('content-length', 0))
    with open(path, 'wb') as file, tqdm(
        desc=path,
        total=total,
        unit='iB',
        unit_scale=True,
        unit_divisor=1024,
    ) as bar:
        for data in response.iter_content(chunk_size=1024):
            size = file.write(data)
            bar.update(size)
    print("Model download complete.")

# The download runs once at import time; later runs reuse the cached file.
download_model()

# n_ctx=4096 is an added assumption: the library's small default context window
# would overflow quickly once conversation history accumulates.
llm = Llama(model_path=MODEL_PATH, n_ctx=4096)
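# Optional (assumption): if llama-cpp-python was built with GPU support (CUDA/Metal),
# passing n_gpu_layers=-1 to Llama(...) offloads all layers to the GPU.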

def build_prompt(messages):
    prompt = f"<|system|>\n{SYSTEM_PROMPT}\n"
    for msg in messages:
        if msg["role"] == "user":
            prompt += f"<|user|>\n{msg['content']}\n"
        elif msg["role"] == "assistant":
            prompt += f"<|assistant|>\n{msg['content']}\n"
    prompt += "<|assistant|>\n"
    return prompt
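# For a single user turn "こんにちは", build_prompt produces:
#   <|system|>
#   ...SYSTEM_PROMPT...
#   <|user|>
#   こんにちは
#   <|assistant|>
# Note this is the script's own template; it may not match the chat format the model
# was trained with (Qwen-derived models typically use <|im_start|> ChatML tags).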

def generate_response(messages, temperature, top_p, max_tokens):
    prompt = build_prompt(messages)
    response = llm.create_completion(
        prompt=prompt,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        stop=["<|user|>", "<|system|>", "<|assistant|>"]
    )
    return response["choices"][0]["text"].strip()
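# The stop list cuts generation off as soon as the model starts emitting a new
# turn marker, preventing it from continuing the dialogue on its own.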

def chat_interface(user_input, history, temperature, top_p, max_tokens):
    history = history or []
    history.append({"role": "user", "content": user_input})
    response = generate_response(history, temperature, top_p, max_tokens)
    history.append({"role": "assistant", "content": response})

    # gr.Chatbot expects (user_message, assistant_message) pairs, so regroup
    # the flat role-tagged history instead of labeling each row by speaker.
    chat_display = []
    for i in range(0, len(history), 2):
        user_msg = history[i]["content"]
        assistant_msg = history[i + 1]["content"] if i + 1 < len(history) else None
        chat_display.append((user_msg, assistant_msg))

    return chat_display, history
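# History is never truncated, so very long chats can exceed the model's context
# window; dropping or summarizing the oldest turns is a possible refinement.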

with gr.Blocks() as demo:
    gr.Markdown("# Ultiima-78B-v2 GGUF Japanese Chat (system prompt + history)")
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(placeholder="Ask a question", label="Your input")

    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Temperature (creativity)")
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.8, step=0.05, label="Top-p (nucleus sampling cutoff)")
    max_tokens = gr.Slider(minimum=16, maximum=2048, value=512, step=16, label="Max tokens")

    history = gr.State([])

    submit_btn = gr.Button("Send")
    submit_btn.click(chat_interface, inputs=[user_input, history, temperature, top_p, max_tokens], outputs=[chatbot, history])

    user_input.submit(chat_interface, inputs=[user_input, history, temperature, top_p, max_tokens], outputs=[chatbot, history])
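    # Optional (assumption): clear the textbox after sending by chaining Gradio's
    # .then() on the event, e.g.:
    # submit_btn.click(...).then(lambda: "", outputs=user_input)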

demo.launch()
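# launch() serves the UI locally; by default Gradio listens on http://127.0.0.1:7860.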