# Hugging Face page residue from the original upload (not code):
#   "Create app.py" — commit 49dde48 (verified), uploaded by kawasumi
import os
import subprocess
import sys
# Ensure llama-cpp-python is importable: try the import first, and on
# failure install a pre-built CPU wheel at runtime (a common pattern on
# Hugging Face Spaces, where compiling llama.cpp at build time is slow).
try:
    import llama_cpp
except ImportError:
    print("Installing pre-built llama-cpp-python...")
    # Install through the current interpreter so the wheel lands in the
    # active environment; the extra index hosts prebuilt CPU-only wheels.
    subprocess.check_call([
        sys.executable, "-m", "pip", "install",
        "llama-cpp-python",
        "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu"
    ])
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Model configuration: quantized GGUF build hosted on the Hugging Face Hub.
model_id = "kawasumi/Tema_Q-R-4B-GGUF"
model_file = "Tema_Q-R-4B-Q4_K_M.gguf"

# Maximum number of characters accepted per user message.
MAX_INPUT_CHARS = 300

print("Downloading model...")
# hf_hub_download caches the file locally and returns its filesystem path.
model_path = hf_hub_download(repo_id=model_id, filename=model_file)

print(f"Loading model from {model_path}...")
llm = Llama(
    model_path=model_path,
    n_ctx=1024,       # context window size in tokens
    n_threads=2,      # small thread count — presumably sized for a shared CPU tier
    use_mmap=False,   # disable memory-mapping of the model file
    n_batch=128,      # prompt-processing batch size
)
print("Model loaded.")
def chat_response(message, history):
    """Stream a model reply for *message*, using prior turns as context.

    Yields progressively longer partial responses (Gradio streaming
    convention). ``history`` is the chat log Gradio passes in: either
    ``[user, bot]`` pairs or ``{"role": ..., "content": ...}`` message
    dicts depending on the Gradio version — both shapes are handled.
    """
    # --- input length limit ---
    # Reject over-long inputs up front so no inference time is wasted.
    if len(message) > MAX_INPUT_CHARS:
        yield f"入力が長すぎます。{MAX_INPUT_CHARS}文字以内で入力してください。(現在 {len(message)} 文字)"
        return
    # --------------------------

    # Rebuild the conversation in the Gemma-style turn format so earlier
    # turns are visible to the model. (The original implementation accepted
    # `history` but dropped it entirely, making every turn context-free.)
    parts = []
    for item in history or []:
        if isinstance(item, dict):
            # "messages" format: one dict per turn.
            role = "user" if item.get("role") == "user" else "model"
            content = item.get("content", "")
            if content:
                parts.append(f"<start_of_turn>{role}\n{content}<end_of_turn>\n")
        else:
            # Legacy format: [user_message, bot_message] pair.
            user_msg, bot_msg = item[0], item[1]
            if user_msg:
                parts.append(f"<start_of_turn>user\n{user_msg}<end_of_turn>\n")
            if bot_msg:
                parts.append(f"<start_of_turn>model\n{bot_msg}<end_of_turn>\n")
    parts.append(f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n")
    prompt = "".join(parts)

    try:
        output = llm(
            prompt,
            max_tokens=512,
            stop=["<end_of_turn>", "user"],
            stream=True,
        )
        # Accumulate text and re-yield the growing string, as Gradio's
        # streaming ChatInterface expects the full partial answer each time.
        response = ""
        for chunk in output:
            response += chunk["choices"][0]["text"]
            yield response
    except Exception as e:
        # Surface inference errors in the chat window instead of crashing
        # the UI worker.
        yield f"エラーが発生しました: {str(e)}"
# Build the UI. The description surfaces the input-length limit to users
# (the original comment promised this note, but no description was passed).
demo = gr.ChatInterface(
    fn=chat_response,
    title="Tema_Q-R-4B Chat",
    description=f"入力は{MAX_INPUT_CHARS}文字以内でお願いします。",
)

if __name__ == "__main__":
    # Bind to all interfaces on port 7860, as Hugging Face Spaces expects.
    demo.launch(server_name="0.0.0.0", server_port=7860)