File size: 2,046 Bytes
e58f6e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e2eb426
e58f6e4
 
48a4e45
e58f6e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48a4e45
e58f6e4
48a4e45
e58f6e4
 
48a4e45
 
 
 
 
 
e58f6e4
 
 
 
 
48a4e45
 
e58f6e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import subprocess
import sys

# Bootstrap: ensure llama-cpp-python is importable before the real import
# below. The CPU wheel index avoids a slow from-source build on hosts
# without a toolchain. Must run before `from llama_cpp import Llama`.
try:
    import llama_cpp
except ImportError:
    print("Installing pre-built llama-cpp-python...")
    # NOTE(review): installs at runtime into the current interpreter;
    # subsequent imports in this process pick up the fresh package.
    subprocess.check_call([
        sys.executable, "-m", "pip", "install", 
        "llama-cpp-python", 
        "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu"
    ])

import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Model configuration: GGUF checkpoint hosted on the Hugging Face Hub.
model_id = "yuna1126/Tema_Q-R-0.4B-GGUF"
model_file = "Tema_Q-R-0.4B-f16.gguf"

# Maximum number of characters accepted from the user per message.
MAX_INPUT_CHARS = 700

print("Downloading model...")
# Downloads (or reuses a cached copy of) the GGUF file; returns local path.
model_path = hf_hub_download(repo_id=model_id, filename=model_file)

print(f"Loading model from {model_path}...")
# Conservative settings, presumably sized for a small CPU host — confirm
# against the deployment environment before raising them.
llm = Llama(
    model_path=model_path,
    n_ctx=1024,      # context window in tokens
    n_threads=2,     # CPU threads used for inference
    use_mmap=False,  # read the model into RAM rather than memory-mapping it
    n_batch=128,     # prompt-processing batch size
)
print("Model loaded.")

def chat_response(message, history):
    """Stream an assistant reply for *message*, yielding the growing text.

    *history* is accepted for the gr.ChatInterface contract but unused.
    Yields the accumulated response after each streamed chunk; yields a
    single error string if the input is too long or inference fails.
    """
    # Guard: refuse over-long inputs before touching the model.
    if len(message) > MAX_INPUT_CHARS:
        yield f"入力が長すぎます。{MAX_INPUT_CHARS}文字以内で入力してください。"
        return

    # ChatML-style prompt: one user turn, then an open assistant turn.
    prompt = (
        "<|im_start|>user\n"
        f"{message}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    accumulated = ""
    try:
        stream = llm(
            prompt,
            max_tokens=512,
            # Stop on turn delimiters so generation ends at the reply.
            stop=["<|im_end|>", "<|im_start|>"],
            stream=True,
        )
        for chunk in stream:
            accumulated += chunk["choices"][0]["text"]
            yield accumulated
    except Exception as exc:
        yield f"エラーが発生しました: {str(exc)}"

# Build the UI. Fix: the original comment claimed the input limit was
# noted in the description field, but no description was actually set —
# add it so users see the limit before hitting the rejection message.
demo = gr.ChatInterface(
    fn=chat_response,
    title="Tema_Q-R-0.4B Chat",
    description=f"入力は{MAX_INPUT_CHARS}文字以内で入力してください。",
)

if __name__ == "__main__":
    # Bind to all interfaces on 7860, the conventional Gradio/Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)