# Hugging Face Spaces page header (copy/paste residue): "Spaces: Running"
import os
import subprocess
import sys

# Ensure llama-cpp-python is importable. On first run inside the Space,
# install the pre-built CPU wheel so we never compile llama.cpp from source.
try:
    import llama_cpp
except ImportError:
    print("Installing pre-built llama-cpp-python...")
    subprocess.check_call([
        sys.executable, "-m", "pip", "install",
        "llama-cpp-python",
        "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu",
    ])

import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Model configuration: quantized GGUF weights hosted on the Hugging Face Hub.
model_id = "kawasumi/Tema_Q-R-4B-GGUF"
model_file = "Tema_Q-R-4B-Q4_K_M.gguf"

# Maximum number of input characters accepted per chat message.
MAX_INPUT_CHARS = 300

print("Downloading model...")
model_path = hf_hub_download(repo_id=model_id, filename=model_file)

print(f"Loading model from {model_path}...")
llm = Llama(
    model_path=model_path,
    n_ctx=1024,        # context window, in tokens
    n_threads=2,       # CPU threads (Spaces free tier has few cores)
    use_mmap=False,    # read the whole file into RAM instead of mmap
    n_batch=128,       # prompt-processing batch size
)
print("Model loaded.")
def chat_response(message, history):
    """Stream a model reply for *message* (Gradio ChatInterface callback).

    Args:
        message: The user's input text.
        history: Prior chat turns supplied by Gradio (unused here; each
            turn is prompted independently).

    Yields:
        Progressively longer partial responses, so Gradio renders the
        reply as it is generated. On over-long input or inference error,
        yields a single explanatory message instead.
    """
    # Reject over-long input up front, before spending any inference time.
    if len(message) > MAX_INPUT_CHARS:
        yield f"入力が長すぎます。{MAX_INPUT_CHARS}文字以内で入力してください。(現在 {len(message)} 文字)"
        return

    # Gemma-style chat template for a single user turn.
    prompt = f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"
    try:
        output = llm(
            prompt,
            max_tokens=512,
            # NOTE(review): stopping on the bare token "user" also truncates
            # any reply that merely contains the word "user" — confirm intended.
            stop=["<end_of_turn>", "user"],
            stream=True,
        )
        response = ""
        for chunk in output:
            response += chunk["choices"][0]["text"]
            yield response
    except Exception as e:  # UI boundary: surface inference errors in the chat
        yield f"エラーが発生しました: {str(e)}"
# Build the UI. The original note said the input-length limit is stated in
# the description field, but no description was passed — add one so the UI
# matches the limit that chat_response actually enforces.
demo = gr.ChatInterface(
    fn=chat_response,
    title="Tema_Q-R-4B Chat",
    description=f"入力は{MAX_INPUT_CHARS}文字以内で入力してください。",
)

if __name__ == "__main__":
    # Bind to all interfaces on the port Hugging Face Spaces expects.
    demo.launch(server_name="0.0.0.0", server_port=7860)