# NOTE: the lines below were captured from the Hugging Face Spaces page header
# along with the source ("Spaces: Runtime error"); kept here as a comment so
# the file remains valid Python.
| import gradio as gr | |
| from huggingface_hub import hf_hub_download | |
| from llama_cpp import Llama | |
| import os | |
| import threading | |
| import time | |
# Hugging Face Hub location of the quantized GGUF model weights.
repo_id = "bartowski/Qwen2.5-1.5B-Instruct-GGUF"
filename = "Qwen2.5-1.5B-Instruct-Q8_0.gguf"
CONTEXT_SIZE = 1024  # llama.cpp context window size, in tokens
N_THREADS = 2  # the free HF Spaces CPU tier provides 2 cores
# Lazily populated by load_model(); greet()/get_llama_response() check the
# flag before touching `llm`.
llm = None
model_loaded = False
def load_model(progress=gr.Progress()):
    """Download the GGUF weights and load them into a llama.cpp instance.

    Wired to ``demo.load``, so it runs once per *browser session*.

    Args:
        progress: Gradio progress tracker (injected by Gradio at call time).

    Returns:
        A status message for the "Loading Status" textbox.
    """
    global llm, model_loaded
    # BUG FIX: demo.load fires for every new session; without this guard each
    # visitor would re-load the whole model into memory again.
    if model_loaded:
        return "モデルの読み込みが完了しました。"
    progress(0, desc="モデルのダウンロードを開始")
    # hf_hub_download caches locally, so repeat downloads are cheap.
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    progress(0.5, desc="モデルをメモリに読み込み中")
    llm = Llama(
        model_path=model_path,
        n_threads=N_THREADS,
        n_batch=8,
        verbose=False,
        n_ctx=CONTEXT_SIZE,
    )
    progress(1, desc="モデルの読み込み完了")
    model_loaded = True
    return "モデルの読み込みが完了しました。"
def get_llama_response(prompt, temperature):
    """Yield streaming completion chunks from the loaded llama.cpp model.

    Every yielded item is a dict shaped like llama.cpp's stream output:
    ``{"choices": [{"text": ...}]}`` — including the not-loaded and error
    fallbacks, so callers can iterate uniformly.

    Args:
        prompt: Raw prompt text passed straight to the model.
        temperature: Sampling temperature from the UI slider.
    """
    global llm, model_loaded
    if not model_loaded:
        yield {"choices": [{"text": "モデルを読み込んでいます。しばらくお待ちください..."}]}
        return
    try:
        # BUG FIX: with stream=True the Llama call returns a *lazy* generator,
        # so exceptions are raised while iterating, not when the call is made.
        # The original try/except around the bare call could therefore never
        # catch generation-time errors; wrap the iteration itself instead.
        for chunk in llm(
            prompt,
            max_tokens=1024,
            temperature=temperature,
            top_p=0.95,
            repeat_penalty=1.1,
            stream=True,
        ):
            yield chunk
    except Exception as e:  # UI boundary: surface the error as a chunk
        yield {"choices": [{"text": f"エラーが発生しました: {str(e)}"}]}
def greet(prompt, temperature):
    """Stream the growing response text into the output textbox.

    Generator handler for the submit button / textbox-submit events: yields
    the accumulated response after each chunk so Gradio updates live.

    Args:
        prompt: User prompt from the input textbox.
        temperature: Sampling temperature from the slider.
    """
    global model_loaded
    if not model_loaded:
        # BUG FIX: this function contains `yield`, making it a generator, so
        # the original bare `return "..."` only set the StopIteration value
        # and the message never reached the UI. Yield it instead.
        yield "モデルを読み込んでいます。しばらくお待ちください..."
        return
    full_response = ""
    for output in get_llama_response(prompt, temperature):
        if output["choices"]:
            full_response += output["choices"][0]["text"]
            yield full_response
    # (no trailing `return full_response`: a generator's return value is
    # discarded by Gradio, so the original line was dead code)
# --- UI definition and event wiring -------------------------------------
with gr.Blocks() as demo:
    # NOTE(review): f-string has no placeholders; harmless, kept as-is.
    gr.Markdown(f"# LLMチャットボット(Streaming)")
    # Colour-coded disclaimer banner (positive/neutral/negative spans).
    gr.HighlightedText(
        value=[("", None),
        ("これはLLM", "positive"),
        ("の", None),
        ("テストアプリケーション", "neutral"), ("です。\n", None),
        ("内容は実験的", "neutral"), ("なため", None),
        ("重要な意思決定に用いない", "negative"),
        ("でください。", None)
        ],
        label="注意",
        show_label=False,
    )
    # Prompt input and sampling-temperature slider side by side.
    with gr.Row():
        input_text = gr.Textbox(label="プロンプトを入力してください")
        temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature")
    output_text = gr.Textbox(label="生成されたレスポンス")
    submit_button = gr.Button("送信")
    # Read-only display of which GGUF file is being served.
    gr.Textbox(value=filename, label="モデル", interactive=False)
    loading_status = gr.Textbox(label="Loading Status")
    # Both the button click and Enter-in-textbox trigger the same handler;
    # greet is a generator, so output_text updates incrementally.
    submit_button.click(fn=greet, inputs=[input_text, temperature], outputs=output_text)
    input_text.submit(fn=greet, inputs=[input_text, temperature], outputs=output_text)
    # Kick off the model download/load as soon as a session opens.
    demo.load(fn=load_model, outputs=loading_status)
# queue() is required for generator (streaming) handlers to work.
demo.queue()
demo.launch()