# Hugging Face Space: ELYZA-japanese-CodeLlama-7b-instruct demo
# (Space previously crashed at startup with "Runtime error")
import os
import tempfile

import gradio as gr
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
# Load the instruction-tuned Japanese CodeLlama model.
model_name = "elyza/ELYZA-japanese-CodeLlama-7b-instruct"

# 8-bit quantization was tried and disabled; kept for reference.
# bnb_config = BitsAndBytesConfig(
#     load_in_8bit=True,
# )

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # quantization_config=bnb_config,
    # offload_folder="./offload"
    device_map="auto",  # let accelerate place layers on GPU/CPU automatically
)

# Debug: confirm the model can be re-sharded and saved to disk.
with tempfile.TemporaryDirectory() as tmp_dir:
    model.save_pretrained(tmp_dir, max_shard_size="5GB")
    print(sorted(os.listdir(tmp_dir)))  # requires `import os` at the top of the file

# Build the generation pipeline.
# Fix 1: when `model` is a model *instance* (not a repo id), the pipeline
#         cannot infer the tokenizer — it must be passed explicitly.
# Fix 2: do NOT pass `device=0` here — the model was loaded with
#         device_map="auto" (accelerate dispatch), and also specifying a
#         pipeline device raises a ValueError at construction time.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
| # 応答生成関数 | |
def generate_response(prompt):
    """Generate a completion for *prompt* with the preloaded pipeline.

    Sampling is enabled (top-k=50, top-p=0.95); the output is capped at
    200 tokens, a limit that includes the prompt itself. Returns the
    generated text of the first (and only) candidate.
    """
    outputs = pipe(
        prompt,
        max_length=200,
        do_sample=True,
        top_k=50,
        top_p=0.95,
    )
    first_candidate = outputs[0]
    return first_candidate["generated_text"]
| # Gradioインターフェースの定義 | |
# Build the Gradio UI: a prompt box and submit button on the left,
# the generated text on the right.
with gr.Blocks() as demo:
    gr.Markdown("### ELYZA-japanese-CodeLlama-7b-instruct テストアプリ")
    with gr.Row():
        with gr.Column():
            prompt_box = gr.Textbox(
                label="プロンプトを入力",
                placeholder="ここにテキストを入力してください",
                lines=4,
            )
            send_btn = gr.Button("送信")
        with gr.Column():
            result_box = gr.Textbox(
                label="結果",
                placeholder="生成結果がここに表示されます",
                lines=10,
            )
    # Wire the button to the generation function.
    send_btn.click(generate_response, inputs=prompt_box, outputs=result_box)

# Launch the app (blocks until the server stops).
demo.launch()