Update app.py
app.py CHANGED
@@ -19,6 +19,7 @@ def load_model(progress=gr.Progress()):
     progress(0, desc="モデルのダウンロードを開始")
     model_path = hf_hub_download(repo_id=repo_id, filename=filename)
     progress(0.5, desc="モデルをメモリに読み込み中")
+
     llm = Llama(
         model_path=model_path,
         n_threads=N_THREADS,
@@ -29,45 +30,48 @@ def load_model(progress=gr.Progress()):
     progress(1, desc="モデルの読み込み完了")
     model_loaded = True
     return "モデルの読み込みが完了しました。"
+
 
-def get_llama_response(prompt):
+def get_llama_response(prompt, temperature):
     global llm, model_loaded
     if not model_loaded:
         return [{"choices": [{"text": "モデルを読み込んでいます。しばらくお待ちください..."}]}]
     try:
-        return llm(prompt, max_tokens=1024, temperature=
+        return llm(prompt, max_tokens=1024, temperature=temperature, top_p=0.95, repeat_penalty=1.1, stream=True)
     except Exception as e:
         return [{"choices": [{"text": f"エラーが発生しました: {str(e)}"}]}]
 
-
+
+def greet(prompt, temperature):
     global model_loaded
     if not model_loaded:
         return "モデルを読み込んでいます。しばらくお待ちください..."
 
     full_response = ""
-    for output in get_llama_response(prompt):
+    for output in get_llama_response(prompt, temperature):
         if len(output['choices']) > 0:
             text_chunk = output['choices'][0]['text']
             full_response += text_chunk
             yield full_response
-
-
+    return full_response
+
 
 with gr.Blocks() as demo:
-    gr.Markdown("#
+    gr.Markdown(f"# LLMチャットボット(Streaming):{filename}")
     gr.Markdown(f"MODEL: {filename} from {repo_id}")
 
     loading_status = gr.Textbox(label="Loading Status")
 
     with gr.Row():
-        input_text = gr.Textbox(label="
-
+        input_text = gr.Textbox(label="プロンプトを入力してください")
+        temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature")
 
-    output_text = gr.Textbox(label="
-    submit_button = gr.Button("
+    output_text = gr.Textbox(label="生成されたレスポンス")
+    submit_button = gr.Button("送信")
 
-    submit_button.click(fn=greet, inputs=[input_text,
+    submit_button.click(fn=greet, inputs=[input_text, temperature], outputs=output_text)
     demo.load(fn=load_model, outputs=loading_status)
 
+
 demo.queue()
 demo.launch()
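For context, the change wires llama-cpp-python's streaming completion API into a Gradio generator handler: calling the Llama object with stream=True returns an iterator of chunks, each carrying the next piece of generated text in choices[0]["text"], and every value that greet yields is pushed to the output Textbox because the app runs with demo.queue(). Below is a minimal self-contained sketch of that flow, not the Space's actual app.py: repo_id, filename, and N_THREADS are placeholder values, and the UI labels are English stand-ins for the Japanese ones in the commit.

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Placeholder values for illustration only; the Space defines its own.
repo_id = "someuser/some-gguf-model"
filename = "model.Q4_K_M.gguf"
N_THREADS = 4

# Download the GGUF file from the Hub and load it with the llama.cpp bindings.
model_path = hf_hub_download(repo_id=repo_id, filename=filename)
llm = Llama(model_path=model_path, n_threads=N_THREADS)

def generate(prompt, temperature):
    # With stream=True, llama-cpp-python yields completion chunks instead of
    # returning a single dict; each chunk's choices[0]["text"] is the newest text.
    full_response = ""
    for chunk in llm(prompt, max_tokens=1024, temperature=temperature,
                     top_p=0.95, repeat_penalty=1.1, stream=True):
        full_response += chunk["choices"][0]["text"]
        yield full_response  # Gradio re-renders the output box on every yield

with gr.Blocks() as demo:
    prompt_box = gr.Textbox(label="Prompt")      # 「プロンプトを入力してください」
    temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature")
    output_box = gr.Textbox(label="Response")    # 「生成されたレスポンス」
    send = gr.Button("Submit")                   # 「送信」
    send.click(fn=generate, inputs=[prompt_box, temperature], outputs=output_box)

demo.queue()   # queuing is what lets a generator handler stream to the UI
demo.launch()

In a generator handler only yielded values reach the UI, so the trailing return full_response in the committed greet does not change what is displayed; demo.queue() before demo.launch() is what enables the incremental updates.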