| import gradio as gr |
| import torch |
| import random |
| import time |
| from transformers import pipeline |
|
|
| |
# Shared text-generation pipeline, built once at import time.
# Runs on the first CUDA device when one is available, otherwise on the CPU.
generator = pipeline(
    task="text-generation",
    model="heegyu/gorani-v0",
    device="cpu" if not torch.cuda.is_available() else "cuda:0",
)
|
|
def query(message, chat_history, max_turn=2):
    """Generate the bot's reply to ``message`` given the recent conversation.

    The prompt contains one line per utterance, tagged ``<usr>`` or ``<bot>``,
    followed by the new user message and a trailing open ``<bot>`` tag for the
    model to complete.

    Args:
        message: The new user utterance.
        chat_history: Sequence of ``(user, bot)`` pairs, oldest first.
            ``None`` is treated as an empty history.
        max_turn: Maximum number of most recent pairs to include in the
            prompt. Zero or a negative value means no history is included.

    Returns:
        The text generated after the prompt, with surrounding whitespace
        stripped.
    """
    history = chat_history or []

    # A bare ``history[-max_turn:]`` is wrong for max_turn <= 0: ``-0`` is 0,
    # so the slice would return the *entire* history instead of nothing.
    recent_turns = history[-max_turn:] if max_turn > 0 else []

    lines = []
    for user, bot in recent_turns:
        lines.append(f"<usr> {user}")
        lines.append(f"<bot> {bot}")
    lines.append(f"<usr> {message}")
    prompt = "\n".join(lines) + "\n<bot>"

    output = generator(
        prompt,
        do_sample=True,
        top_p=0.9,
        # NOTE(review): early_stopping is normally a beam-search option;
        # confirm it has any effect with plain sampling.
        early_stopping=True,
        max_new_tokens=256,
    )[0]['generated_text']

    # Console log of the raw pipeline output for debugging.
    print(output)

    # The slice below relies on generated_text starting with the prompt
    # (the pipeline's default full-text return); keep only the continuation.
    response = output[len(prompt):]
    return response.strip()
|
|
with gr.Blocks() as demo:
    # Layout: conversation pane, message input, and a reset button.
    # NOTE(review): `.style()` is deprecated in newer Gradio releases in
    # favour of constructor arguments -- confirm against the pinned version.
    chatbot = gr.Chatbot().style(height=700)
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def respond(message, chat_history):
        """Answer ``message`` and record the exchange in ``chat_history``.

        Returns an empty string (written back to the textbox, clearing it)
        together with the updated history for the chatbot component.
        """
        reply = query(message, chat_history)
        chat_history.append((message, reply))
        return "", chat_history

    # Submitting the textbox runs a model turn; "Clear" resets the chatbot.
    msg.submit(fn=respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
    clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)


demo.launch()