import gradio as gr import random from huggingface_hub import InferenceClient client = InferenceClient("HuggingFaceH4/zephyr-7b-beta") #change the LLM def respond(message, history): messages = [{"role": "system", "content": "You are a professional interviewer."}] if history: messages.extend(history) messages.append({"role":"user", "content": message}) response = "" for message in client.chat_completion( messages, max_tokens = 100, #change the length temperature = 0.5, top_p = 0.7, stream=True ): token = message.choices[0].delta.content response += token yield response def echo(message, history): choices = ["Without a doubt", "Ask again later","Yes, definitely","Don't count on it"] response = random.choice(choices) return response with gr.Blocks(css=""" #title { font-size: 28px; color: #00e676; text-align: center; } .message.user { background-color: #1e88e5; color: white; border-radius: 12px; } .message.bot { background-color: #424242; color: #eeeeee; border-radius: 12px; } .chatbot { background-color: #121212; padding: 10px; border-radius: 16px; } body { background-color: #111 !important; } """) as demo: gr.Markdown("#