"""Gradio app that streams a short AI-generated dialogue on a user-supplied topic."""

import time

import gradio as gr
from huggingface_hub import InferenceClient

# Initialize the InferenceClient with the model
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def generate_dialogue(topic: str):
    """Stream a two-person dialogue about *topic*.

    Sends a system instruction plus the user's topic to the chat model and
    yields the accumulated text after each streamed chunk that carries
    content, so the UI can render the dialogue progressively.

    Args:
        topic: Subject of the dialogue, as typed by the user.

    Yields:
        The dialogue generated so far, with surrounding whitespace stripped.
    """
    # System message to instruct the model
    system_message = (
        "Generate a short and engaging dialogue between two people on the given topic."
    )

    # Conversation setup
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": f"Create a dialogue about: {topic}"},
    ]

    response = ""

    # Generate the dialogue with streaming
    for msg in client.chat_completion(
        messages,
        max_tokens=250,  # Enough tokens for a dialogue
        stream=True,
        temperature=0.7,
        top_p=0.95,
    ):
        # Some streamed chunks carry no choices at all; nothing to append.
        if not msg.choices:
            continue

        # delta.content is optional and may be None; concatenating None to
        # the accumulated string would raise TypeError.
        token = msg.choices[0].delta.content
        if not token:
            continue

        response += token
        # Yield response to stream the output progressively
        yield response.strip()


# Define the Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🎭 AI Dialogue Generator")
    gr.Markdown("Enter a topic, and the AI will generate a short dialogue between two people.")

    with gr.Row():
        topic_input = gr.Textbox(
            label="Enter a topic",
            placeholder="e.g., Space Travel, AI in Healthcare",
        )
        generate_btn = gr.Button("Generate Dialogue")

    output_box = gr.Textbox(label="Generated Dialogue", interactive=False, lines=10)

    # generate_dialogue is a generator, so each yielded value updates output_box.
    generate_btn.click(generate_dialogue, inputs=topic_input, outputs=output_box)


if __name__ == "__main__":
    demo.launch()