import gradio as gr
import time
from huggingface_hub import InferenceClient

# Module-level inference client, created once at import time and targeting
# the "HuggingFaceH4/zephyr-7b-beta" model. generate_dialogue() below calls
# it for every request.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

def generate_dialogue(topic):
    """Stream a short two-person dialogue about ``topic``.

    Sends a system instruction plus a user prompt containing ``topic`` to the
    module-level ``client`` with streaming enabled, and yields the text
    accumulated so far each time a new piece of text arrives, so a caller
    can display the dialogue progressively.

    Args:
        topic: Subject of the dialogue; interpolated into the user prompt.

    Yields:
        The full text received so far, with leading and trailing whitespace
        stripped.
    """
    # System message to instruct the model
    system_message = "Generate a short and engaging dialogue between two people on the given topic."

    # Conversation setup
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": f"Create a dialogue about: {topic}"},
    ]

    response = ""

    for msg in client.chat_completion(
        messages,
        max_tokens=250,  # Enough tokens for a dialogue
        stream=True,
        temperature=0.7,
        top_p=0.95,
    ):
        # Not every streamed chunk carries text: a chunk may have no choices,
        # or a delta whose content is None (e.g. a role-only or final chunk).
        # Concatenating None would raise TypeError and abort the stream, so
        # such chunks are skipped.
        if not msg.choices:
            continue
        token = msg.choices[0].delta.content
        if not token:
            continue

        response += token

        # Yield the accumulated text to stream the output progressively
        yield response.strip()

# Build the Gradio interface: a heading and description, a row holding the
# topic textbox and the trigger button, and a read-only textbox for the result.
with gr.Blocks() as demo:
    gr.Markdown("## 🎭 AI Dialogue Generator")
    gr.Markdown("Enter a topic, and the AI will generate a short dialogue between two people.")

    # Input field and button share one row.
    with gr.Row():
        topic_input = gr.Textbox(
            label="Enter a topic",
            placeholder="e.g., Space Travel, AI in Healthcare",
        )
        generate_btn = gr.Button("Generate Dialogue")

    # Read-only area that displays the generated dialogue.
    output_box = gr.Textbox(
        label="Generated Dialogue",
        interactive=False,
        lines=10,
    )

    # Clicking the button runs generate_dialogue on the entered topic and
    # sends what it yields to the output box.
    generate_btn.click(
        fn=generate_dialogue,
        inputs=topic_input,
        outputs=output_box,
    )

# Launch the app only when this file is executed as a script, so importing
# the module does not call demo.launch().
if __name__ == "__main__":
    demo.launch()