"""Gradio chat app: "Miles", a goofy high-school-nerd persona backed by a
Hugging Face hosted LLM, streamed into a ChatInterface."""

import random

import gradio as gr
from huggingface_hub import InferenceClient

# Change the model id here to swap the backing LLM
# (e.g. "HuggingFaceH4/zephyr-7b-beta").
client = InferenceClient("google/gemma-3-27b-it")


def respond_miles(message, history):
    """Stream a chat reply in the Miles persona.

    Parameters:
        message: the latest user message (str).
        history: prior turns as a list of {"role": ..., "content": ...}
            dicts (Gradio "messages" format); may be None/empty on the
            first turn.

    Yields:
        The partial response text, growing chunk by chunk, so Gradio can
        render a streaming reply.
    """
    # The system prompt is where the bot's personality is defined.
    messages = [
        {
            "role": "system",
            "content": "Your name is Miles. You are the goofy nerd in highschool",
        }
    ]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # max_tokens caps the length of the generated reply.
    for chunk in client.chat_completion(
        messages,
        max_tokens=130,
        stream=True,
    ):
        # delta.content can be None (e.g. on the final chunk of a stream),
        # so guard before concatenating.
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response
    # NOTE: the original indexed `response["choices"]...` after this loop,
    # which raised TypeError (`response` is a str); that code was removed.


def random_response(message, history):
    """Toy responder (not wired into the UI): return a canned reply at random."""
    return random.choice(["absolutely", "okay"])


chatbot = gr.ChatInterface(
    fn=respond_miles,
    type="messages",
    title="Meet Miles!",
    description="That nerdy boy from high school.",
    theme=gr.themes.Ocean(
        primary_hue="green",
        secondary_hue="green",
        neutral_hue="green",
    ),
)

if __name__ == "__main__":
    chatbot.launch(debug=True)