"""Minimal streaming chatbot UI: Gradio front end over a hosted Hugging Face LLM."""

import gradio as gr
import random  # NOTE(review): currently unused; remove if no fallback responder is planned
from huggingface_hub import InferenceClient

# Change the model ID here to swap the underlying LLM.
client = InferenceClient("google/gemma-3-27b-it")


def respond(message, history):
    """Stream a chat completion for *message*, yielding the reply so far.

    Args:
        message: The latest user message as a plain string.
        history: Prior turns as ``{"role": ..., "content": ...}`` dicts, the
            format supplied by ``gr.ChatInterface(type="messages")``.

    Yields:
        str: The assistant reply accumulated so far; Gradio re-renders the
        chat bubble on every yield, producing the streaming effect.
    """
    messages = [{"role": "system", "content": "You are a sarcastic chatbot"}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    # Loop variable is `chunk`, not `message` — the original shadowed the
    # user's message parameter with each streamed event.
    for chunk in client.chat_completion(
        messages,
        max_tokens=400,
        stream=True,
    ):
        # delta.content can be None on the final/empty streamed chunk;
        # `or ""` prevents a TypeError from `str + None` mid-stream.
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response


chatbot = gr.ChatInterface(respond, type="messages", title="chatty")

if __name__ == "__main__":
    # debug=True keeps the server in the foreground and surfaces tracebacks.
    chatbot.launch(debug=True)