"""Streamlit chat interface for smoke-testing a Hugging Face Inference Endpoint.

Each user prompt is sent statelessly (no chat history is carried between
turns) and the model's reply is streamed token-by-token into the UI.
"""

import os
from typing import Iterator

import streamlit as st
from huggingface_hub import InferenceClient

# Constants
SPACE_URL = "https://z7svds7k42bwhhgm.us-east-1.aws.endpoints.huggingface.cloud"
# NOTE(review): None when the env var is unset — the client will then call the
# endpoint unauthenticated; confirm the endpoint allows that or fail fast here.
HF_API_KEY = os.getenv("HF_API_KEY")
DUBS_PATH = "🐾"  # Optional: Replace with an avatar path if needed

# Streamlit Configuration
st.set_page_config(page_title="Chatbot Test", page_icon="🤖", layout="centered")

client = InferenceClient(SPACE_URL, token=HF_API_KEY)


def fetch_response(prompt: str) -> Iterator[str]:
    """Stream generated text from the HF Inference Endpoint.

    This is a generator: it yields one token string at a time as the
    endpoint produces them, skipping special tokens and stopping early
    when a configured stop sequence is emitted.

    Args:
        prompt: Fully formatted prompt, including any chat-template markers.

    Yields:
        Individual token texts, in generation order.
    """
    gen_kwargs = {
        "max_new_tokens": 512,
        "top_k": 30,
        "top_p": 0.9,
        "temperature": 0.2,
        "repetition_penalty": 1.02,
        "stop_sequences": ["<|endoftext|>"],
    }
    stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
    for response in stream:
        # Skip special tokens (e.g. BOS/EOS markers) entirely.
        if response.token.special:
            continue
        # Stop if we encounter stop_sequences.
        # NOTE(review): this only matches a stop sequence that arrives as a
        # single token; a multi-token stop string would slip through — confirm
        # the endpoint tokenizes "<|endoftext|>" as one token.
        if response.token.text in gen_kwargs["stop_sequences"]:
            break
        yield response.token.text


# Streamlit Chat Interface
st.title("Chatbot Testing Interface")

# User Input Section
prompt = st.chat_input("Enter your message...")

if prompt:
    # Display the user's message
    st.chat_message("user").write(prompt)

    # Build the chat history (use prompt directly for stateless behavior)
    chat_history = f"<|user|>{prompt}<|end|> \n <|assistant|> "

    # Generate the response, rendering tokens as they stream in
    with st.spinner("Dubs is thinking... Woof Woof! 🐾"):
        with st.chat_message("assistant", avatar=DUBS_PATH):
            full_response = fetch_response(chat_history)
            st.write_stream(full_response)