import gradio as gr
import os
from huggingface_hub import InferenceClient

# Get token from environment variable for security
# In HuggingFace Spaces, set this in the Settings tab
HF_TOKEN = os.environ.get('HUGGINGFACE_TOKEN')

# Initialize the HuggingFace Inference Client
client = InferenceClient(token=HF_TOKEN)


def chatbot_hf(question, temperature=0.7, model='google/gemma-2-2b-it'):
    # Send the question to the selected HuggingFace model
    response = client.chat_completion(
        model=model,
        messages=[{"role": "user", "content": question}],
        temperature=temperature,
        max_tokens=500  # Maximum length of response
    )
    # Extract and return the response text
    return response.choices[0].message.content


def main():
    # Define available models
    AVAILABLE_MODELS = [
        "google/gemma-2-2b-it",
        "meta-llama/Llama-2-7b-chat-hf",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "HuggingFaceH4/zephyr-7b-beta",
    ]

    # Create the Gradio interface with a more polished layout
    demo = gr.Interface(
        fn=chatbot_hf,
        inputs=[
            gr.Textbox(
                label="Your Question",
                lines=2,
                placeholder="Type your message here...",
                scale=3,
            ),
            gr.Slider(
                label="Temperature",
                minimum=0.0,
                maximum=1.0,
                step=0.01,
                value=0.7,
                info="Higher values make output more random, lower values more focused",
            ),
            gr.Dropdown(
                label="Select Model",
                choices=AVAILABLE_MODELS,
                value=AVAILABLE_MODELS[0],
                info="Choose the AI model to chat with",
            ),
        ],
        outputs=gr.Textbox(label="AI Response", lines=20),
        title="🤖 HuggingFace Chat Interface",
        description="""
        Chat with various large language models hosted on HuggingFace.
        Adjust the temperature to control response creativity.
        """,
        article="""
        ### Tips
        - For factual responses, use a lower temperature (0.1-0.3)
        - For creative writing, use a higher temperature (0.7-0.9)
        - Different models may have different strengths
        """,
    )

    demo.launch()


if __name__ == "__main__":
    main()
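
# ---------------------------------------------------------------------------
# Usage sketch for running locally (the filename app.py and the token value
# below are illustrative assumptions, not part of the script above):
#
#   export HUGGINGFACE_TOKEN="hf_..."   # your HuggingFace access token
#   python app.py
#
# Gradio then serves the interface locally (by default at
# http://127.0.0.1:7860) and prints the URL to the console. On HuggingFace
# Spaces, set HUGGINGFACE_TOKEN as a secret in the Space's Settings tab
# instead of exporting it in a shell.
# ---------------------------------------------------------------------------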