"""Minimal Gradio chat front-end backed by the Hugging Face Inference API.

The script keeps one module-level conversation history, sends each new user
message (plus the prior turns) to the hosted model, and renders the running
conversation in a ``gr.Chatbot`` component.
"""

import os

import gradio as gr
import requests

API_URL = "https://api-inference.huggingface.co/models/EleutherAI/gpt-neo-125M"

# Seconds to wait for the Inference API before giving up; without a timeout
# a stalled connection would block the request handler indefinitely.
REQUEST_TIMEOUT_SECONDS = 60

# Report only whether the token is present -- never echo the secret itself.
print(
    "Loaded token:",
    "<set>" if os.environ.get("HUGGINGFACEHUB_API_TOKEN") else "<missing>",
)

# Raises KeyError at import time when the variable is not defined.
HEADERS = {
    "Authorization": f"Bearer {os.environ['HUGGINGFACEHUB_API_TOKEN']}"
}

# Conversation so far as {"role": ..., "content": ...} dicts. This list is
# module-global, so it is shared by every caller of chat() in this process.
chat_history = []


def _extract_reply(result):
    """Return the stripped ``generated_text`` from a decoded API response.

    Accepts either a dict carrying ``"generated_text"`` or a non-empty list
    whose first element is a dict (a missing key there yields ``""``).

    Raises:
        ValueError: if the response has any other shape.
    """
    if isinstance(result, dict) and "generated_text" in result:
        return result["generated_text"].strip()
    if isinstance(result, list) and result and isinstance(result[0], dict):
        return result[0].get("generated_text", "").strip()
    raise ValueError(f"Unexpected response from inference API: {result!r}")


def chat(user_message):
    """Send *user_message* to the model and return its reply text.

    On success the user message and the reply are appended to
    ``chat_history``. On failure the history is left untouched so the
    user/assistant turns stay aligned for later requests.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-2xx HTTP status.
        ValueError: if the response body has an unexpected shape.
    """
    # NOTE(review): this "inputs" dict is the conversational-task payload
    # shape, while API_URL points at a text-generation model -- confirm the
    # endpoint accepts it.
    payload = {
        "inputs": {
            "past_user_inputs": [
                m["content"] for m in chat_history if m["role"] == "user"
            ],
            "generated_responses": [
                m["content"] for m in chat_history if m["role"] == "assistant"
            ],
            "text": user_message,
        },
        "options": {"use_cache": False},
    }

    response = requests.post(
        API_URL,
        headers=HEADERS,
        json=payload,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    reply = _extract_reply(response.json())

    # Record both turns only after a successful round trip.
    chat_history.append({"role": "user", "content": user_message})
    chat_history.append({"role": "assistant", "content": reply})
    return reply


def _history_as_pairs():
    """Return ``chat_history`` as a list of ``[user, assistant]`` pairs."""
    user_turns = [m["content"] for m in chat_history if m["role"] == "user"]
    bot_turns = [m["content"] for m in chat_history if m["role"] == "assistant"]
    return [[user, bot] for user, bot in zip(user_turns, bot_turns)]


def respond(user_message):
    """Gradio submit handler: run one chat turn and refresh the UI.

    Returns a ``(textbox_value, chatbot_value)`` tuple: an empty string to
    clear the input box, and the conversation for the chatbot component.
    Blank input is ignored without calling the API.
    """
    if user_message and user_message.strip():
        try:
            chat(user_message)
        except (requests.RequestException, ValueError) as exc:
            # Surface the failure in the UI instead of a bare server error.
            raise gr.Error(str(exc)) from exc
    # NOTE(review): pairs are the classic Chatbot value format; Gradio
    # versions that only accept the "messages" format would need
    # chat_history returned directly instead -- confirm against the
    # installed version.
    return "", _history_as_pairs()


with gr.Blocks() as demo:
    chatbot_ui = gr.Chatbot()
    msg = gr.Textbox(
        placeholder="Type your message and press Enter",
        lines=1,
        show_label=False,
    )
    msg.submit(fn=respond, inputs=msg, outputs=[msg, chatbot_ui])


if __name__ == "__main__":
    demo.launch()