"""Gradio chat UI that streams completions from OpenRouter.

The API key is read from the ``API_KEY`` environment variable (on Hugging
Face Spaces this is configured as a Space secret).
"""

import asyncio
import os
from collections.abc import AsyncIterator

import gradio as gr
from openai import AsyncOpenAI

# Fetch the API key from Hugging Face Spaces Secrets.
OPENROUTER_API_KEY = os.environ.get("API_KEY")

# OpenRouter's OpenAI-compatible endpoints live under /api/v1; the SDK
# appends "/chat/completions" to this base URL.
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"

# Model requested for every completion.
MODEL_ID = "meta-llama/llama-3-8b-instruct:free"

# Roles that are forwarded to the model; anything else in the history is dropped.
_CHAT_ROLES = ("user", "assistant")

# Initialize the async client targeting OpenRouter.
# The SDK raises at construction time when it is given no key at all, which
# would crash the app on import before predict() can show its friendly
# "secret is missing" message. A placeholder keeps the module importable;
# predict() never sends a request unless a real key is configured.
client = AsyncOpenAI(
    base_url=OPENROUTER_BASE_URL,
    api_key=OPENROUTER_API_KEY or "missing-api-key",
)


def _text_from_content(content) -> str | None:
    """Return the plain text carried by a history entry's content.

    Accepts either a plain string or a list of typed blocks such as
    ``{"type": "text", "text": "..."}``. Text blocks are joined with
    newlines. Returns ``None`` when no text can be extracted (for example
    file-only content).
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        parts = [
            block["text"]
            for block in content
            if isinstance(block, dict)
            and block.get("type") == "text"
            and isinstance(block.get("text"), str)
        ]
        if parts:
            return "\n".join(parts)
    return None


def _clean_history(history) -> list[dict[str, str]]:
    """Convert the UI's chat history into plain role/content text messages.

    Handles message dicts (``{"role": ..., "content": ...}``) as well as
    legacy ``[user_text, assistant_text]`` pairs. Entries with other roles,
    or without extractable text, are skipped.
    """
    cleaned: list[dict[str, str]] = []
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            text = _text_from_content(entry.get("content"))
            if role in _CHAT_ROLES and text is not None:
                cleaned.append({"role": role, "content": text})
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            # Legacy pair format: (user message, assistant reply); either
            # side may be None or a non-text payload, which is skipped.
            for role, content in zip(_CHAT_ROLES, entry):
                if isinstance(content, str):
                    cleaned.append({"role": role, "content": content})
    return cleaned


async def predict(message, history) -> AsyncIterator[str]:
    """Stream the assistant's reply to ``message`` given the prior ``history``.

    Args:
        message: The new user prompt.
        history: Previous conversation turns as supplied by the chat UI.

    Yields:
        The reply accumulated so far, re-yielded each time a new chunk
        arrives. Configuration and connection problems are yielded as a
        human-readable error string instead of being raised.
    """
    if not OPENROUTER_API_KEY:
        yield "Error: API_KEY secret is missing in Hugging Face Space settings."
        return

    # Prior turns reduced to plain text, followed by the new user prompt.
    cleaned_messages = _clean_history(history)
    cleaned_messages.append({"role": "user", "content": message})

    try:
        # Request an async stream from OpenRouter.
        response = await client.chat.completions.create(
            model=MODEL_ID,
            messages=cleaned_messages,
            stream=True,
        )

        partial_message = ""
        async for chunk in response:
            # Some chunks carry no choices or an empty delta; skip those.
            if chunk.choices and chunk.choices[0].delta.content:
                partial_message += chunk.choices[0].delta.content
                yield partial_message
    except Exception as e:
        # UI boundary: surface the failure in the chat instead of crashing.
        yield f"An error occurred while connecting to OpenRouter: {e}"


# Define the ChatInterface.
demo = gr.ChatInterface(
    fn=predict,
    title="OpenRouter Chatbot",
    description="Running safely on the absolute latest version of Gradio and Python 3.13.",
)

if __name__ == "__main__":
    demo.launch(ssr_mode=False)