"""Minimal FastAPI app that streams an LLM completion to the browser.

Serves a single HTML page at ``/`` whose JavaScript consumes the
token-by-token ``text/plain`` stream produced by ``/chat``.
"""

from os import getenv

from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, StreamingResponse
from openai import OpenAI

load_dotenv()

app = FastAPI()

# OpenRouter exposes an OpenAI-compatible API; the key is read from the
# OPENROUTER_API_KEY environment variable (loaded from .env above).
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=getenv("OPENROUTER_API_KEY"),
)

# Default prompt, kept byte-identical to the original hard-coded message
# so behavior is unchanged when no prompt is supplied.
DEFAULT_PROMPT = (
    "Say you are a poet and write a poem about streaming responses. "
    "with the small description and include a dummy name to"
)

# NOTE(review): the original HTML literal arrived stripped of its markup
# (only the text "Streaming Chat" / "Streaming Response" survived), so it
# is reconstructed here as a minimal page that actually reads the /chat
# stream chunk-by-chunk via fetch() — confirm against the intended UI.
INDEX_HTML = """<!DOCTYPE html>
<html>
<head><title>Streaming Chat</title></head>
<body>
  <h1>Streaming Response</h1>
  <button onclick="run()">Start</button>
  <pre id="out"></pre>
  <script>
    async function run() {
      const out = document.getElementById("out");
      out.textContent = "";
      const resp = await fetch("/chat");
      const reader = resp.body.getReader();
      const decoder = new TextDecoder();
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        out.textContent += decoder.decode(value, { stream: true });
      }
    }
  </script>
</body>
</html>
"""


@app.get("/", response_class=HTMLResponse)
def index():
    """Serve the static front-end page."""
    return INDEX_HTML


@app.get("/chat")
def chat(prompt: str = DEFAULT_PROMPT):
    """Stream the model's reply token-by-token as ``text/plain``.

    ``prompt`` may be overridden via the query string (``?prompt=...``);
    its default preserves the original hard-coded behavior.
    """

    def token_generator():
        # stream=True makes the API yield incremental deltas instead of
        # one final message.
        stream = client.chat.completions.create(
            model="openai/gpt-oss-120b",
            messages=[{"role": "user", "content": prompt}],
            stream=True,
        )
        for event in stream:
            delta = event.choices[0].delta
            # Skip chunks that carry no text (e.g. role-only first chunk).
            if delta and delta.content:
                yield delta.content  # streamed to the frontend as it arrives

    return StreamingResponse(token_generator(), media_type="text/plain")


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)