Spaces:

shivamsshhiivvaamm
/

docker_check

Sleeping

File size: 2,233 Bytes

28f20dc

from fastapi import FastAPI
from fastapi.responses import HTMLResponse, StreamingResponse
from openai import OpenAI
from os import getenv
from dotenv import load_dotenv

# Load variables from a local .env file into the environment first, so the
# getenv() lookup below can see them.
load_dotenv()

# Application object that the route decorators in this file register on.
app = FastAPI()

# OpenAI SDK client pointed at the OpenRouter endpoint; the key comes from the
# OPENROUTER_API_KEY environment variable (None if it is not set).
client = OpenAI(
    api_key=getenv("OPENROUTER_API_KEY"),
    base_url="https://openrouter.ai/api/v1",
)

@app.get("/", response_class=HTMLResponse)
def index():
    """Serve the single-page demo UI for the streaming endpoint.

    The returned HTML document contains a "Start Chat" button and an empty
    ``#output`` element. Clicking the button runs the inline ``startChat``
    script, which clears ``#output``, fetches ``/chat``, reads the response
    body chunk by chunk through a stream reader, decodes each chunk as text,
    and appends it to ``#output`` as it arrives.

    Returns:
        The HTML page as a string (sent using ``HTMLResponse``).
    """
    return """

    <!DOCTYPE html>

    <html>

    <head>

        <title>Streaming Chat</title>

        <style>

            body { font-family: monospace; }

            #output { white-space: pre-wrap; }

        </style>

    </head>

    <body>

        <h2>Streaming Response</h2>

        <button onclick="startChat()">Start Chat</button>

        <div id="output"></div>



        <script>

            async function startChat() {

                const output = document.getElementById("output");

                output.textContent = "";



                const response = await fetch("/chat");



                const reader = response.body.getReader();

                const decoder = new TextDecoder();



                while (true) {

                    const { value, done } = await reader.read();

                    if (done) break;



                    const chunk = decoder.decode(value, { stream: true });

                    output.textContent += chunk;   // 🔥 live append

                }

            }

        </script>

    </body>

    </html>

    """

@app.get("/chat")
def chat():
    """Stream a model-generated reply to the caller as plain text.

    A chat completion is requested from ``client`` in streaming mode with a
    fixed model and prompt, and each text fragment is forwarded to the HTTP
    response as soon as it is received.

    Returns:
        A ``StreamingResponse`` with media type ``text/plain`` whose body is
        produced by ``token_generator``.
    """

    def token_generator():
        """Yield every non-empty text fragment from the completion stream."""
        # The upstream request is issued here, i.e. when the response body is
        # first iterated, not at the moment ``chat`` returns.
        stream = client.chat.completions.create(
            model="openai/gpt-oss-120b",
            messages=[{"role": "user", "content": "Say you are a poet and write a poem about streaming responses. with the small description and include a dummy name to"}],
            stream=True,
        )

        for event in stream:
            # A chunk may carry an empty ``choices`` list (for example a
            # trailing usage-only chunk). Indexing [0] on it would raise
            # IndexError and cut the response off mid-stream, so skip it.
            if not event.choices:
                continue

            delta = event.choices[0].delta
            # Role-only or otherwise content-less deltas contribute no text.
            if delta and delta.content:
                yield delta.content

    return StreamingResponse(
        token_generator(),
        media_type="text/plain",
    )



if __name__ == "__main__":
    # Direct-execution entry point: serve ``app`` with uvicorn, listening on
    # every interface at port 8000. uvicorn is imported only on this path.
    import uvicorn

    uvicorn.run(app, port=8000, host="0.0.0.0")