"""FastAPI service that exposes a StarCoder2 chat session over a WebSocket.

``GET /`` serves the embedded HTML page; ``/ws`` accepts text messages and
answers each one with text generated by the StarCoder2 pipeline, keeping a
short per-connection conversation history for context.
"""

import threading
from collections import deque
from typing import Iterable, Tuple

from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import HTMLResponse
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Load StarCoder2 model
model_name = "bigcode/starcoder2-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")

# Maximum number of (user, assistant) turns replayed into each prompt.
# Without a bound the prompt grows with every message for as long as the
# connection stays open.
MAX_HISTORY_TURNS = 20

# Generation used to run one request at a time because it blocked the event
# loop. Now that it runs on worker threads, this lock keeps that behaviour;
# the shared pipeline is not assumed to be safe for concurrent calls.
_generation_lock = threading.Lock()

app = FastAPI()

# HTML and CSS embedded directly
html = """ StarCoder2 Docker Terminal
"""


def _build_prompt(history: Iterable[Tuple[str, str]], user_input: str) -> str:
    """Render the earlier turns plus the new user message as one prompt.

    Args:
        history: Earlier ``(user_message, assistant_reply)`` pairs, oldest
            first.
        user_input: The message that was just received.

    Returns:
        The prompt text, ending with ``"Assistant:"`` so the model continues
        with the assistant's reply.
    """
    # Real newlines separate the turns; the previous "\\n" put a literal
    # backslash followed by "n" into the prompt instead of a line break.
    past_turns = "".join(
        f"User: {question}\nAssistant: {answer}\n" for question, answer in history
    )
    return f"{past_turns}User: {user_input}\nAssistant:"


def _generate_reply(prompt: str) -> str:
    """Run the text-generation pipeline on ``prompt`` and return the new text.

    This call is synchronous and long-running, so it is meant to be executed
    on a worker thread rather than directly inside a coroutine.
    """
    with _generation_lock:
        output = pipe(
            prompt,
            max_new_tokens=256,
            # Temperature only takes effect when sampling is enabled.
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
            # Ask for the continuation only instead of slicing the echoed
            # prompt off the front of the result.
            return_full_text=False,
        )
    return output[0]["generated_text"].strip()


@app.get("/")
async def get_root():
    """Serve the embedded HTML page."""
    return HTMLResponse(html)


@app.websocket("/ws")
async def websocket_endpoint(ws: WebSocket):
    """Chat loop: receive a text message, reply with generated text.

    Each connection keeps its own history of at most ``MAX_HISTORY_TURNS``
    turns. The loop ends when the client disconnects.
    """
    await ws.accept()
    history = deque(maxlen=MAX_HISTORY_TURNS)
    try:
        while True:
            user_input = await ws.receive_text()
            prompt = _build_prompt(history, user_input)
            # Offload inference so the event loop can keep serving other
            # connections while the model is generating.
            reply = await run_in_threadpool(_generate_reply, prompt)
            history.append((user_input, reply))
            await ws.send_text(reply)
    except WebSocketDisconnect:
        print("Client disconnected")