| from fastapi import FastAPI |
| from pydantic import BaseModel |
| from transformers import AutoTokenizer, AutoModelForCausalLM |
| import torch |
|
|
| |
| |
# Hugging Face Hub identifier of the checkpoint served by this app.
model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"

print("Loading model... this may take a minute ⏳")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Half precision when CUDA is available, full precision otherwise.
if torch.cuda.is_available():
    _weights_dtype = torch.float16
else:
    _weights_dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=_weights_dtype,
    device_map="auto",
)
print("Model loaded ✅")

# ASGI application object; routes are registered on it via decorators.
app = FastAPI()
|
|
class ChatRequest(BaseModel):
    """Request body accepted by the chat endpoint."""

    # Text sent by the client; a required string field.
    message: str
|
|
@app.post("/chat")
def chat(request: ChatRequest):
    """Generate a reply to a single chat message with the loaded model.

    Args:
        request: Parsed request body; ``request.message`` is used as the
            prompt text.

    Returns:
        A dict with one key, ``"reply"``, holding the decoded text of the
        newly generated tokens (at most 200), without the prompt.
    """
    inputs = tokenizer(request.message, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=200)
    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation; drop the prompt so the reply does not echo the message.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    reply = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return {"reply": reply}
|
|