Shreekant Kalwar (Nokia) committed on
Commit 55a61ee · 1 Parent(s): 941e116

runtime.txt add

Files changed (1)
1. app.py +14 -12
app.py CHANGED
@@ -3,18 +3,6 @@ from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-# Load DeepSeek model (small one for local use)
-# Try bigger models if you have a GPU with >12GB VRAM
-model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
-
-print("Loading model... this may take a minute ⏳")
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto"
-)
-print("Model loaded ✅")
 
 
 app = FastAPI()
@@ -23,6 +11,20 @@ class ChatRequest(BaseModel):
 
 @app.get("/")
 def root():
+
+    # Load DeepSeek model (small one for local use)
+    # Try bigger models if you have a GPU with >12GB VRAM
+    model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
+
+    print("Loading model... this may take a minute ⏳")
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        device_map="auto"
+    )
+    print("Model loaded ✅")
+
     return {"status": "ok"}
 
 @app.post("/chat")
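
For readability, here is a sketch of app.py as it stands after this commit, reconstructed from the two hunks above. The `from fastapi import FastAPI` line, the ChatRequest fields, and the /chat handler body fall outside the visible hunks, so those parts are illustrative placeholders rather than the author's code.

# Sketch of app.py after commit 55a61ee, reconstructed from the diff above.
# Placeholders (not visible in the hunks): the FastAPI import, the
# ChatRequest fields, and the /chat handler body.
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


app = FastAPI()

class ChatRequest(BaseModel):
    prompt: str  # placeholder field; the real schema is not shown in this diff

@app.get("/")
def root():

    # Load DeepSeek model (small one for local use)
    # Try bigger models if you have a GPU with >12GB VRAM
    model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"

    print("Loading model... this may take a minute ⏳")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto"
    )
    print("Model loaded ✅")

    return {"status": "ok"}

@app.post("/chat")
def chat(req: ChatRequest):
    # Handler body not included in the visible hunks; elided here.
    ...

Note that after this change tokenizer and model are local variables of root(), so the weights are loaded on every GET / request rather than once at import time.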