Spaces:

Neon-AI
/

chatbot

Paused

App Files Community

Neon-AI committed on Jan 8

Commit

2f41b1c

verified ·

1 Parent(s): e1352c6

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -6

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
 app = FastAPI(
-    title="General AI REST API - Phi-3 Mini",
     version="1.0.0"
 )
@@ -27,7 +27,7 @@ model.eval()
 # ------------------------------
 class ChatRequest(BaseModel):
     prompt: str
-    max_tokens: int = 150
     temperature: float = 0.25
     top_p: float = 0.95
@@ -49,25 +49,43 @@ def chat(req: ChatRequest):
     if not req.prompt or len(req.prompt.strip()) == 0:
         raise HTTPException(status_code=400, detail="Prompt is empty")
     # Safety caps
-    prompt = req.prompt[:500]                  # limit prompt length
-    max_tokens = min(req.max_tokens, 150)     # limit max tokens
-    # Build messages for Phi-3 Mini instruct
     messages = [
-        {"role": "system", "content": "You are a concise, intelligent assistant. Answer clearly and briefly."},
         {"role": "user", "content": prompt}
     ]
     input_ids = tokenizer.apply_chat_template(
         messages,
         tokenize=True,
         return_tensors="pt"
     )
     with torch.no_grad():
         output = model.generate(
             input_ids,
             max_new_tokens=max_tokens,
             temperature=req.temperature,
             top_p=req.top_p,
@@ -75,6 +93,7 @@ def chat(req: ChatRequest):
             do_sample=True
         )
     reply = tokenizer.decode(
         output[0][input_ids.shape[-1]:],
         skip_special_tokens=True

 MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
 app = FastAPI(
+    title="Neon Tech Chatbot",
     version="1.0.0"
 )
 # ------------------------------
 class ChatRequest(BaseModel):
     prompt: str
+    max_tokens: int = 120
     temperature: float = 0.25
     top_p: float = 0.95
     if not req.prompt or len(req.prompt.strip()) == 0:
         raise HTTPException(status_code=400, detail="Prompt is empty")
+    # ------------------------------
     # Safety caps
+    # ------------------------------
+    prompt = req.prompt[:500]               # limit prompt length
+    max_tokens = min(req.max_tokens, 150)  # limit max tokens
+    # ------------------------------
+    # Build messages for instruct
+    # ------------------------------
     messages = [
+        {
+            "role": "system",
+            "content": (
+                "You are a concise, intelligent assistant. "
+                "Always respond in plain text. "
+                "Never output JSON, code blocks, or structured data. "
+                "Answer clearly and briefly."
+            )
+        },
         {"role": "user", "content": prompt}
     ]
+    # Tokenize + create attention mask explicitly
     input_ids = tokenizer.apply_chat_template(
         messages,
         tokenize=True,
         return_tensors="pt"
     )
+    attention_mask = torch.ones_like(input_ids)
+    # ------------------------------
+    # Generate response
+    # ------------------------------
     with torch.no_grad():
         output = model.generate(
             input_ids,
+            attention_mask=attention_mask,
             max_new_tokens=max_tokens,
             temperature=req.temperature,
             top_p=req.top_p,
             do_sample=True
         )
+    # Decode output, skip the prompt tokens
     reply = tokenizer.decode(
         output[0][input_ids.shape[-1]:],
         skip_special_tokens=True