helloperson123 committed on
Commit
4e5ae26
·
verified ·
1 Parent(s): c9512df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -23
app.py CHANGED
@@ -4,50 +4,48 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import torch
5
 
6
  app = Flask(__name__)
7
- CORS(app) # Allow requests from anywhere (for your TurboWarp extension etc.)
8
 
9
- print("🚀 Loading Phi-3-mini model... this may take a minute.")
10
- model_name = "microsoft/Phi-3-mini-4k-instruct"
11
-
12
- # Load model and tokenizer
13
- tokenizer = AutoTokenizer.from_pretrained(model_name)
14
  model = AutoModelForCausalLM.from_pretrained(
15
- model_name,
16
  torch_dtype=torch.float16,
17
  device_map="auto"
18
  )
 
19
 
20
- # 🧠 System prompt — this defines how the AI acts
21
- SYSTEM_PROMPT = """You are Acla, a helpful AI powered by phi-3 mini that can reason about math, code, and logic.
22
- You never hallucinate facts — if unsure, you say so politely.
23
- You can help with logic, reasoning, and programming tasks in a kind, conversational tone."""
24
 
25
  @app.route("/api/ask", methods=["POST"])
26
  def ask():
27
  data = request.get_json()
28
- user_prompt = data.get("prompt", "")
29
 
30
- # Combine system + user prompts
31
- full_prompt = f"<|system|>\n{SYSTEM_PROMPT}\n<|user|>\n{user_prompt}\n<|assistant|>"
 
 
32
 
33
- # Tokenize
34
  inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
35
 
36
- # Generate response
37
  outputs = model.generate(
38
  **inputs,
39
- max_new_tokens=300,
40
  temperature=0.7,
41
  top_p=0.9,
42
- do_sample=True,
43
  )
44
 
45
- # Decode and clean response
46
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
47
- if "<|assistant|>" in response:
48
- response = response.split("<|assistant|>")[-1].strip()
49
 
50
- return jsonify({"reply": response})
51
 
52
 
53
  if __name__ == "__main__":
 
4
import torch

app = Flask(__name__)
# Allow cross-origin requests so browser-based clients on other domains
# (e.g. a TurboWarp extension) can call this API.
CORS(app)

# 🔹 Load model
# Weights are fetched from the Hugging Face hub on first run — this can
# download several GB, hence the startup message below.
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
print("🚀 Loading Phi-3-mini model (this may take a minute)...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,  # half precision: halves weight memory
    device_map="auto"           # place layers on available GPU/CPU automatically
)
print("✅ Model ready!")
19
 
20
@app.route("/")
def home():
    """Health-check endpoint: confirms the service is up and shows usage."""
    usage = "✅ Phi-3-mini API is running! POST JSON to /api/ask with {'prompt': 'your question'}"
    return usage
 
23
 
24
@app.route("/api/ask", methods=["POST"])
def ask():
    """Answer a user prompt with Phi-3-mini.

    Expects a JSON body {"prompt": "<question>"} and returns
    {"reply": "<model answer>"}. Responds 400 when no prompt is supplied.
    """
    # BUG FIX: request.get_json() returns None for a missing or malformed
    # body, so the original `.get("prompt", ...)` could raise AttributeError.
    # silent=True plus `or {}` keeps the happy path with an empty dict.
    data = request.get_json(silent=True) or {}
    prompt = data.get("prompt", "").strip()
    if not prompt:
        # Robustness: reject empty input instead of generating from nothing.
        return jsonify({"error": "Missing 'prompt' in JSON body."}), 400

    system_prompt = (
        "You are Acla, a friendly and helpful assistant powered by Phi-3 mini who gives clear, step-by-step answers. "
        "Be concise but thoughtful. Use reasoning and math when needed."
    )

    # NOTE(review): Phi-3 is trained on the <|user|>/<|assistant|> chat
    # template; tokenizer.apply_chat_template would match it more closely
    # than this ad-hoc "### ..." framing — confirm before changing.
    full_prompt = f"### System:\n{system_prompt}\n\n### User:\n{prompt}\n\n### Assistant:"
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    # inference_mode disables autograd bookkeeping: faster and far less memory
    # than running generate() with gradients enabled.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=250,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,  # silences missing-pad warning
        )

    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the whole prompt; everything after the final
    # "### Assistant:" marker is the model's reply.
    reply = text.split("### Assistant:")[-1].strip()

    return jsonify({"reply": reply})
49
 
50
 
51
  if __name__ == "__main__":