helloperson123 committed on
Commit
4e5ae26
·
verified ·
1 Parent(s): c9512df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -23
app.py CHANGED
@@ -4,50 +4,48 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import torch
5
 
6
  app = Flask(__name__)
7
- CORS(app) # Allow requests from anywhere (for your TurboWarp extension etc.)
8
 
9
- print("🚀 Loading Phi-3-mini model... this may take a minute.")
10
- model_name = "microsoft/Phi-3-mini-4k-instruct"
11
-
12
- # Load model and tokenizer
13
- tokenizer = AutoTokenizer.from_pretrained(model_name)
14
  model = AutoModelForCausalLM.from_pretrained(
15
- model_name,
16
  torch_dtype=torch.float16,
17
  device_map="auto"
18
  )
 
19
 
20
- # 🧠 System prompt — this defines how the AI acts
21
- SYSTEM_PROMPT = """You are Acla, a helpful AI powered by phi-3 mini that can reason about math, code, and logic.
22
- You never hallucinate facts — if unsure, you say so politely.
23
- You can help with logic, reasoning, and programming tasks in a kind, conversational tone."""
24
 
25
  @app.route("/api/ask", methods=["POST"])
26
  def ask():
27
  data = request.get_json()
28
- user_prompt = data.get("prompt", "")
29
 
30
- # Combine system + user prompts
31
- full_prompt = f"<|system|>\n{SYSTEM_PROMPT}\n<|user|>\n{user_prompt}\n<|assistant|>"
 
 
32
 
33
- # Tokenize
34
  inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
35
 
36
- # Generate response
37
  outputs = model.generate(
38
  **inputs,
39
- max_new_tokens=300,
40
  temperature=0.7,
41
  top_p=0.9,
42
- do_sample=True,
43
  )
44
 
45
- # Decode and clean response
46
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
47
- if "<|assistant|>" in response:
48
- response = response.split("<|assistant|>")[-1].strip()
49
 
50
- return jsonify({"reply": response})
51
 
52
 
53
  if __name__ == "__main__":
 
4
import torch

app = Flask(__name__)
# Allow cross-origin requests so browser-based clients on other domains
# (e.g. a TurboWarp extension) can call this API.
CORS(app)

# 🔹 Load model
# Weights are fetched from the Hugging Face hub on first run — this can
# download several GB, hence the startup message below.
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
print("🚀 Loading Phi-3-mini model (this may take a minute)...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,  # half precision: halves weight memory
    device_map="auto"           # place layers on available GPU/CPU automatically
)
print("✅ Model ready!")
19
 
20
@app.route("/")
def home():
    """Health-check endpoint: confirms the service is up and shows usage."""
    usage = "✅ Phi-3-mini API is running! POST JSON to /api/ask with {'prompt': 'your question'}"
    return usage
 
23
 
24
@app.route("/api/ask", methods=["POST"])
def ask():
    """Answer a user prompt with Phi-3-mini.

    Expects a JSON body {"prompt": "<question>"} and returns
    {"reply": "<model answer>"}. Responds 400 when no prompt is supplied.
    """
    # BUG FIX: request.get_json() returns None for a missing or malformed
    # body, so the original `.get("prompt", ...)` could raise AttributeError.
    # silent=True plus `or {}` keeps the happy path with an empty dict.
    data = request.get_json(silent=True) or {}
    prompt = data.get("prompt", "").strip()
    if not prompt:
        # Robustness: reject empty input instead of generating from nothing.
        return jsonify({"error": "Missing 'prompt' in JSON body."}), 400

    system_prompt = (
        "You are Acla, a friendly and helpful assistant powered by Phi-3 mini who gives clear, step-by-step answers. "
        "Be concise but thoughtful. Use reasoning and math when needed."
    )

    # NOTE(review): Phi-3 is trained on the <|user|>/<|assistant|> chat
    # template; tokenizer.apply_chat_template would match it more closely
    # than this ad-hoc "### ..." framing — confirm before changing.
    full_prompt = f"### System:\n{system_prompt}\n\n### User:\n{prompt}\n\n### Assistant:"
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    # inference_mode disables autograd bookkeeping: faster and far less memory
    # than running generate() with gradients enabled.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=250,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,  # silences missing-pad warning
        )

    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the whole prompt; everything after the final
    # "### Assistant:" marker is the model's reply.
    reply = text.split("### Assistant:")[-1].strip()

    return jsonify({"reply": reply})
49
 
50
 
51
  if __name__ == "__main__":