helloperson123 committed on
Commit
1b39db0
·
verified ·
1 Parent(s): 28889ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -18
app.py CHANGED
@@ -1,44 +1,55 @@
1
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM
from flask_cors import CORS
import torch

app = Flask(__name__)
# Allow cross-origin calls so browser frontends can reach the API.
CORS(app)

print("🚀 Loading Phi model (microsoft/phi-2)...")
model_name = "microsoft/phi-2"
# Load once at import time; every request shares the same tokenizer/model.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True
)
# Prefer GPU when one is available, otherwise fall back to CPU.
model.to("cuda" if torch.cuda.is_available() else "cpu")
print("✅ Phi model loaded!")
 
 
 
 
19
 
20
@app.route("/api/ask", methods=["POST"])
def ask():
    """Generate a model reply for the JSON body {"prompt": ...}."""
    payload = request.get_json(force=True)
    question = payload.get("prompt", "")

    # Build an Alpaca-style instruction prompt around the user's question.
    chat_prompt = f"### Instruction:\nYou are Acla, a helpful AI powered by phi-3 mini that can reason about math, code, and logic.\n\n### Input:\n{question}\n\n### Response:"
    encoded = tokenizer(chat_prompt, return_tensors="pt").to(model.device)

    generated = model.generate(
        **encoded,
        max_new_tokens=250,
        temperature=0.7,
        top_p=0.9,
        do_sample=True
    )

    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # Keep only the text after the final "### Response:" marker.
    answer = decoded.split("### Response:")[-1].strip()

    return jsonify({"reply": answer})
38
 
39
@app.route("/")
def home():
    """Plain-text landing/health-check route."""
    return "🧠 Phi-2 chatbot is running! POST JSON to /api/ask with {'prompt': 'your question'}."
42
 
43
# Bind on all interfaces; 7860 is the Hugging Face Spaces default port.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
 
1
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

app = Flask(__name__)
# Open CORS so browser clients (e.g. a TurboWarp extension) can call the API.
CORS(app)

print("🚀 Loading Phi-3-mini model... this may take a minute.")
model_name = "microsoft/Phi-3-mini-4k-instruct"

# Tokenizer and model are loaded once at import time and shared by all requests.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# fp16 halves memory; device_map="auto" places weights on GPU when available.
# NOTE(review): device_map="auto" requires the `accelerate` package — confirm
# it is installed in the Space, otherwise from_pretrained raises at startup.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

# 🧠 System prompt — this defines how the AI acts
SYSTEM_PROMPT = """You are Phi, a friendly, helpful, and intelligent AI assistant.
You always explain your reasoning clearly and step-by-step when solving math or code problems.
You never hallucinate facts — if unsure, you say so politely.
You can help with logic, reasoning, and programming tasks in a kind, conversational tone."""
25
 
26
@app.route("/api/ask", methods=["POST"])
def ask():
    """Answer a chat prompt with the loaded Phi-3 model.

    Expects JSON: {"prompt": "<user question>"}.
    Returns JSON: {"reply": "<model answer>"}.
    """
    # silent=True: a missing/malformed JSON body yields None instead of an
    # immediate 400 abort, so we can fall back to an empty prompt gracefully.
    data = request.get_json(silent=True) or {}
    user_prompt = data.get("prompt", "")

    # Use the module-level SYSTEM_PROMPT. Previously it was defined but never
    # used — a different persona string was hard-coded here instead.
    full_prompt = f"<|system|>\n{SYSTEM_PROMPT}\n<|user|>\n{user_prompt}\n<|assistant|>"

    # Tokenize and move tensors onto whichever device the model landed on.
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=300,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    # Decode ONLY the newly generated tokens. Decoding the full sequence with
    # skip_special_tokens=True strips the "<|assistant|>" marker, so the old
    # split("<|assistant|>") never matched and the reply echoed the entire
    # prompt back to the caller.
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    return jsonify({"reply": response})
52
 
 
 
 
53
 
54
# Bind on all interfaces; 7860 is the Hugging Face Spaces default port.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)