helloperson123 committed on
Commit
2cab154
·
verified ·
1 Parent(s): 4e5ae26

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -24
app.py CHANGED
@@ -1,21 +1,18 @@
1
  from flask import Flask, request, jsonify
2
  from flask_cors import CORS
3
- from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import torch
5
 
6
  app = Flask(__name__)
7
  CORS(app)
8
 
9
- # 🔹 Load model
10
- MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
11
- print("🚀 Loading Phi-3-mini model (this may take a minute)...")
12
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
13
  model = AutoModelForCausalLM.from_pretrained(
14
- MODEL_NAME,
15
  torch_dtype=torch.float16,
16
  device_map="auto"
17
  )
18
- print("✅ Model ready!")
19
 
20
  @app.route("/")
21
  def home():
@@ -26,27 +23,18 @@ def ask():
26
  data = request.get_json()
27
  prompt = data.get("prompt", "")
28
 
29
- system_prompt = (
30
- "You are Acla, a friendly and helpful assistant powered by Phi-3 mini who gives clear, step-by-step answers. "
31
- "Be concise but thoughtful. Use reasoning and math when needed."
32
- )
33
 
34
- full_prompt = f"### System:\n{system_prompt}\n\n### User:\n{prompt}\n\n### Assistant:"
35
  inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
36
 
37
- outputs = model.generate(
38
- **inputs,
39
- max_new_tokens=250,
40
- temperature=0.7,
41
- top_p=0.9,
42
- do_sample=True
43
- )
44
-
45
- text = tokenizer.decode(outputs[0], skip_special_tokens=True)
46
- reply = text.split("### Assistant:")[-1].strip()
47
-
48
  return jsonify({"reply": reply})
49
 
50
-
51
  if __name__ == "__main__":
52
  app.run(host="0.0.0.0", port=7860)
 
1
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

app = Flask(__name__)
CORS(app)  # allow cross-origin requests from the browser front-end

# Hoist the repeated model id into a single constant so the tokenizer and
# the model can never load different checkpoints (it was spelled out twice).
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

print("🧠 Loading Phi-3-mini model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,  # halve memory; assumes an fp16-capable device — TODO confirm
    device_map="auto",          # let the loader place layers on available devices
)
 
16
 
17
  @app.route("/")
18
  def home():
 
23
  data = request.get_json()
24
  prompt = data.get("prompt", "")
25
 
26
+ if not prompt:
27
+ return jsonify({"error": "No prompt provided"}), 400
 
 
28
 
29
+ full_prompt = f"You are Acla, a friendly and helpful assistant powered by Phi-3 mini who gives clear, step-by-step answers. " "Be concise but thoughtful. Use reasoning and math when needed.\nUser: {prompt}\nAssistant:"
30
  inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
31
 
32
+ with torch.no_grad():
33
+ outputs = model.generate(**inputs, max_new_tokens=200)
34
+
35
+ reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
36
+ reply = reply.split("Assistant:")[-1].strip()
 
 
 
 
 
 
37
  return jsonify({"reply": reply})
38
 
 
39
if __name__ == "__main__":
    # Bind on all interfaces; 7860 is the conventional Hugging Face Spaces port.
    app.run(host="0.0.0.0", port=7860)