helloperson123 committed on
Commit
c20d839
·
verified ·
1 Parent(s): 2cab154

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -28
app.py CHANGED
@@ -1,40 +1,43 @@
1
  from flask import Flask, request, jsonify
2
- from flask_cors import CORS
3
- from transformers import AutoModelForCausalLM, AutoTokenizer
4
- import torch
5
 
6
  app = Flask(__name__)
7
- CORS(app)
8
 
9
- print("🧠 Loading Phi-3-mini model...")
10
- tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
11
- model = AutoModelForCausalLM.from_pretrained(
12
- "microsoft/Phi-3-mini-4k-instruct",
13
- torch_dtype=torch.float16,
14
- device_map="auto"
 
 
 
 
 
 
 
15
  )
16
 
17
- @app.route("/")
18
- def home():
19
- return "✅ Phi-3-mini API is running! POST JSON to /api/ask with {'prompt': 'your question'}"
 
 
20
 
21
  @app.route("/api/ask", methods=["POST"])
22
  def ask():
23
- data = request.get_json()
24
- prompt = data.get("prompt", "")
25
-
26
- if not prompt:
27
- return jsonify({"error": "No prompt provided"}), 400
28
-
29
- full_prompt = f"You are Acla, a friendly and helpful assistant powered by Phi-3 mini who gives clear, step-by-step answers. " "Be concise but thoughtful. Use reasoning and math when needed.\nUser: {prompt}\nAssistant:"
30
- inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
31
-
32
- with torch.no_grad():
33
- outputs = model.generate(**inputs, max_new_tokens=200)
34
 
35
- reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
36
- reply = reply.split("Assistant:")[-1].strip()
37
- return jsonify({"reply": reply})
 
 
 
 
 
 
38
 
39
  if __name__ == "__main__":
40
- app.run(host="0.0.0.0", port=7860)
 
1
  from flask import Flask, request, jsonify
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
 
3
 
4
app = Flask(__name__)

# -------------------------------
# Load Phi-3-mini model
# -------------------------------
MODEL_NAME = "helloperson123/phi-3-mini"

# NOTE(review): the model and tokenizer are loaded at import time, so startup
# blocks until the weights are downloaded/loaded — confirm this is acceptable
# for the deployment target.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Create a reusable text-generation pipeline; max_new_tokens caps the length
# of each generated reply.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
)
20
 
21
# -------------------------------
# System prompt for behavior
# -------------------------------
# Fix: the previous text began with a stray "#n " artifact (a mangled escape)
# that was sent to the model verbatim as part of every prompt; removed.
SYSTEM_PROMPT = """You are Acla, an AI assistant that is helpful, concise,
and specializes in reasoning, math, and coding. Answer clearly and step by step."""
26
 
27
  @app.route("/api/ask", methods=["POST"])
28
  def ask():
29
+ data = request.json
30
+ user_prompt = data.get("prompt", "")
 
 
 
 
 
 
 
 
 
31
 
32
+ # Combine system prompt + user prompt
33
+ full_prompt = f"{SYSTEM_PROMPT}\nUser: {user_prompt}\nAI:"
34
+
35
+ try:
36
+ output = generator(full_prompt)
37
+ reply = output[0]["generated_text"].split("AI:")[-1].strip()
38
+ return jsonify({"reply": reply})
39
+ except Exception as e:
40
+ return jsonify({"error": str(e)}), 500
41
 
42
  if __name__ == "__main__":
43
+ app.run(host="0.0.0.0", port=5000)