helloperson123 committed on
Commit
a78a95c
·
verified ·
1 Parent(s): 4386b39

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -25
app.py CHANGED
@@ -1,56 +1,59 @@
1
  # app.py
2
  from flask import Flask, request, jsonify
3
  from flask_cors import CORS
4
- from transformers import AutoModelForCausalLM, AutoTokenizer
5
  import torch
6
 
7
  app = Flask(__name__)
8
  CORS(app)
9
 
10
  # ----------------------------
11
- # Model setup
12
  # ----------------------------
13
- MODEL_NAME = "openaccess-ai/phi-3-mini" # Public HF model
14
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
15
 
16
- # Load tokenizer and model
17
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
18
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)
 
 
 
19
 
20
  # ----------------------------
21
- # System prompt
22
  # ----------------------------
23
  SYSTEM_PROMPT = """#n
24
- You are Acla, an AI Aclaassistant. Be helpful, concise, and accurate.
25
- Focus on math, reasoning, and code when relevant.
26
- Always respond in a friendly and clear manner.
27
  """
28
 
29
  # ----------------------------
30
- # API endpoint
31
  # ----------------------------
32
  @app.route("/api/ask", methods=["POST"])
33
  def ask():
34
- try:
35
- data = request.json
36
- user_prompt = data.get("prompt", "")
37
-
38
- # Combine system prompt and user input
39
- full_prompt = SYSTEM_PROMPT + "\nUser: " + user_prompt + "\nAI:"
40
 
41
- inputs = tokenizer(full_prompt, return_tensors="pt").to(DEVICE)
42
- outputs = model.generate(**inputs, max_new_tokens=150)
43
- answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
44
 
45
- # Remove the system prompt part from output
46
- answer = answer.replace(full_prompt, "").strip()
47
 
48
- return jsonify({"reply": answer})
49
- except Exception as e:
50
- return jsonify({"error": str(e)}), 500
 
 
 
 
 
 
 
 
51
 
52
  # ----------------------------
53
- # Run app
54
  # ----------------------------
55
  if __name__ == "__main__":
56
  app.run(host="0.0.0.0", port=7860)
 
# app.py
#
# Flask backend exposing a small text-generation API.

from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Create the WSGI app and enable CORS so a browser front-end hosted on a
# different origin can call the API.
app = Flask(__name__)
CORS(app)
9
 
# ----------------------------
# Model (UNGATED, FREE)
# ----------------------------
MODEL_NAME = "microsoft/phi-2"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision halves GPU memory use; CPU inference requires float32.
_DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32

# Loaded once at import time so every request reuses the same weights.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=_DTYPE).to(DEVICE)
21
 
22
# ----------------------------
# SYSTEM PROMPT
# ----------------------------
# Prepended to every user prompt before generation.
# NOTE(fix): the previous text began with `"""#n` — the "#n" was a typo
# (presumably for a newline) that leaked the literal characters "#n"
# into every prompt sent to the model. The triple-quoted string already
# opens with a newline, so the stray token is simply removed.
SYSTEM_PROMPT = """
You are a helpful AI assistant.
You are good at math, reasoning, and programming.
Explain things clearly and simply.
"""
30
 
31
# ----------------------------
# API ROUTE
# ----------------------------
@app.route("/api/ask", methods=["POST"])
def ask():
    """Generate a model reply for a JSON body of the form {"prompt": "..."}.

    Returns {"reply": <text>} on success, or {"error": <message>} with
    HTTP 500 if generation fails (matching the previous revision's
    error contract, which this revision had dropped).
    """
    # silent=True yields None instead of raising on a missing or invalid
    # JSON body; `or {}` keeps the original "empty prompt" default path
    # instead of crashing with an AttributeError on None.
    data = request.get_json(silent=True) or {}
    user_prompt = data.get("prompt", "")

    prompt = f"{SYSTEM_PROMPT}\nUser: {user_prompt}\nAssistant:"

    try:
        inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
        # Inference only — no_grad avoids building autograd state.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                do_sample=True,
                temperature=0.7,
            )
        text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # The decoded text echoes the entire prompt; keep only what
        # follows the final "Assistant:" marker.
        reply = text.split("Assistant:")[-1].strip()
        return jsonify({"reply": reply})
    except Exception as e:
        # Surface generation/tokenization failures as a JSON 500 rather
        # than an unhandled server error.
        return jsonify({"error": str(e)}), 500
54
 
55
# ----------------------------
# START SERVER
# ----------------------------
if __name__ == "__main__":
    # Bind every interface; 7860 is the conventional Hugging Face Spaces port.
    host, port = "0.0.0.0", 7860
    app.run(host=host, port=port)