saadawaissheikh committed on
Commit 66d3a19
verified · 1 Parent(s): 1f642e8

Update app.py

Files changed (1)
  1. app.py +17 -19
app.py CHANGED
@@ -4,30 +4,28 @@ import torch
 
  app = Flask(__name__)
 
- # Load OpenChat model and tokenizer
- MODEL_NAME = "openchat/openchat-3.5-0106"
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
 
- chatbot = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
  @app.route("/chat", methods=["POST"])
  def chat():
-     data = request.get_json()
-     prompt = data.get("message", "")
-     if not prompt:
          return jsonify({"error": "Empty message"}), 400
 
-     system_prompt = "<|system|>\nYou are a helpful assistant for food ordering.\n<|user|>\n" + prompt + "\n<|assistant|>\n"
-     output = chatbot(system_prompt, max_new_tokens=200, do_sample=True, temperature=0.7)[0]["generated_text"]
-
-     # Extract response after <|assistant|>
-     if "<|assistant|>" in output:
-         reply = output.split("<|assistant|>")[-1].strip()
-     else:
-         reply = output
-
-     return jsonify({"response": reply})
 
  if __name__ == "__main__":
-     app.run(debug=True)
 
 
  app = Flask(__name__)
 
+ MODEL_NAME = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
 
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_NAME,
+     device_map="auto",
+     torch_dtype=torch.float16,
+     trust_remote_code=True,
+     revision="gptq-4bit-32g-actorder_True"  # Based on the quant
+ )
+
+ generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
  @app.route("/chat", methods=["POST"])
  def chat():
+     user_input = request.json.get("message", "")
+     if not user_input:
          return jsonify({"error": "Empty message"}), 400
 
+     prompt = f"[INST] You are a helpful assistant for food ordering.\n{user_input} [/INST]"
+     result = generator(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)[0]['generated_text']
+     return jsonify({"response": result})
 
  if __name__ == "__main__":
+     app.run(host="0.0.0.0", port=7860)
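
For reference, a call against the updated /chat endpoint could look like the sketch below. This is an illustrative client, not part of the commit; it assumes the app is reachable on localhost at port 7860 (per app.run above) and uses the requests library. The message text is a made-up example.

import requests

# Hypothetical client call; host/port follow app.run(host="0.0.0.0", port=7860) in the diff.
resp = requests.post(
    "http://localhost:7860/chat",
    json={"message": "I'd like to order a margherita pizza"},
)
print(resp.json()["response"])

Note that the text-generation pipeline returns the full generated text, prompt included, by default, so unlike the old code (which split on <|assistant|>) the new handler's "response" field will still contain the [INST] ... [/INST] prompt unless it is stripped server-side.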