"""Minimal Flask chat service backed by a GPTQ-quantized Mistral-7B-Instruct model."""
from flask import Flask, request, jsonify
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

app = Flask(__name__)

# 4-bit GPTQ build of Mistral-7B-Instruct-v0.1.
MODEL_NAME = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"

# Load the tokenizer and the quantized model; device_map="auto" places the
# weights on whatever GPU(s) are available.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    revision="gptq-4bit-32g-actorder_True",  # 4-bit, group-size 32, act-order quantization branch
)

# Wrap model and tokenizer in a text-generation pipeline for convenience.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

@app.route("/chat", methods=["POST"])
def chat():
    # Expect a JSON body of the form {"message": "..."}.
    payload = request.get_json(silent=True) or {}
    user_input = payload.get("message", "").strip()
    if not user_input:
        return jsonify({"error": "Empty message"}), 400

    # Wrap the user message in the Mistral-Instruct [INST] ... [/INST] template.
    prompt = f"[INST] You are a helpful assistant for food ordering.\n{user_input} [/INST]"
    # return_full_text=False keeps the prompt out of the generated reply.
    result = generator(prompt, max_new_tokens=200, do_sample=True, temperature=0.7,
                       return_full_text=False)[0]["generated_text"]
    return jsonify({"response": result.strip()})

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
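
# A minimal way to exercise the endpoint once the server is up (assumes a local
# run; host, port, route, and the "message" key come from the code above; the
# order text is just an example):
#
#   curl -X POST http://localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "I want to order two margherita pizzas"}'
#
# The reply is a JSON object of the form {"response": "..."}.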