Chvigo committed on
Commit
c951bf7
·
verified ·
1 Parent(s): e116e1f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
+
5
# Hugging Face Hub identifier of the checkpoint this service loads and serves.
# NOTE(review): confirm this repo id resolves on the Hub -- verify the "1.0B"
# size tag against the published Qwen2.5-Coder checkpoints.
MODEL_ID = "Qwen/Qwen2.5-Coder-1.0B-Instruct"

# The tokenizer and model are loaded once, at import time, and are then used
# as module-level objects by the /generate request handler.
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

print("Loading model...")
# device_map="cpu" places the model weights on the CPU.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="cpu")
print("Model loaded successfully")

# WSGI application object; routes are registered on it via @app.route.
app = Flask(__name__)
18
+
19
@app.route("/generate", methods=["POST"])
def generate():
    """Generate text for the prompt supplied in the JSON request body.

    Expected body: a JSON object with

    * ``"prompt"`` (str, required) -- text handed to the tokenizer.
    * ``"max_tokens"`` (optional, default 256) -- number of new tokens to
      generate; anything ``int()`` accepts is allowed. Values above the
      per-request limit defined below are clamped to that limit.

    Returns:
        ``{"response": <decoded text>}`` on success. The text is decoded from
        the full first output sequence (presumably prompt plus completion for
        a causal LM -- confirm against the model in use).
        ``{"error": <message>}`` with HTTP 400 when the body is not a JSON
        object, the prompt is missing, empty or not a string, or
        ``max_tokens`` is not a positive integer.
    """
    # Generation runs on CPU and blocks the worker for its whole duration, so
    # an unbounded client-supplied length is an easy denial-of-service vector.
    max_allowed_tokens = 2048

    # silent=True returns None instead of raising when the body is absent,
    # malformed, or sent with a non-JSON content type, so every bad-body case
    # is answered by the same explicit 400 below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "JSON object body required"}), 400

    # Validate the prompt first so a missing prompt is always reported as
    # such, regardless of what max_tokens contains.
    prompt = data.get("prompt", "")
    if not isinstance(prompt, str) or not prompt:
        return jsonify({"error": "Prompt required"}), 400

    try:
        max_tokens = int(data.get("max_tokens", 256))
    except (TypeError, ValueError, OverflowError):
        return jsonify({"error": "max_tokens must be an integer"}), 400
    if max_tokens < 1:
        return jsonify({"error": "max_tokens must be a positive integer"}), 400
    max_tokens = min(max_tokens, max_allowed_tokens)

    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=max_tokens)
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return jsonify({"response": text})
31
+
32
@app.route("/", methods=["GET"])
def health():
    """Return a static OK status together with the configured model id."""
    status_payload = {"status": "ok", "model": MODEL_ID}
    return jsonify(status_payload)
35
+
36
if __name__ == "__main__":
    # Only start Flask's built-in server when the file is executed directly;
    # 0.0.0.0 binds every IPv4 interface rather than just loopback.
    app.run(port=8000, host="0.0.0.0")