|
|
import os |
|
|
from flask import Flask, request, jsonify |
|
|
from transformers import pipeline |
|
|
import torch |
|
|
|
|
|
|
|
|
# Hugging Face access token (optional; only needed for gated/private models).
hf_token = os.getenv("HF_TOKEN")

# Single source of truth for the model name. Previously this constant said
# "1.0B" while the pipeline below hard-coded "1.5B", so the health endpoint
# reported a model that was not the one actually serving requests.
MODEL_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct"

print("Loading pipeline (model + tokenizer)...")

# Load once at startup so every request reuses the same in-memory model.
# `token=None` is accepted by transformers and simply means anonymous access.
generator = pipeline("text-generation", model=MODEL_ID, token=hf_token)

print("Pipeline loaded successfully")

app = Flask(__name__)
|
|
|
|
|
@app.route("/generate", methods=["POST"])
def generate():
    """Generate text from a prompt.

    Expects a JSON body: ``{"prompt": str, "max_tokens": int (optional,
    default 256)}``. Returns ``{"response": <generated text>}`` on success,
    or ``{"error": ...}`` with HTTP 400 on invalid input.
    """
    # request.json raises (or yields None) when the body is missing or not
    # valid JSON, which surfaced as a 500; silent=True lets us answer 400.
    data = request.get_json(silent=True) or {}

    prompt = data.get("prompt", "")

    # A non-numeric max_tokens used to crash int() with a 500; reject it
    # explicitly instead.
    try:
        max_tokens = int(data.get("max_tokens", 256))
    except (TypeError, ValueError):
        return jsonify({"error": "max_tokens must be an integer"}), 400

    if not prompt:
        return jsonify({"error": "Prompt required"}), 400

    result = generator(
        prompt,
        max_new_tokens=max_tokens,
        truncation=True
    )

    # The pipeline returns a list of dicts; the first entry holds the text.
    return jsonify({"response": result[0]['generated_text']})
|
|
|
|
|
@app.route("/", methods=["GET"])
def health():
    """Liveness probe: report service status and the configured model id."""
    payload = {"status": "ok", "model": MODEL_ID}
    return jsonify(payload)
|
|
|
|
|
def _main():
    """Start the development server on all interfaces, port 7860 (HF Spaces default)."""
    app.run(host="0.0.0.0", port=7860)


if __name__ == "__main__":
    _main()
|
|
|