import os

from flask import Flask, request, jsonify
from transformers import pipeline

# Use the secret you set in Hugging Face Space settings
hf_token = os.getenv("HF_TOKEN")
MODEL_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct"

print("Loading pipeline (model + tokenizer)...")
generator = pipeline("text-generation", model=MODEL_ID, token=hf_token)
# The pipeline downloads everything it needs automatically. For more control
# (e.g. forcing CPU on a free-tier Space), construct it explicitly:
# generator = pipeline(
#     "text-generation",
#     model=MODEL_ID,
#     device_map="cpu",    # force CPU for free-tier Spaces
#     torch_dtype="auto",
#     token=hf_token,
# )
print("Pipeline loaded successfully")
app = Flask(__name__)
@app.route("/generate", methods=["POST"])
def generate():
data = request.json
prompt = data.get("prompt", "")
max_tokens = int(data.get("max_tokens", 256))
if not prompt:
return jsonify({"error": "Prompt required"}), 400
# Pipeline handles tokenization, generation, and decoding automatically
result = generator(
prompt,
max_new_tokens=max_tokens,
truncation=True
)
return jsonify({"response": result[0]['generated_text']})
@app.route("/", methods=["GET"])
def health():
return jsonify({"status": "ok", "model": MODEL_ID})
if __name__ == "__main__":
# Hugging Face Spaces require port 7860
app.run(host="0.0.0.0", port=7860)
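
# Example client call (an illustrative sketch; assumes the app is running
# locally on port 7860; on a deployed Space, substitute the Space URL):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/generate",
#       json={"prompt": "Write a Python function that reverses a string.",
#             "max_tokens": 128},
#   )
#   print(resp.json()["response"])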