qwencoder / app.py
Chvigo's picture
Update app.py
ed37bf9 verified
import os
from flask import Flask, request, jsonify
from transformers import pipeline
import torch
# Hugging Face access token taken from the Space's secret settings.
# NOTE(review): the token is read here but not handed to pipeline() below;
# presumably the hub client picks HF_TOKEN up from the environment on its
# own -- confirm before relying on it for gated or private models.
hf_token = os.getenv("HF_TOKEN")

# Single source of truth for the model id: it is used to load the pipeline
# and is what the health endpoint reports, so the two cannot disagree.
MODEL_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct"

print("Loading pipeline (model + tokenizer)...")
# The pipeline automatically downloads everything needed.
# Keyword arguments that were previously tried here and can be re-enabled
# if required: device_map="cpu" (force CPU for free-tier Spaces),
# torch_dtype="auto", token=hf_token.
generator = pipeline("text-generation", model=MODEL_ID)
print("Pipeline loaded successfully")

app = Flask(__name__)
@app.route("/generate", methods=["POST"])
def generate():
    """Generate text for the prompt supplied in the JSON request body.

    Expected fields of the JSON object:
        prompt: input handed to the text-generation pipeline (required).
        max_tokens: maximum number of new tokens to generate (default 256).

    Returns:
        ``{"response": <generated text>}`` on success, or
        ``{"error": <message>}`` with HTTP status 400 when the body is not a
        JSON object, ``max_tokens`` is not a positive integer, or the prompt
        is missing or empty.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every client error is answered in the same JSON shape.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # Covers None, JSON ``null``, arrays and scalars, all of which would
        # otherwise fail on ``.get`` and surface as a 500.
        return jsonify({"error": "Request body must be a JSON object"}), 400

    try:
        max_tokens = int(data.get("max_tokens", 256))
    except (TypeError, ValueError, OverflowError):
        # e.g. null, "abc", a nested object, NaN or Infinity.
        return jsonify({"error": "max_tokens must be an integer"}), 400
    if max_tokens < 1:
        return jsonify({"error": "max_tokens must be a positive integer"}), 400
    # NOTE(review): there is no upper bound on max_tokens; a very large value
    # can keep the model busy for a long time. Consider capping it.

    prompt = data.get("prompt", "")
    if not prompt:
        return jsonify({"error": "Prompt required"}), 400

    # Pipeline handles tokenization, generation, and decoding automatically.
    result = generator(prompt, max_new_tokens=max_tokens, truncation=True)
    return jsonify({"response": result[0]["generated_text"]})
@app.route("/", methods=["GET"])
def health():
    """Health check: reply with an "ok" status and the configured model id."""
    payload = {"status": "ok", "model": MODEL_ID}
    return jsonify(payload)
if __name__ == "__main__":
    # Start the Flask server when the file is executed as a script.
    # It binds to 0.0.0.0 on port 7860, the port Hugging Face Spaces require.
    app.run(port=7860, host="0.0.0.0")