"""Minimal Flask inference server for a Qwen2.5-Coder text-generation pipeline.

Intended for a Hugging Face Space (CPU free tier): exposes POST /generate for
text generation and GET / as a health/liveness probe on port 7860.
"""

import os

import torch
from flask import Flask, request, jsonify
from transformers import pipeline

# Secret configured in the Hugging Face Space settings; may be None, which is
# fine for public models but required for gated/private ones.
hf_token = os.getenv("HF_TOKEN")

# BUG FIX: the original set MODEL_ID to "Qwen/Qwen2.5-Coder-1.0B-Instruct"
# (a model ID that does not exist on the Hub) while loading a hard-coded
# "...1.5B..." string, so the health endpoint reported a model the server was
# not actually running. One constant, used everywhere.
MODEL_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct"

print("Loading pipeline (model + tokenizer)...")
# The pipeline downloads the model + tokenizer and handles tokenization,
# generation, and decoding. Pass the token so gated models work (the original
# read HF_TOKEN but never used it).
generator = pipeline(
    "text-generation",
    model=MODEL_ID,
    torch_dtype="auto",
    token=hf_token,
)
print("Pipeline loaded successfully")

app = Flask(__name__)


@app.route("/generate", methods=["POST"])
def generate():
    """Generate a completion for a JSON body: {"prompt": str, "max_tokens": int?}.

    Returns 400 for a missing prompt, a non-JSON body, or a non-integer
    max_tokens (the original crashed with an unhandled 500 in those cases).
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body.
    data = request.get_json(silent=True) or {}
    prompt = data.get("prompt", "")
    try:
        max_tokens = int(data.get("max_tokens", 256))
    except (TypeError, ValueError):
        return jsonify({"error": "max_tokens must be an integer"}), 400
    if not prompt:
        return jsonify({"error": "Prompt required"}), 400
    # Pipeline handles tokenization, generation, and decoding automatically.
    result = generator(
        prompt,
        max_new_tokens=max_tokens,
        truncation=True,
    )
    return jsonify({"response": result[0]["generated_text"]})


@app.route("/", methods=["GET"])
def health():
    """Liveness probe: report status and the model that was actually loaded."""
    return jsonify({"status": "ok", "model": MODEL_ID})


if __name__ == "__main__":
    # Hugging Face Spaces route external traffic to port 7860.
    app.run(host="0.0.0.0", port=7860)