File size: 1,380 Bytes
2be16a7
c951bf7
834b974
c951bf7
 
834b974
 
2be16a7
c951bf7
 
834b974
ed37bf9
 
 
834b974
ed37bf9
 
 
 
 
 
 
834b974
c951bf7
 
 
 
 
 
 
 
834b974
c951bf7
 
 
834b974
 
 
 
 
 
 
 
c951bf7
 
 
 
 
 
834b974
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
from flask import Flask, request, jsonify
from transformers import pipeline
import torch

# Optional access token, read from the Space secret named HF_TOKEN.
# NOTE(review): not passed to the pipeline below; presumably only needed for
# gated or private checkpoints -- confirm before wiring it in.
hf_token = os.getenv("HF_TOKEN")

# Single source of truth for the checkpoint: the same value is loaded below
# and reported by the health endpoint, so the two cannot drift apart.
MODEL_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct"

print("Loading pipeline (model + tokenizer)...")

# Built once at import time so every request reuses the already-loaded model.
# The pipeline automatically downloads everything needed.
generator = pipeline("text-generation", model=MODEL_ID)

print("Pipeline loaded successfully")

app = Flask(__name__)

@app.route("/generate", methods=["POST"])
def generate():
    """Generate text for the prompt supplied in a JSON request body.

    Expected body: a JSON object with

    * ``prompt`` -- required, must be non-empty.
    * ``max_tokens`` -- optional, positive integer, defaults to 256.

    Returns:
        ``{"response": <generated text>}`` on success, or
        ``({"error": <message>}, 400)`` when the body is not a JSON object,
        the prompt is missing/empty, or ``max_tokens`` is not a positive
        integer.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so every bad-input case is answered with the same error shape.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    prompt = data.get("prompt", "")
    if not prompt:
        return jsonify({"error": "Prompt required"}), 400

    # int() raises TypeError for None/containers and ValueError for
    # non-numeric strings; both are client errors, not server faults.
    try:
        max_tokens = int(data.get("max_tokens", 256))
    except (TypeError, ValueError):
        return jsonify({"error": "max_tokens must be an integer"}), 400
    if max_tokens < 1:
        return jsonify({"error": "max_tokens must be a positive integer"}), 400

    # Pipeline handles tokenization, generation, and decoding automatically
    result = generator(
        prompt,
        max_new_tokens=max_tokens,
        truncation=True,
    )

    return jsonify({"response": result[0]["generated_text"]})

@app.route("/", methods=["GET"])
def health():
    """Answer a liveness probe with a fixed status and the value of MODEL_ID."""
    payload = {"status": "ok", "model": MODEL_ID}
    return jsonify(payload)

if __name__ == "__main__":
    # Listen on all interfaces; Hugging Face Spaces require port 7860.
    app.run(
        host="0.0.0.0",
        port=7860,
    )