Chvigo committed on
Commit
834b974
·
verified ·
1 Parent(s): 3930e18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -16
app.py CHANGED
@@ -1,22 +1,23 @@
1
  import os
2
  from flask import Flask, request, jsonify
3
- from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import torch
5
 
6
- hf_token = "hf_***REDACTED — this token was published and must be revoked***"
 
7
 
8
  MODEL_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
9
 
10
- print("Loading tokenizer...")
11
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=hf_token )
12
-
13
- print("Loading model...")
14
- model = AutoModelForCausalLM.from_pretrained(
15
- MODEL_ID,
16
- device_map="cpu",
17
- token=hf_token
18
  )
19
- print("Model loaded successfully")
20
 
21
  app = Flask(__name__)
22
 
@@ -25,17 +26,23 @@ def generate():
25
  data = request.json
26
  prompt = data.get("prompt", "")
27
  max_tokens = int(data.get("max_tokens", 256))
 
28
  if not prompt:
29
  return jsonify({"error": "Prompt required"}), 400
30
 
31
- inputs = tokenizer(prompt, return_tensors="pt")
32
- outputs = model.generate(**inputs, max_new_tokens=max_tokens)
33
- text = tokenizer.decode(outputs[0], skip_special_tokens=True)
34
- return jsonify({"response": text})
 
 
 
 
35
 
36
  @app.route("/", methods=["GET"])
37
  def health():
38
  return jsonify({"status": "ok", "model": MODEL_ID})
39
 
40
  if __name__ == "__main__":
41
- app.run(host="0.0.0.0", port=8000)
 
 
1
  import os
2
  from flask import Flask, request, jsonify
3
+ from transformers import pipeline
4
  import torch
5
 
6
+ # Use the secret you set in Hugging Face Space settings
7
+ hf_token = os.getenv("HF_TOKEN")
8
 
9
  MODEL_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
10
 
11
+ print("Loading pipeline (model + tokenizer)...")
12
+ # The pipeline automatically downloads everything needed
13
+ generator = pipeline(
14
+ "text-generation",
15
+ model=MODEL_ID,
16
+ device_map="cpu", # Force CPU for free-tier Spaces
17
+ torch_dtype="auto",
18
+ token=hf_token
19
  )
20
+ print("Pipeline loaded successfully")
21
 
22
  app = Flask(__name__)
23
 
 
26
  data = request.json
27
  prompt = data.get("prompt", "")
28
  max_tokens = int(data.get("max_tokens", 256))
29
+
30
  if not prompt:
31
  return jsonify({"error": "Prompt required"}), 400
32
 
33
+ # Pipeline handles tokenization, generation, and decoding automatically
34
+ result = generator(
35
+ prompt,
36
+ max_new_tokens=max_tokens,
37
+ truncation=True
38
+ )
39
+
40
+ return jsonify({"response": result[0]['generated_text']})
41
 
42
  @app.route("/", methods=["GET"])
43
  def health():
44
  return jsonify({"status": "ok", "model": MODEL_ID})
45
 
46
  if __name__ == "__main__":
47
+ # Hugging Face Spaces require port 7860
48
+ app.run(host="0.0.0.0", port=7860)