kacperbb committed on
Commit
b260d66
Β·
verified Β·
1 Parent(s): 5ecd7ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -43
app.py CHANGED
@@ -1,8 +1,9 @@
1
  from flask import Flask, request, jsonify
2
  import os
3
  import logging
 
4
 
5
- # Set cache environment variables
6
  os.environ['HF_HOME'] = '/.cache/huggingface'
7
  os.environ['TRANSFORMERS_CACHE'] = '/.cache/huggingface/transformers'
8
 
@@ -12,72 +13,57 @@ logger = logging.getLogger(__name__)
12
  app = Flask(__name__)
13
  model = None
14
 
 
15
  def load_model():
16
  global model
17
- try:
18
- logger.info("Loading YOUR fine-tuned model...")
19
- from transformers import pipeline
20
-
21
- # Try to use your model with cache settings
22
- model = pipeline(
23
- "text-generation",
24
- model="kacperbb/phi-3.5-hf-finetuned",
25
- trust_remote_code=True,
26
- cache_dir="/.cache/huggingface"
27
- )
28
- logger.info("βœ… YOUR fine-tuned model loaded successfully!")
29
- return True
30
- except Exception as e:
31
- logger.error(f"❌ Error loading your model: {e}")
32
- logger.info("Trying with base model...")
33
  try:
 
34
  model = pipeline(
35
- "text-generation",
36
- model="microsoft/Phi-3.5-mini-instruct",
37
  trust_remote_code=True,
38
  cache_dir="/.cache/huggingface"
39
  )
40
- logger.info("βœ… Base model loaded as fallback")
41
  return True
42
- except Exception as e2:
43
- logger.error(f"❌ Fallback failed: {e2}")
44
- # Last resort - use a smaller model
45
- try:
46
- model = pipeline("text-generation", model="microsoft/DialoGPT-medium")
47
- logger.info("βœ… Smaller fallback model loaded")
48
- return True
49
- except:
50
- return False
51
 
 
 
 
 
52
  @app.route('/generate', methods=['POST'])
53
  def generate_text():
54
  global model
55
  try:
56
  data = request.json
57
- prompt = data.get('inputs', data.get('prompt', ''))
58
  max_tokens = data.get('parameters', {}).get('max_new_tokens', 100)
59
-
60
  if not prompt:
61
  return jsonify({"error": "No prompt provided"}), 400
62
-
63
- if model:
64
- result = model(prompt, max_new_tokens=max_tokens, do_sample=True, temperature=0.7)
65
- response = result[0]['generated_text']
66
- else:
67
  return jsonify({"error": "Model not loaded"}), 500
68
-
69
- return jsonify([{"generated_text": response}])
70
-
 
71
  except Exception as e:
72
  logger.error(f"Generation error: {e}")
73
  return jsonify({"error": str(e)}), 500
74
 
75
  @app.route('/health', methods=['GET'])
76
  def health():
77
- return jsonify({
78
- "status": "healthy",
79
- "model_loaded": model is not None
80
- })
81
 
82
  @app.route('/', methods=['GET'])
83
  def home():
@@ -90,6 +76,7 @@ def home():
90
  }
91
  })
92
 
 
93
  if __name__ == '__main__':
94
  logger.info("Starting Phi 3.5 API...")
95
  load_model()
 
1
  from flask import Flask, request, jsonify
2
  import os
3
  import logging
4
+ from transformers import pipeline
5
 
6
+ # ====== Cache & Logging Setup ======
7
  os.environ['HF_HOME'] = '/.cache/huggingface'
8
  os.environ['TRANSFORMERS_CACHE'] = '/.cache/huggingface/transformers'
9
 
 
13
  app = Flask(__name__)
14
  model = None
15
 
16
+ # ====== Model Loader ======
17
def load_model():
    """Load a text-generation pipeline, trying candidate models in order.

    Mutates the module-level ``model`` with the first pipeline that loads
    successfully; later candidates are only tried after a failure.

    Returns:
        bool: True when any candidate loaded, False when every attempt failed.
    """
    global model
    # Ordered from most to least preferred; the first successful load wins.
    models_to_try = [
        ("kacperbb/phi-3.5-hf-finetuned", "YOUR fine-tuned model"),
        ("microsoft/Phi-3.5-mini-instruct", "Base model"),
        ("microsoft/DialoGPT-medium", "Smaller fallback model"),
    ]

    for model_name, description in models_to_try:
        try:
            # Lazy %-style args: the message is only formatted if emitted.
            logger.info("Loading %s...", description)
            model = pipeline(
                "text-generation",
                model=model_name,
                trust_remote_code=True,
                cache_dir="/.cache/huggingface",
            )
            logger.info("✅ %s loaded successfully!", description)
            return True
        except Exception:
            # logger.exception records the full traceback, not just str(e),
            # which makes download/auth failures much easier to diagnose.
            logger.exception("❌ Failed to load %s", description)

    logger.error("❌ All model loading attempts failed!")
    return False
41
+
42
+ # ====== Endpoints ======
43
@app.route('/generate', methods=['POST'])
def generate_text():
    """Generate text from a JSON prompt.

    Expects a JSON body with ``inputs`` (or ``prompt``) and an optional
    ``parameters.max_new_tokens`` (default 100). Responds with
    ``[{"generated_text": ...}]``, mirroring the HF Inference API shape.

    Returns 400 on a missing/empty prompt or non-JSON body, 500 when the
    model is not loaded or generation fails.
    """
    try:
        # silent=True yields None (instead of raising) on a non-JSON body,
        # so malformed requests get a clean 400 rather than an opaque 500
        # from the AttributeError that `request.json.get(...)` would raise.
        data = request.get_json(silent=True) or {}
        prompt = data.get('inputs') or data.get('prompt', '')
        max_tokens = data.get('parameters', {}).get('max_new_tokens', 100)

        if not prompt:
            return jsonify({"error": "No prompt provided"}), 400

        if not model:
            return jsonify({"error": "Model not loaded"}), 500

        result = model(prompt, max_new_tokens=max_tokens, do_sample=True, temperature=0.7)
        return jsonify([{"generated_text": result[0]['generated_text']}])

    except Exception as e:
        # Full traceback in the log; only the message goes to the client.
        logger.exception("Generation error: %s", e)
        return jsonify({"error": str(e)}), 500
63
 
64
@app.route('/health', methods=['GET'])
def health():
    """Report service liveness and whether a generation model is ready."""
    payload = {
        "status": "healthy",
        "model_loaded": model is not None,
    }
    return jsonify(payload)
 
 
 
67
 
68
  @app.route('/', methods=['GET'])
69
  def home():
 
76
  }
77
  })
78
 
79
+ # ====== Start Server ======
80
  if __name__ == '__main__':
81
  logger.info("Starting Phi 3.5 API...")
82
  load_model()