yousefabdallah031 committed on
Commit
cc7be6c
·
verified ·
1 Parent(s): cff6fd1

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +136 -38
main.py CHANGED
@@ -1,38 +1,136 @@
1
- from flask import Flask, request
2
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
- import torch
4
- import torch.nn.functional as F
5
-
6
- app = Flask(__name__)
7
-
8
- @app.route("/")
9
- def hello():
10
- return request.url
11
-
12
- model_path = "best_model_final"
13
- tokenizer = AutoTokenizer.from_pretrained(model_path)
14
- model = AutoModelForSequenceClassification.from_pretrained(model_path)
15
- model.eval()
16
-
17
- @app.route("/predict", methods=["POST"])
18
- def predict_cpu_memory():
19
- data = request.get_json()
20
-
21
- if not data or "code" not in data:
22
- return {"error": "Missing 'code' in JSON body"}, 400
23
-
24
- code = data["code"]
25
- inputs = tokenizer(code, return_tensors="pt", padding=True, truncation=True)
26
-
27
- with torch.no_grad():
28
- outputs = model(**inputs)
29
- preds = torch.sigmoid(outputs.logits).numpy()
30
-
31
- cpu_time, memory_usage = preds[0]
32
- return {
33
- "cpu_time": float(f"{cpu_time:.4f}"),
34
- "memory_usage": float(f"{memory_usage:.4f}")
35
- }
36
-
37
- if __name__ == "__main__":
38
- app.run(host="0.0.0.0", port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
+ import torch
4
+ import os
5
+ import gc
6
+
7
+ app = Flask(__name__)
8
+
9
# Lazily-initialized globals; populated by load_model_and_tokenizer().
model = None      # the sequence-classification model once loaded
tokenizer = None  # the matching tokenizer once loaded
device = None     # torch.device selected by setup_device()
12
+
13
def setup_device():
    """Pick the most capable torch device available: CUDA > Apple MPS > CPU."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    # Older torch builds lack the mps backend attribute entirely.
    mps_backend = getattr(torch.backends, 'mps', None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device('mps')
    return torch.device('cpu')
20
+
21
def load_model_and_tokenizer():
    """Load the fine-tuned classifier and tokenizer onto the best device.

    On any failure, resets `model` and `tokenizer` to None so the HTTP
    endpoints can report "not loaded" instead of crashing.
    """
    global model, tokenizer, device

    device = setup_device()
    print(f"Using device: {device}")

    try:
        model_path = "best_model_final"
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForSequenceClassification.from_pretrained(model_path)
        model.to(device)
        model.eval()
        # Half precision to cut GPU memory use; CPU/MPS stay in fp32.
        if device.type == 'cuda':
            model.half()
        print("Model and tokenizer loaded successfully!")
    except Exception as e:
        print(f"Error loading model/tokenizer: {e}")
        model = None
        tokenizer = None
41
+
42
def cleanup_gpu_memory():
    """Release cached CUDA allocations and collect garbage; no-op off-GPU."""
    # torch.device instances are always truthy, so this matches the
    # original `if device and ...` check exactly.
    if device is not None and device.type == 'cuda':
        torch.cuda.empty_cache()
        gc.collect()
46
+
47
@app.route("/", methods=['GET'])
def home():
    """Landing page: reports model load status, device, and endpoints."""
    load_status = "Model loaded" if model is not None else "Model not loaded"
    device_name = str(device) if device else "unknown"
    return jsonify({
        "message": "Code Efficiency Prediction API",
        "status": load_status,
        "device": device_name,
        "endpoints": {
            "/predict": "POST with JSON body containing 'codes' array"
        }
    })
57
+
58
def _validate_codes(codes):
    """Validate the raw 'codes' payload from the request.

    Returns (validated_codes, None) on success, or (None, (response, status))
    describing the first validation failure. Empty/whitespace-only entries are
    replaced by a placeholder snippet so batch positions stay aligned.
    """
    if not isinstance(codes, list) or len(codes) == 0:
        return None, (jsonify({"error": "'codes' must be a non-empty array"}), 400)
    if len(codes) > 100:
        return None, (jsonify({"error": "Too many codes. Maximum 100 allowed."}), 400)

    validated_codes = []
    for i, code in enumerate(codes):
        if not isinstance(code, str):
            return None, (jsonify({"error": f"Code at index {i} must be a string"}), 400)
        stripped = code.strip()
        # Order matters: a whitespace-only entry is accepted as a placeholder
        # even if the raw string is long; only non-empty code hits the cap.
        if not stripped:
            validated_codes.append("# empty code")
        elif len(code) > 50000:
            return None, (jsonify({"error": f"Code at index {i} too long. Maximum 50000 characters."}), 400)
        else:
            validated_codes.append(stripped)
    return validated_codes, None


def _predict_scores(batch):
    """Run one tokenize+forward pass over a list of code strings.

    Returns the sigmoid-activated logits as a numpy array of shape
    (len(batch), num_labels) — presumably (cpu_time, memory_usage) pairs.
    """
    inputs = tokenizer(
        batch,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        # Mixed precision only on CUDA, matching the fp16 model weights.
        if device.type == 'cuda':
            with torch.cuda.amp.autocast():
                outputs = model(**inputs)
        else:
            outputs = model(**inputs)
    return torch.sigmoid(outputs.logits).cpu().numpy()


@app.route("/predict", methods=['POST'])
def predict_batch():
    """Predict (cpu_time, memory_usage) scores for a batch of code snippets.

    Expects JSON {"codes": [str, ...]} (max 100 entries, 50000 chars each)
    and returns {"results": [{"cpu_time": float, "memory_usage": float}, ...]}.
    Returns 400 on invalid input, 500 on model/load errors.
    """
    try:
        if model is None or tokenizer is None:
            return jsonify({"error": "Model not loaded properly"}), 500

        data = request.get_json()
        if not data or 'codes' not in data:
            return jsonify({"error": "Missing 'codes' field in JSON body"}), 400

        validated_codes, error = _validate_codes(data['codes'])
        if error is not None:
            return error

        # Process in chunks of at most 16 to bound per-pass memory.
        batch_size = min(len(validated_codes), 16)
        results = []
        for start in range(0, len(validated_codes), batch_size):
            preds = _predict_scores(validated_codes[start:start + batch_size])
            for cpu_time, memory_usage in preds:
                results.append({
                    "cpu_time": round(float(cpu_time), 4),
                    "memory_usage": round(float(memory_usage), 4)
                })

        cleanup_gpu_memory()
        return jsonify({"results": results})

    except Exception as e:
        # Best-effort cleanup even on failure so GPU memory is not stranded.
        cleanup_gpu_memory()
        return jsonify({"error": f"Batch prediction error: {str(e)}"}), 500
124
+
125
@app.route("/health", methods=['GET'])
def health_check():
    """Liveness probe reporting which components have loaded."""
    payload = {
        "status": "healthy",
        "model_loaded": model is not None,
        "tokenizer_loaded": tokenizer is not None,
        "device": str(device) if device else "unknown"
    }
    return jsonify(payload)
133
+
134
if __name__ == "__main__":
    # NOTE(review): the model is loaded only when run as a script; a WSGI
    # server importing this module would skip load_model_and_tokenizer(),
    # leaving model/tokenizer as None — confirm deployment runs `python main.py`.
    load_model_and_tokenizer()
    app.run(host="0.0.0.0", port=7860, debug=False, threaded=True)