yousefabdallah031 committed on
Commit 23b0f0f · verified · 1 Parent(s): e6bc80a

Update main.py

Files changed (1)
  1. main.py +77 -28
main.py CHANGED
@@ -24,7 +24,7 @@ def load_model_and_tokenizer():
     print(f"Using device: {device}")
 
     try:
-        model_path = "best_model_final"
+        model_path = "./best_model_final"
         tokenizer = AutoTokenizer.from_pretrained(model_path)
         model = AutoModelForSequenceClassification.from_pretrained(model_path)
         model.to(device)
@@ -34,16 +34,80 @@ def load_model_and_tokenizer():
             model.half()
 
         print("Model and tokenizer loaded successfully!")
+
     except Exception as e:
         print(f"Error loading model/tokenizer: {e}")
-        model = None
-        tokenizer = None
+        raise e
 
 def cleanup_gpu_memory():
     if device and device.type == 'cuda':
         torch.cuda.empty_cache()
         gc.collect()
 
+def predict_single(code):
+    try:
+        inputs = tokenizer(
+            code,
+            padding=True,
+            truncation=True,
+            max_length=512,
+            return_tensors="pt"
+        )
+        inputs = {k: v.to(device) for k, v in inputs.items()}
+
+        with torch.no_grad():
+            if device.type == 'cuda':
+                with torch.cuda.amp.autocast():
+                    outputs = model(**inputs)
+            else:
+                outputs = model(**inputs)
+
+        preds = torch.sigmoid(outputs.logits).cpu().numpy()
+        cpu_time, memory_usage = preds[0]
+
+        return {
+            "cpu_time": round(float(cpu_time), 4),
+            "memory_usage": round(float(memory_usage), 4)
+        }
+
+    except Exception as e:
+        print(f"Single prediction error: {e}")
+        return {"cpu_time": 0.0, "memory_usage": 0.0}
+
+def predict_with_chunking(code, chunk_size=400, overlap=50):
+    try:
+        if not code or len(code.strip()) == 0:
+            return {"cpu_time": 0.0, "memory_usage": 0.0}
+
+        tokens = tokenizer.encode(code, add_special_tokens=False)
+        if len(tokens) <= 450:
+            return predict_single(code)
+
+        max_cpu_time = 0.0
+        max_memory_usage = 0.0
+
+        for start in range(0, len(tokens), chunk_size - overlap):
+            end = min(start + chunk_size, len(tokens))
+            chunk_tokens = tokens[start:end]
+            chunk_code = tokenizer.decode(chunk_tokens, skip_special_tokens=True)
+
+            if chunk_code.strip():
+                result = predict_single(chunk_code)
+                max_cpu_time = max(max_cpu_time, result["cpu_time"])
+                max_memory_usage = max(max_memory_usage, result["memory_usage"])
+
+            if end >= len(tokens):
+                break
+
+        return {
+            "cpu_time": round(max_cpu_time, 4),
+            "memory_usage": round(max_memory_usage, 4)
+        }
+
+    except Exception as e:
+        print(f"Chunking prediction error: {e}")
+        return {"cpu_time": 0.0, "memory_usage": 0.0}
+
 @app.route("/", methods=['GET'])
 def home():
     return jsonify({
@@ -51,7 +115,8 @@ def home():
         "status": "Model loaded" if model is not None else "Model not loaded",
         "device": str(device) if device else "unknown",
         "endpoints": {
-            "/predict": "POST with JSON body containing 'codes' array"
+            "/predict": "POST with JSON body containing 'codes' array",
+            "/health": "GET server health status"
         }
     })
 
@@ -88,31 +153,13 @@ def predict_batch():
 
     for i in range(0, len(validated_codes), batch_size):
         batch = validated_codes[i:i+batch_size]
-
-        inputs = tokenizer(
-            batch,
-            padding=True,
-            truncation=True,
-            max_length=512,
-            return_tensors="pt"
-        )
-        inputs = {k: v.to(device) for k, v in inputs.items()}
-
-        with torch.no_grad():
-            if device.type == 'cuda':
-                with torch.cuda.amp.autocast():
-                    outputs = model(**inputs)
+        for code in batch:
+            tokens = tokenizer.encode(code, add_special_tokens=False)
+            if len(tokens) > 450:
+                result = predict_with_chunking(code)
             else:
-                outputs = model(**inputs)
-
-        preds = torch.sigmoid(outputs.logits).cpu().numpy()
-
-        for pred in preds:
-            cpu_time, memory_usage = pred
-            results.append({
-                "cpu_time": round(float(cpu_time), 4),
-                "memory_usage": round(float(memory_usage), 4)
-            })
+                result = predict_single(code)
+            results.append(result)
 
     cleanup_gpu_memory()
 
@@ -131,6 +178,8 @@ def health_check():
         "device": str(device) if device else "unknown"
     })
 
+# Load model/tokenizer immediately when app starts (important for Spaces)
 load_model_and_tokenizer()
+
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860, debug=False, threaded=True)
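A note on the new chunking logic: because the loop steps by chunk_size - overlap, consecutive windows advance 350 tokens at a time and share a 50-token overlap, and the last window is clamped to the end of the token list. A standalone sketch of that window arithmetic (the 1000-token input length is hypothetical, chosen only for illustration):

# Window arithmetic used by predict_with_chunking, isolated for illustration.
# The 1000-token input length is hypothetical, not taken from the commit.
chunk_size, overlap, n_tokens = 400, 50, 1000
windows = []
for start in range(0, n_tokens, chunk_size - overlap):  # stride of 350 tokens
    end = min(start + chunk_size, n_tokens)
    windows.append((start, end))
    if end >= n_tokens:  # same early exit as in the committed code
        break
print(windows)  # [(0, 400), (350, 750), (700, 1000)]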
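For reference, a minimal client call against the updated /predict endpoint could look like the sketch below. The host and port follow the app.run line above; the exact JSON envelope that predict_batch wraps around results is not visible in this diff, so the printed shape is an assumption.

import requests

# Hypothetical client; assumes the app is reachable at localhost:7860
# (matching app.run above) and accepts a 'codes' array, per the home() docs.
payload = {"codes": ["for i in range(10):\n    print(i)"]}
resp = requests.post("http://localhost:7860/predict", json=payload, timeout=60)
resp.raise_for_status()
# Each prediction should carry rounded 'cpu_time' and 'memory_usage' floats,
# per results.append(...) in the diff; the outer envelope is assumed.
print(resp.json())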