yousefabdallah031 committed on
Commit
cc7be6c
·
verified ·
1 Parent(s): cff6fd1

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +136 -38
main.py CHANGED
@@ -1,38 +1,136 @@
1
- from flask import Flask, request
2
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
- import torch
4
- import torch.nn.functional as F
5
-
6
- app = Flask(__name__)
7
-
8
- @app.route("/")
9
- def hello():
10
- return request.url
11
-
12
- model_path = "best_model_final"
13
- tokenizer = AutoTokenizer.from_pretrained(model_path)
14
- model = AutoModelForSequenceClassification.from_pretrained(model_path)
15
- model.eval()
16
-
17
- @app.route("/predict", methods=["POST"])
18
- def predict_cpu_memory():
19
- data = request.get_json()
20
-
21
- if not data or "code" not in data:
22
- return {"error": "Missing 'code' in JSON body"}, 400
23
-
24
- code = data["code"]
25
- inputs = tokenizer(code, return_tensors="pt", padding=True, truncation=True)
26
-
27
- with torch.no_grad():
28
- outputs = model(**inputs)
29
- preds = torch.sigmoid(outputs.logits).numpy()
30
-
31
- cpu_time, memory_usage = preds[0]
32
- return {
33
- "cpu_time": float(f"{cpu_time:.4f}"),
34
- "memory_usage": float(f"{memory_usage:.4f}")
35
- }
36
-
37
- if __name__ == "__main__":
38
- app.run(host="0.0.0.0", port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
+ import torch
4
+ import os
5
+ import gc
6
+
7
+ app = Flask(__name__)
8
+
9
# Lazily-initialized globals; populated by load_model_and_tokenizer().
model = None      # the sequence-classification model once loaded
tokenizer = None  # the matching tokenizer once loaded
device = None     # torch.device selected by setup_device()
12
+
13
def setup_device():
    """Pick the most capable torch device available: CUDA > Apple MPS > CPU."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    # Older torch builds lack the mps backend attribute entirely.
    mps_backend = getattr(torch.backends, 'mps', None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device('mps')
    return torch.device('cpu')
20
+
21
def load_model_and_tokenizer():
    """Load the fine-tuned classifier and tokenizer onto the best device.

    On any failure, resets `model` and `tokenizer` to None so the HTTP
    endpoints can report "not loaded" instead of crashing.
    """
    global model, tokenizer, device

    device = setup_device()
    print(f"Using device: {device}")

    try:
        model_path = "best_model_final"
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForSequenceClassification.from_pretrained(model_path)
        model.to(device)
        model.eval()
        # Half precision to cut GPU memory use; CPU/MPS stay in fp32.
        if device.type == 'cuda':
            model.half()
        print("Model and tokenizer loaded successfully!")
    except Exception as e:
        print(f"Error loading model/tokenizer: {e}")
        model = None
        tokenizer = None
41
+
42
def cleanup_gpu_memory():
    """Release cached CUDA allocations and collect garbage; no-op off-GPU."""
    # torch.device instances are always truthy, so this matches the
    # original `if device and ...` check exactly.
    if device is not None and device.type == 'cuda':
        torch.cuda.empty_cache()
        gc.collect()
46
+
47
@app.route("/", methods=['GET'])
def home():
    """Landing page: reports model load status, device, and endpoints."""
    load_status = "Model loaded" if model is not None else "Model not loaded"
    device_name = str(device) if device else "unknown"
    return jsonify({
        "message": "Code Efficiency Prediction API",
        "status": load_status,
        "device": device_name,
        "endpoints": {
            "/predict": "POST with JSON body containing 'codes' array"
        }
    })
57
+
58
def _validate_codes(codes):
    """Validate the raw 'codes' payload from the request.

    Returns (validated_codes, None) on success, or (None, (response, status))
    describing the first validation failure. Empty/whitespace-only entries are
    replaced by a placeholder snippet so batch positions stay aligned.
    """
    if not isinstance(codes, list) or len(codes) == 0:
        return None, (jsonify({"error": "'codes' must be a non-empty array"}), 400)
    if len(codes) > 100:
        return None, (jsonify({"error": "Too many codes. Maximum 100 allowed."}), 400)

    validated_codes = []
    for i, code in enumerate(codes):
        if not isinstance(code, str):
            return None, (jsonify({"error": f"Code at index {i} must be a string"}), 400)
        stripped = code.strip()
        # Order matters: a whitespace-only entry is accepted as a placeholder
        # even if the raw string is long; only non-empty code hits the cap.
        if not stripped:
            validated_codes.append("# empty code")
        elif len(code) > 50000:
            return None, (jsonify({"error": f"Code at index {i} too long. Maximum 50000 characters."}), 400)
        else:
            validated_codes.append(stripped)
    return validated_codes, None


def _predict_scores(batch):
    """Run one tokenize+forward pass over a list of code strings.

    Returns the sigmoid-activated logits as a numpy array of shape
    (len(batch), num_labels) — presumably (cpu_time, memory_usage) pairs.
    """
    inputs = tokenizer(
        batch,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        # Mixed precision only on CUDA, matching the fp16 model weights.
        if device.type == 'cuda':
            with torch.cuda.amp.autocast():
                outputs = model(**inputs)
        else:
            outputs = model(**inputs)
    return torch.sigmoid(outputs.logits).cpu().numpy()


@app.route("/predict", methods=['POST'])
def predict_batch():
    """Predict (cpu_time, memory_usage) scores for a batch of code snippets.

    Expects JSON {"codes": [str, ...]} (max 100 entries, 50000 chars each)
    and returns {"results": [{"cpu_time": float, "memory_usage": float}, ...]}.
    Returns 400 on invalid input, 500 on model/load errors.
    """
    try:
        if model is None or tokenizer is None:
            return jsonify({"error": "Model not loaded properly"}), 500

        data = request.get_json()
        if not data or 'codes' not in data:
            return jsonify({"error": "Missing 'codes' field in JSON body"}), 400

        validated_codes, error = _validate_codes(data['codes'])
        if error is not None:
            return error

        # Process in chunks of at most 16 to bound per-pass memory.
        batch_size = min(len(validated_codes), 16)
        results = []
        for start in range(0, len(validated_codes), batch_size):
            preds = _predict_scores(validated_codes[start:start + batch_size])
            for cpu_time, memory_usage in preds:
                results.append({
                    "cpu_time": round(float(cpu_time), 4),
                    "memory_usage": round(float(memory_usage), 4)
                })

        cleanup_gpu_memory()
        return jsonify({"results": results})

    except Exception as e:
        # Best-effort cleanup even on failure so GPU memory is not stranded.
        cleanup_gpu_memory()
        return jsonify({"error": f"Batch prediction error: {str(e)}"}), 500
124
+
125
@app.route("/health", methods=['GET'])
def health_check():
    """Liveness probe reporting which components have loaded."""
    payload = {
        "status": "healthy",
        "model_loaded": model is not None,
        "tokenizer_loaded": tokenizer is not None,
        "device": str(device) if device else "unknown"
    }
    return jsonify(payload)
133
+
134
if __name__ == "__main__":
    # NOTE(review): the model is loaded only when run as a script; a WSGI
    # server importing this module would skip load_model_and_tokenizer(),
    # leaving model/tokenizer as None — confirm deployment runs `python main.py`.
    load_model_and_tokenizer()
    app.run(host="0.0.0.0", port=7860, debug=False, threaded=True)