Spaces:
Running
Running
github-actions[bot]
committed on
Commit
·
5ecd2f9
1
Parent(s):
38593e7
Sync turing folder from GitHub
Browse files- turing/modeling/models/codeBerta.py +25 -44
- turing/modeling/models/graphCodeBert.py +17 -24
- turing/modeling/predict.py +21 -14
- turing/monitoring/locustfile.py +46 -0
turing/modeling/models/codeBerta.py
CHANGED
|
@@ -32,7 +32,6 @@ warnings.filterwarnings("ignore")
|
|
| 32 |
|
| 33 |
def compute_metrics(eval_pred):
|
| 34 |
predictions, labels = eval_pred
|
| 35 |
-
|
| 36 |
# Sigmoid function to convert logits to probabilities
|
| 37 |
probs = 1 / (1 + np.exp(-predictions))
|
| 38 |
|
|
@@ -67,11 +66,11 @@ class CodeBERTaDataset(Dataset):
|
|
| 67 |
"""
|
| 68 |
|
| 69 |
self.encodings = {key: torch.tensor(val) for key, val in encodings.items()}
|
| 70 |
-
|
| 71 |
if labels is not None:
|
| 72 |
if not isinstance(labels, (np.ndarray, torch.Tensor)):
|
| 73 |
labels = np.array(labels)
|
| 74 |
-
|
| 75 |
# Case A: labels are indices (integers)
|
| 76 |
if num_labels is not None and (len(labels.shape) == 1 or (len(labels.shape) == 2 and labels.shape[1] == 1)):
|
| 77 |
labels_flat = labels.flatten()
|
|
@@ -149,12 +148,11 @@ class CodeBERTa(BaseModel):
|
|
| 149 |
"early_stopping_patience": 3,
|
| 150 |
"early_stopping_threshold": 0.005
|
| 151 |
}
|
| 152 |
-
|
| 153 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 154 |
self.tokenizer = None
|
| 155 |
-
|
| 156 |
-
super().__init__(language, path)
|
| 157 |
|
|
|
|
| 158 |
|
| 159 |
def setup_model(self):
|
| 160 |
"""
|
|
@@ -162,7 +160,7 @@ class CodeBERTa(BaseModel):
|
|
| 162 |
"""
|
| 163 |
|
| 164 |
logger.info(f"Initializing {self.params['model_name_hf']} on {self.device}...")
|
| 165 |
-
|
| 166 |
self.tokenizer = AutoTokenizer.from_pretrained(self.params["model_name_hf"])
|
| 167 |
self.model = AutoModelForSequenceClassification.from_pretrained(
|
| 168 |
self.params["model_name_hf"],
|
|
@@ -218,23 +216,21 @@ class CodeBERTa(BaseModel):
|
|
| 218 |
if self.model is None:
|
| 219 |
raise ValueError("Model is not initialized. Call setup_model() before training.")
|
| 220 |
|
| 221 |
-
# log parameters to MLflow without model_name_hf
|
| 222 |
params_to_log = {k: v for k, v in self.params.items() if k != "model_name_hf" and k != "num_labels"}
|
| 223 |
-
|
| 224 |
logger.info(f"Starting training for: {self.language.upper()}")
|
| 225 |
-
|
| 226 |
# Prepare dataset (train/val split)
|
| 227 |
train_encodings = self._tokenize(X_train)
|
| 228 |
full_dataset = CodeBERTaDataset(train_encodings, y_train, num_labels=self.params["num_labels"])
|
|
|
|
| 229 |
train_size = int(self.params["train_size"] * len(full_dataset))
|
| 230 |
val_size = len(full_dataset) - train_size
|
| 231 |
train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])
|
| 232 |
|
| 233 |
temp_ckpt_dir = os.path.join(MODELS_DIR, "temp_checkpoints")
|
| 234 |
-
|
| 235 |
use_fp16 = torch.cuda.is_available()
|
| 236 |
-
if not use_fp16:
|
| 237 |
-
logger.info("Mixed Precision (fp16) disabled because CUDA is not available.")
|
| 238 |
|
| 239 |
training_args = TrainingArguments(
|
| 240 |
output_dir=temp_ckpt_dir,
|
|
@@ -314,9 +310,8 @@ class CodeBERTa(BaseModel):
|
|
| 314 |
idx = int(label_idx)
|
| 315 |
if 0 <= idx < num_labels:
|
| 316 |
y_test_expanded[i, idx] = 1
|
| 317 |
-
|
| 318 |
-
y_test_np = y_test_expanded
|
| 319 |
|
|
|
|
| 320 |
# Generate classification report
|
| 321 |
report = classification_report(y_test_np, y_pred, zero_division=0)
|
| 322 |
print("\n" + "=" * 50)
|
|
@@ -330,12 +325,8 @@ class CodeBERTa(BaseModel):
|
|
| 330 |
"recall": recall_score(y_test_np, y_pred, average="macro", zero_division=0),
|
| 331 |
"f1_score": f1_score(y_test_np, y_pred, average="macro"),
|
| 332 |
}
|
| 333 |
-
|
| 334 |
mlflow.log_metrics(metrics)
|
| 335 |
-
|
| 336 |
-
logger.info(
|
| 337 |
-
f"Evaluation completed — Accuracy: {metrics['accuracy']:.3f}, F1: {metrics['f1_score']:.3f}"
|
| 338 |
-
)
|
| 339 |
return metrics
|
| 340 |
|
| 341 |
|
|
@@ -350,36 +341,28 @@ class CodeBERTa(BaseModel):
|
|
| 350 |
Returns:
|
| 351 |
np.ndarray: Multi-Hot Encoded predictions (e.g., [[0, 1, 1, 0], ...])
|
| 352 |
"""
|
| 353 |
-
|
| 354 |
if self.model is None:
|
| 355 |
raise ValueError("Model is not trained. Call train() or load() before prediction.")
|
| 356 |
|
| 357 |
# Set model to evaluation mode
|
| 358 |
self.model.eval()
|
| 359 |
|
|
|
|
| 360 |
encodings = self._tokenize(X)
|
| 361 |
-
# Pass None as labels because we are in inference
|
| 362 |
-
dataset = CodeBERTaDataset(encodings, labels=None)
|
| 363 |
|
| 364 |
-
|
|
|
|
| 365 |
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
report_to="none",
|
| 371 |
-
no_cuda=not torch.cuda.is_available()
|
| 372 |
-
)
|
| 373 |
|
| 374 |
-
|
| 375 |
-
|
| 376 |
|
| 377 |
-
#
|
| 378 |
-
if os.path.exists("./pred_temp"):
|
| 379 |
-
shutil.rmtree("./pred_temp")
|
| 380 |
-
|
| 381 |
-
# Convert logits to probabilities
|
| 382 |
-
logits = output.predictions
|
| 383 |
probs = 1 / (1 + np.exp(-logits))
|
| 384 |
|
| 385 |
# Apply a threshold of 0.5 (if prob > 0.5, predict 1 else 0)
|
|
@@ -387,7 +370,6 @@ class CodeBERTa(BaseModel):
|
|
| 387 |
|
| 388 |
return preds_binary
|
| 389 |
|
| 390 |
-
|
| 391 |
def save(self, path, model_name):
|
| 392 |
"""
|
| 393 |
Save model locally and log to MLflow as artifact.
|
|
@@ -420,7 +402,6 @@ class CodeBERTa(BaseModel):
|
|
| 420 |
except Exception as e:
|
| 421 |
logger.error(f"Failed to log model artifacts to MLflow: {e}")
|
| 422 |
|
| 423 |
-
|
| 424 |
def load(self, model_path):
|
| 425 |
"""
|
| 426 |
Load model from a local path OR an MLflow URI.
|
|
@@ -447,14 +428,14 @@ class CodeBERTa(BaseModel):
|
|
| 447 |
try:
|
| 448 |
if not os.path.exists(local_model_path):
|
| 449 |
raise FileNotFoundError(f"Model path not found: {local_model_path}")
|
| 450 |
-
|
| 451 |
# Load tokenizer and model from local path
|
| 452 |
self.tokenizer = AutoTokenizer.from_pretrained(local_model_path)
|
| 453 |
self.model = AutoModelForSequenceClassification.from_pretrained(
|
| 454 |
-
local_model_path
|
|
|
|
| 455 |
).to(self.device)
|
| 456 |
logger.info("Model loaded from local path successfully.")
|
| 457 |
-
|
| 458 |
except Exception as e:
|
| 459 |
logger.error(f"Failed to load model from local path: {e}")
|
| 460 |
raise e
|
|
|
|
| 32 |
|
| 33 |
def compute_metrics(eval_pred):
|
| 34 |
predictions, labels = eval_pred
|
|
|
|
| 35 |
# Sigmoid function to convert logits to probabilities
|
| 36 |
probs = 1 / (1 + np.exp(-predictions))
|
| 37 |
|
|
|
|
| 66 |
"""
|
| 67 |
|
| 68 |
self.encodings = {key: torch.tensor(val) for key, val in encodings.items()}
|
| 69 |
+
|
| 70 |
if labels is not None:
|
| 71 |
if not isinstance(labels, (np.ndarray, torch.Tensor)):
|
| 72 |
labels = np.array(labels)
|
| 73 |
+
|
| 74 |
# Case A: labels are indices (integers)
|
| 75 |
if num_labels is not None and (len(labels.shape) == 1 or (len(labels.shape) == 2 and labels.shape[1] == 1)):
|
| 76 |
labels_flat = labels.flatten()
|
|
|
|
| 148 |
"early_stopping_patience": 3,
|
| 149 |
"early_stopping_threshold": 0.005
|
| 150 |
}
|
| 151 |
+
|
| 152 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 153 |
self.tokenizer = None
|
|
|
|
|
|
|
| 154 |
|
| 155 |
+
super().__init__(language, path)
|
| 156 |
|
| 157 |
def setup_model(self):
|
| 158 |
"""
|
|
|
|
| 160 |
"""
|
| 161 |
|
| 162 |
logger.info(f"Initializing {self.params['model_name_hf']} on {self.device}...")
|
| 163 |
+
|
| 164 |
self.tokenizer = AutoTokenizer.from_pretrained(self.params["model_name_hf"])
|
| 165 |
self.model = AutoModelForSequenceClassification.from_pretrained(
|
| 166 |
self.params["model_name_hf"],
|
|
|
|
| 216 |
if self.model is None:
|
| 217 |
raise ValueError("Model is not initialized. Call setup_model() before training.")
|
| 218 |
|
| 219 |
+
# log parameters to MLflow without model_name_hf
|
| 220 |
params_to_log = {k: v for k, v in self.params.items() if k != "model_name_hf" and k != "num_labels"}
|
|
|
|
| 221 |
logger.info(f"Starting training for: {self.language.upper()}")
|
| 222 |
+
|
| 223 |
# Prepare dataset (train/val split)
|
| 224 |
train_encodings = self._tokenize(X_train)
|
| 225 |
full_dataset = CodeBERTaDataset(train_encodings, y_train, num_labels=self.params["num_labels"])
|
| 226 |
+
full_dataset = CodeBERTaDataset(train_encodings, y_train, num_labels=self.params["num_labels"])
|
| 227 |
train_size = int(self.params["train_size"] * len(full_dataset))
|
| 228 |
val_size = len(full_dataset) - train_size
|
| 229 |
train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])
|
| 230 |
|
| 231 |
temp_ckpt_dir = os.path.join(MODELS_DIR, "temp_checkpoints")
|
| 232 |
+
|
| 233 |
use_fp16 = torch.cuda.is_available()
|
|
|
|
|
|
|
| 234 |
|
| 235 |
training_args = TrainingArguments(
|
| 236 |
output_dir=temp_ckpt_dir,
|
|
|
|
| 310 |
idx = int(label_idx)
|
| 311 |
if 0 <= idx < num_labels:
|
| 312 |
y_test_expanded[i, idx] = 1
|
|
|
|
|
|
|
| 313 |
|
| 314 |
+
y_test_np = y_test_expanded
|
| 315 |
# Generate classification report
|
| 316 |
report = classification_report(y_test_np, y_pred, zero_division=0)
|
| 317 |
print("\n" + "=" * 50)
|
|
|
|
| 325 |
"recall": recall_score(y_test_np, y_pred, average="macro", zero_division=0),
|
| 326 |
"f1_score": f1_score(y_test_np, y_pred, average="macro"),
|
| 327 |
}
|
|
|
|
| 328 |
mlflow.log_metrics(metrics)
|
| 329 |
+
logger.info(f"Evaluation completed — Accuracy: {metrics['accuracy']:.3f}, F1: {metrics['f1_score']:.3f}")
|
|
|
|
|
|
|
|
|
|
| 330 |
return metrics
|
| 331 |
|
| 332 |
|
|
|
|
| 341 |
Returns:
|
| 342 |
np.ndarray: Multi-Hot Encoded predictions (e.g., [[0, 1, 1, 0], ...])
|
| 343 |
"""
|
| 344 |
+
|
| 345 |
if self.model is None:
|
| 346 |
raise ValueError("Model is not trained. Call train() or load() before prediction.")
|
| 347 |
|
| 348 |
# Set model to evaluation mode
|
| 349 |
self.model.eval()
|
| 350 |
|
| 351 |
+
# Tokenize inputs
|
| 352 |
encodings = self._tokenize(X)
|
|
|
|
|
|
|
| 353 |
|
| 354 |
+
# Convert lists to tensors and move to device
|
| 355 |
+
inputs = {key: torch.tensor(val).to(self.device) for key, val in encodings.items()}
|
| 356 |
|
| 357 |
+
# Inference (no gradients, lightweight)
|
| 358 |
+
with torch.no_grad():
|
| 359 |
+
outputs = self.model(**inputs)
|
| 360 |
+
logits = outputs.logits
|
|
|
|
|
|
|
|
|
|
| 361 |
|
| 362 |
+
# Move back to CPU and convert to numpy
|
| 363 |
+
logits = logits.cpu().numpy()
|
| 364 |
|
| 365 |
+
# Sigmoid + Threshold
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
probs = 1 / (1 + np.exp(-logits))
|
| 367 |
|
| 368 |
# Apply a threshold of 0.5 (if prob > 0.5, predict 1 else 0)
|
|
|
|
| 370 |
|
| 371 |
return preds_binary
|
| 372 |
|
|
|
|
| 373 |
def save(self, path, model_name):
|
| 374 |
"""
|
| 375 |
Save model locally and log to MLflow as artifact.
|
|
|
|
| 402 |
except Exception as e:
|
| 403 |
logger.error(f"Failed to log model artifacts to MLflow: {e}")
|
| 404 |
|
|
|
|
| 405 |
def load(self, model_path):
|
| 406 |
"""
|
| 407 |
Load model from a local path OR an MLflow URI.
|
|
|
|
| 428 |
try:
|
| 429 |
if not os.path.exists(local_model_path):
|
| 430 |
raise FileNotFoundError(f"Model path not found: {local_model_path}")
|
| 431 |
+
|
| 432 |
# Load tokenizer and model from local path
|
| 433 |
self.tokenizer = AutoTokenizer.from_pretrained(local_model_path)
|
| 434 |
self.model = AutoModelForSequenceClassification.from_pretrained(
|
| 435 |
+
local_model_path,
|
| 436 |
+
low_cpu_mem_usage=False
|
| 437 |
).to(self.device)
|
| 438 |
logger.info("Model loaded from local path successfully.")
|
|
|
|
| 439 |
except Exception as e:
|
| 440 |
logger.error(f"Failed to load model from local path: {e}")
|
| 441 |
raise e
|
turing/modeling/models/graphCodeBert.py
CHANGED
|
@@ -353,39 +353,31 @@ class GraphCodeBERTClassifier(BaseModel):
|
|
| 353 |
|
| 354 |
Returns:
|
| 355 |
np.ndarray: Multi-Hot Encoded predictions (e.g., [[0, 1, 1, 0], ...])
|
|
|
|
| 356 |
"""
|
| 357 |
-
|
| 358 |
if self.model is None:
|
| 359 |
raise ValueError("Model is not trained. Call train() or load() before prediction.")
|
| 360 |
|
| 361 |
# Set model to evaluation mode
|
| 362 |
self.model.eval()
|
| 363 |
|
|
|
|
| 364 |
encodings = self._tokenize(X)
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
use_fp16 = torch.cuda.is_available()
|
| 369 |
-
|
| 370 |
-
training_args = TrainingArguments(
|
| 371 |
-
output_dir="./pred_temp",
|
| 372 |
-
per_device_eval_batch_size=self.params["batch_size_eval"],
|
| 373 |
-
fp16=use_fp16,
|
| 374 |
-
report_to="none",
|
| 375 |
-
no_cuda=not torch.cuda.is_available(),
|
| 376 |
-
)
|
| 377 |
|
| 378 |
-
|
| 379 |
-
|
|
|
|
|
|
|
| 380 |
|
| 381 |
-
#
|
| 382 |
-
|
| 383 |
-
shutil.rmtree("./pred_temp")
|
| 384 |
|
| 385 |
-
#
|
| 386 |
-
logits = output.predictions
|
| 387 |
probs = 1 / (1 + np.exp(-logits))
|
| 388 |
-
|
| 389 |
# Apply a threshold of 0.5 (if prob > 0.5, predict 1 else 0)
|
| 390 |
preds_binary = (probs > 0.5).astype(int)
|
| 391 |
|
|
@@ -456,9 +448,10 @@ class GraphCodeBERTClassifier(BaseModel):
|
|
| 456 |
|
| 457 |
# Load tokenizer and model from local path
|
| 458 |
self.tokenizer = AutoTokenizer.from_pretrained(local_model_path)
|
| 459 |
-
self.model = AutoModelForSequenceClassification.from_pretrained(
|
| 460 |
-
|
| 461 |
-
|
|
|
|
| 462 |
logger.info("Model loaded from local path successfully.")
|
| 463 |
|
| 464 |
except Exception as e:
|
|
|
|
| 353 |
|
| 354 |
Returns:
|
| 355 |
np.ndarray: Multi-Hot Encoded predictions (e.g., [[0, 1, 1, 0], ...])
|
| 356 |
+
Make predictions for Multi-Label classification using direct PyTorch inference.
|
| 357 |
"""
|
|
|
|
| 358 |
if self.model is None:
|
| 359 |
raise ValueError("Model is not trained. Call train() or load() before prediction.")
|
| 360 |
|
| 361 |
# Set model to evaluation mode
|
| 362 |
self.model.eval()
|
| 363 |
|
| 364 |
+
# Tokenize inputs
|
| 365 |
encodings = self._tokenize(X)
|
| 366 |
+
|
| 367 |
+
# Convert lists to tensors and move to device
|
| 368 |
+
inputs = {key: torch.tensor(val).to(self.device) for key, val in encodings.items()}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
|
| 370 |
+
# Inference (no gradients, lightweight)
|
| 371 |
+
with torch.no_grad():
|
| 372 |
+
outputs = self.model(**inputs)
|
| 373 |
+
logits = outputs.logits
|
| 374 |
|
| 375 |
+
# Move back to CPU and convert to numpy
|
| 376 |
+
logits = logits.cpu().numpy()
|
|
|
|
| 377 |
|
| 378 |
+
# Sigmoid + Threshold
|
|
|
|
| 379 |
probs = 1 / (1 + np.exp(-logits))
|
| 380 |
+
|
| 381 |
# Apply a threshold of 0.5 (if prob > 0.5, predict 1 else 0)
|
| 382 |
preds_binary = (probs > 0.5).astype(int)
|
| 383 |
|
|
|
|
| 448 |
|
| 449 |
# Load tokenizer and model from local path
|
| 450 |
self.tokenizer = AutoTokenizer.from_pretrained(local_model_path)
|
| 451 |
+
self.model = AutoModelForSequenceClassification.from_pretrained(
|
| 452 |
+
local_model_path,
|
| 453 |
+
low_cpu_mem_usage=False
|
| 454 |
+
).to(self.device)
|
| 455 |
logger.info("Model loaded from local path successfully.")
|
| 456 |
|
| 457 |
except Exception as e:
|
turing/modeling/predict.py
CHANGED
|
@@ -39,6 +39,7 @@ class ModelInference:
|
|
| 39 |
warnings.filterwarnings("ignore")
|
| 40 |
self.dataset_manager = DatasetManager()
|
| 41 |
self.use_best_model_tags = use_best_model_tags
|
|
|
|
| 42 |
|
| 43 |
# Initialize model registry based on configuration
|
| 44 |
if use_best_model_tags:
|
|
@@ -141,20 +142,26 @@ class ModelInference:
|
|
| 141 |
model_config = self.model_registry[language]
|
| 142 |
run_id = model_config["run_id"]
|
| 143 |
artifact_name = model_config["artifact"]
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
# 3. Predict
|
| 160 |
raw_predictions = model.predict(texts)
|
|
|
|
| 39 |
warnings.filterwarnings("ignore")
|
| 40 |
self.dataset_manager = DatasetManager()
|
| 41 |
self.use_best_model_tags = use_best_model_tags
|
| 42 |
+
self.loaded_models = {}
|
| 43 |
|
| 44 |
# Initialize model registry based on configuration
|
| 45 |
if use_best_model_tags:
|
|
|
|
| 142 |
model_config = self.model_registry[language]
|
| 143 |
run_id = model_config["run_id"]
|
| 144 |
artifact_name = model_config["artifact"]
|
| 145 |
+
if language not in self.loaded_models:
|
| 146 |
+
logger.info(f"Model for {language} not in memory. Loading...")
|
| 147 |
+
|
| 148 |
+
model_id = model_config["model_id"]
|
| 149 |
+
|
| 150 |
+
# Dynamically import model class
|
| 151 |
+
config_entry = MODEL_CONFIG[model_id]
|
| 152 |
+
module_name = config_entry["model_class_module"]
|
| 153 |
+
class_name = config_entry["model_class_name"]
|
| 154 |
+
module = importlib.import_module(module_name)
|
| 155 |
+
model_class = getattr(module, class_name)
|
| 156 |
+
|
| 157 |
+
# Get Model Path (Local Cache or Download)
|
| 158 |
+
model_path = self._get_cached_model_path(run_id, artifact_name, language)
|
| 159 |
+
|
| 160 |
+
# Load Model and store in cache
|
| 161 |
+
self.loaded_models[language] = model_class(language=language, path=model_path)
|
| 162 |
+
logger.success(f"Model for {language} loaded into memory.")
|
| 163 |
+
|
| 164 |
+
model = self.loaded_models[language]
|
| 165 |
|
| 166 |
# 3. Predict
|
| 167 |
raw_predictions = model.predict(texts)
|
turing/monitoring/locustfile.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
|
| 3 |
+
from locust import HttpUser, between, task
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class TuringApiUser(HttpUser):
    """Simulated client that Locust uses to load-test the Turing API.

    Two tasks are registered: a health check (weight 1) and a prediction
    request (weight 3).
    """

    # Pause taken by a simulated user between tasks, produced by between(1, 5).
    wait_time = between(1, 5)

    # Languages a prediction request can be issued for.
    languages = ["python", "java", "pharo"]

    # Sample inputs sent to the prediction endpoint, keyed by language.
    code_snippets = {
        "python": ["def init(self): pass", "print('Hello World')", "import os"],
        "java": ["public static void main(String[] args)", "System.out.println(e);", "private int x = 0;"],
        "pharo": ["Transcript show: 'Hello'.", "^ self size", "Object subclass: #Name"],
    }

    @task(1)
    def health_check(self):
        """Issue a GET on the root endpoint to check that the API responds."""
        self.client.get("/")

    @task(3)
    def predict_code_classification(self):
        """POST the sample snippets of one randomly chosen language to /predict."""
        lang = random.choice(self.languages)

        # Send the snippets registered for the chosen language, so the
        # "texts" and "language" fields of the payload agree.
        body = {
            "texts": self.code_snippets.get(lang, ["generic code"]),
            "language": lang,
        }

        # A fixed request name groups every language under one statistics entry.
        self.client.post(
            "/predict",
            json=body,
            headers={"Content-Type": "application/json"},
            name="/predict (random lang)",
        )
|