galb-dai committed on
Commit
c54822c
·
1 Parent(s): 69f2af8

Added logging to validation logic.

Browse files
Files changed (1) hide show
  1. src/validation/validate.py +18 -2
src/validation/validate.py CHANGED
@@ -2,6 +2,8 @@ import json
2
  import os
3
  import string
4
 
 
 
5
  DATASET_SIZE = 120
6
 
7
  MIN_INPUT_LENGTH = 2
@@ -12,6 +14,8 @@ MAX_SUBMISSION_SIZE = 1024 * 1024 * 120 # 120 MB.
12
  MAX_SINGLE_SUBMISSION_SIZE = 1024 * 1024 # 1MB.
13
  MAX_SUBMISSION_LINES = DATASET_SIZE + 1 # Allow empty line.
14
 
 
 
15
 
16
  def is_valid(
17
  s: str,
@@ -51,10 +55,12 @@ def is_submission_file_valid(submission_path: str) -> bool:
51
  """
52
 
53
  if not os.path.exists(submission_path):
 
54
  return False
55
 
56
  submission_size = os.stat(submission_path).st_size
57
  if submission_size < MIN_SUBMISSION_SIZE or submission_size > MAX_SUBMISSION_SIZE:
 
58
  return False
59
 
60
  with open(submission_path, "r") as f:
@@ -65,24 +71,34 @@ def is_submission_file_valid(submission_path: str) -> bool:
65
  while len(line := f.readline(MAX_SINGLE_SUBMISSION_SIZE)) > 0:
66
  n_lines += 1
67
  if n_lines > MAX_SUBMISSION_LINES:
 
68
  return False
69
 
70
  if not line.startswith("{") or not line.endswith("}"):
 
71
  return False
72
 
73
  d = json.loads(line)
74
  if set(d.keys()) != set(["problem_id", "solution"]):
 
75
  return False
76
 
77
  if not ((type(d["problem_id"]) is str or type(d["problem_id"]) is int) and type(d["solution"] is str)):
 
78
  return False
79
- if not d["problem_id"].isdigit():
 
 
 
 
80
  return False
81
- problem_id = int(d["problem_id"])
82
  if problem_id < 0 or problem_id >= DATASET_SIZE:
 
83
  return False
84
 
85
  if problem_id in seen_ids:
 
86
  return False # Duplicate submission.
87
  seen_ids.add(problem_id)
88
 
 
2
  import os
3
  import string
4
 
5
+ from src.logger import get_logger
6
+
7
  DATASET_SIZE = 120
8
 
9
  MIN_INPUT_LENGTH = 2
 
14
  MAX_SINGLE_SUBMISSION_SIZE = 1024 * 1024 # 1MB.
15
  MAX_SUBMISSION_LINES = DATASET_SIZE + 1 # Allow empty line.
16
 
17
+ logger = get_logger()
18
+
19
 
20
  def is_valid(
21
  s: str,
 
55
  """
56
 
57
  if not os.path.exists(submission_path):
58
+ logger.warning(f"Could not find submission file {submission_path=}")
59
  return False
60
 
61
  submission_size = os.stat(submission_path).st_size
62
  if submission_size < MIN_SUBMISSION_SIZE or submission_size > MAX_SUBMISSION_SIZE:
63
+ logger.warning(f"Submission size was {submission_size}, exceeding [{MIN_SUBMISSION_SIZE, MAX_SUBMISSION_SIZE}]")
64
  return False
65
 
66
  with open(submission_path, "r") as f:
 
71
  while len(line := f.readline(MAX_SINGLE_SUBMISSION_SIZE)) > 0:
72
  n_lines += 1
73
  if n_lines > MAX_SUBMISSION_LINES:
74
+ logger.warning(f"Got submission with more than {MAX_SUBMISSION_LINES} lines")
75
  return False
76
 
77
  if not line.startswith("{") or not line.endswith("}"):
78
+ logger.warning("Submission has line that does not appear to be a JSONL")
79
  return False
80
 
81
  d = json.loads(line)
82
  if set(d.keys()) != set(["problem_id", "solution"]):
83
+ logger.warning("Found unexpected keys")
84
  return False
85
 
86
  if not ((type(d["problem_id"]) is str or type(d["problem_id"]) is int) and type(d["solution"] is str)):
87
+ logger.warning("Found unexpected types")
88
  return False
89
+
90
+ try:
91
+ problem_id = int(d["problem_id"])
92
+ except Exception:
93
+ logger.warning("Could not convert problem ID to int")
94
  return False
95
+
96
  if problem_id < 0 or problem_id >= DATASET_SIZE:
97
+ logger.warning(f"Problem ID {problem_id} is beyond allowed bounds")
98
  return False
99
 
100
  if problem_id in seen_ids:
101
+ logger.warning(f"Got duplicate submission -- ID {problem_id} appears twice")
102
  return False # Duplicate submission.
103
  seen_ids.add(problem_id)
104