5 / util.py
yuzzznh's picture
Create util.py
99ca6d6 verified
import json
def validate_jsonl(file_path):
with open(file_path, 'r') as file:
for line_number, line in enumerate(file, start=1):
try:
data = json.loads(line)
if 'messages' not in data:
print(f"Line {line_number}: Missing 'messages' key")
return
messages = data['messages']
if not isinstance(messages, list):
print(f"Line {line_number}: 'messages' is not a list")
return
for message in messages:
if not isinstance(message, dict):
print(f"Line {line_number}: A message is not a dictionary")
return
if 'role' not in message or 'content' not in message:
print(f"Line {line_number}: Missing 'role' or 'content' key in a message")
return
except json.JSONDecodeError:
print(f"Line {line_number}: Invalid JSON")
return
print("Validation successful: All data conforms to the schema.")
validate_jsonl("train.jsonl")
validate_jsonl("test.jsonl")