Latest-JSON-Env / complex_json_output.py

Upload folder using huggingface_hub

e0b3d16 verified 7 months ago

13.1 kB

	import json
	from datasets import load_dataset

	import verifiers as vf


	def load_environment(
	    num_train_examples=7000,
	    num_eval_examples=1000,
	    **kwargs
	):
	    """
	    Build the single-turn environment that scores complex JSON output.

	    Each dataset row supplies a prompt and a ``verification_info`` JSON string
	    whose ``ground_truth`` entry is the expected object. A completion is
	    scored by extracting its JSON (optionally wrapped in a Markdown code
	    fence) and comparing it with that ground truth.

	    Reward structure (multiplicative):
	    - If the response is empty, is not valid JSON, or is not a JSON object:
	      reward = 0
	    - Otherwise:
	        * key_accuracy = (correct_keys) / (total_keys_in_response)
	        * value_accuracy = (correct_values) / (total_keys_in_response)
	        * final_reward = key_accuracy * value_accuracy

	    A "key" is the full path of nested object keys leading to a value; lists
	    are not descended into and are compared as whole values. Both ratios are
	    taken over the key paths present in the *response*: extra or wrong keys
	    lower the reward, while ground-truth keys that the response omits are
	    not counted against it.

	    Args:
	        num_train_examples: Number of leading dataset rows used for training.
	        num_eval_examples: Number of rows, taken immediately after the
	            training rows, used for evaluation.
	        **kwargs: Accepted for call compatibility; not used.

	    Returns:
	        The configured ``vf.SingleTurnEnv``.
	    """
	    # Function-local import so the block does not depend on a module-level
	    # ``import re``.
	    import re

	    # Load dataset from HuggingFace
	    dataset = load_dataset("Delta-Vector/Tauri-Complex-JSON-Formatting", split="train")

	    def format_example(example):
	        """Map a raw row onto the ``question`` / ``info`` columns."""
	        return {
	            "question": example["prompt"],
	            # verification_info is kept as a JSON string to avoid schema issues
	            "info": {"verification_info": example["verification_info"]},
	        }

	    dataset = dataset.map(format_example, remove_columns=dataset.column_names)

	    # Split into train and eval: rows [0, train_end) and [train_end, eval_end).
	    # NOTE(review): assumes the dataset holds at least eval_end rows - confirm.
	    train_end = num_train_examples
	    eval_end = num_train_examples + num_eval_examples
	    train_dataset = dataset.select(range(train_end))
	    eval_dataset = dataset.select(range(train_end, eval_end))

	    # Matches ```json ... ``` or ``` ... ``` fences. ``\s*`` tolerates trailing
	    # spaces after the opening fence and the lazy ``(.*?)`` (with DOTALL)
	    # captures a multi-line body up to the nearest closing fence.
	    code_fence_re = re.compile(r"```(?:json)?\s*\n(.*?)\n\s*```", re.DOTALL)

	    def extract_json_from_completion(completion):
	        """
	        Return the JSON text of a completion.

	        Accepts either a message list (the last message's ``content`` is
	        used) or any other value, which is stringified. If the text contains
	        fenced code blocks, the body of the last one is returned; otherwise
	        the stripped text itself is returned. Empty input yields ``""``.
	        """
	        if not completion:
	            return ""

	        if isinstance(completion, list):
	            last_message = completion[-1]
	            content = (
	                last_message.get("content")
	                if isinstance(last_message, dict)
	                else last_message
	            )
	            # A missing or null content must not break ``.strip()`` below.
	            content = "" if content is None else str(content)
	        else:
	            content = str(completion)

	        fenced_blocks = code_fence_re.findall(content)
	        if fenced_blocks:
	            return fenced_blocks[-1].strip()  # Last code block wins
	        return content.strip()

	    # Use simple Parser with custom extract function
	    parser = vf.Parser(extract_fn=extract_json_from_completion)

	    def parse_response(completion):
	        """Return the completion parsed as a JSON object, or None if unusable."""
	        try:
	            text = (parser.parse_answer(completion) or "").strip()
	            if not text:
	                return None
	            parsed = json.loads(text)
	        except (ValueError, TypeError, AttributeError, KeyError):
	            # json.JSONDecodeError is a ValueError subclass.
	            return None
	        # Ground truth is always an object, so any other JSON type is rejected.
	        return parsed if isinstance(parsed, dict) else None

	    def load_ground_truth(info):
	        """Return ``ground_truth`` from ``info``, or None if it cannot be read."""
	        try:
	            verification = info["verification_info"]
	            # Stored as a JSON string; an already-decoded mapping is accepted too.
	            if isinstance(verification, (str, bytes, bytearray)):
	                verification = json.loads(verification)
	            return verification["ground_truth"]
	        except (KeyError, TypeError, ValueError):
	            return None

	    def flatten_paths(node, prefix=()):
	        """
	        Map every nested key path in ``node`` to the value stored there.

	        Paths are tuples of keys rather than dotted strings so that keys
	        which themselves contain "." stay unambiguous. Only dicts are
	        descended into; a non-dict ``node`` yields an empty mapping.
	        """
	        paths = {}
	        if isinstance(node, dict):
	            for key, value in node.items():
	                path = prefix + (key,)
	                paths[path] = value
	                paths.update(flatten_paths(value, path))
	        return paths

	    def json_equal(left, right):
	        """
	        Compare two decoded JSON values.

	        Integers and floats compare by numeric value (25 == 25.0), but
	        booleans only ever equal booleans: ``bool`` is an ``int`` subclass in
	        Python, so a plain ``==`` would treat JSON ``true`` as equal to ``1``.
	        """
	        if isinstance(left, bool) or isinstance(right, bool):
	            return isinstance(left, bool) and isinstance(right, bool) and left == right
	        if isinstance(left, dict) and isinstance(right, dict):
	            return left.keys() == right.keys() and all(
	                json_equal(left[key], right[key]) for key in left
	            )
	        if isinstance(left, list) and isinstance(right, list):
	            return len(left) == len(right) and all(
	                json_equal(a, b) for a, b in zip(left, right)
	            )
	        return left == right

	    def score(completion, info):
	        """
	        Return ``(key_accuracy, value_accuracy)`` for a completion.

	        Returns None when the response is not a JSON object or the ground
	        truth cannot be read. An empty response object scores ``(0.0, 0.0)``.
	        """
	        response = parse_response(completion)
	        if response is None:
	            return None
	        ground_truth = load_ground_truth(info)
	        if ground_truth is None:
	            return None

	        response_paths = flatten_paths(response)
	        if not response_paths:
	            return 0.0, 0.0
	        truth_paths = flatten_paths(ground_truth)

	        total = len(response_paths)
	        correct_keys = sum(1 for path in response_paths if path in truth_paths)
	        # Membership, not "is not None", decides whether a path exists, so a
	        # ground-truth value of JSON null can still be matched.
	        correct_values = sum(
	            1
	            for path, value in response_paths.items()
	            if path in truth_paths and json_equal(value, truth_paths[path])
	        )
	        return correct_keys / total, correct_values / total

	    def multiplicative_reward(completion, info, **kwargs) -> float:
	        """
	        Multiplicative reward: key_accuracy * value_accuracy.

	        Returns 0 if the response is not a JSON object or the ground truth is
	        unreadable. Both accuracies are ratios over the key paths present in
	        the response, so extra or incorrect entries reduce the reward.
	        """
	        accuracies = score(completion, info)
	        if accuracies is None:
	            return 0.0
	        key_accuracy, value_accuracy = accuracies
	        return key_accuracy * value_accuracy

	    def format_reward(completion, **kwargs) -> float:
	        """
	        Reward for valid JSON formatting.
	        Returns 0.33 for a valid JSON object, 0 otherwise.
	        """
	        return 0.33 if parse_response(completion) is not None else 0.0

	    def keys_match_reward(completion, info, **kwargs) -> float:
	        """
	        Metric: key accuracy (correct_keys / total_keys_in_response).
	        Returns the same key_accuracy used in multiplicative_reward.
	        """
	        accuracies = score(completion, info)
	        return 0.0 if accuracies is None else accuracies[0]

	    def values_match_reward(completion, info, **kwargs) -> float:
	        """
	        Metric: value accuracy (correct_values / total_keys_in_response).
	        Returns the same value_accuracy used in multiplicative_reward.
	        """
	        accuracies = score(completion, info)
	        return 0.0 if accuracies is None else accuracies[1]

	    # Create rubric with multiplicative reward
	    # Keep individual functions for debugging/metrics but use multiplicative for training
	    rubric = vf.Rubric(
	        parser=parser,
	        funcs=[
	            multiplicative_reward,  # Main reward - key_acc * value_acc
	            format_reward,  # Metric only (weight 0)
	            keys_match_reward,  # Metric only (weight 0)
	            values_match_reward,  # Metric only (weight 0)
	        ],
	        weights=[1.0, 0.0, 0.0, 0.0],  # Only multiplicative_reward counts
	    )

	    # Return SingleTurnEnv since this is a one-shot task
	    # No system prompt - let the dataset prompt speak for itself
	    vf_env = vf.SingleTurnEnv(
	        dataset=train_dataset,
	        eval_dataset=eval_dataset,
	        parser=parser,
	        rubric=rubric,
	    )

	    return vf_env