OUDOUD_148_28 / eval_helper.py

Upload folder using huggingface_hub

7bd6c7e verified about 2 months ago

7.3 kB

	import re

	import json

	class EvalHandler:
	    """Detect formatting constraints in an instruction and patch a response.

	    ``detect_rules`` maps an instruction string to a list of checker names
	    by running the compiled regexes in ``self.rule_patterns``.
	    ``apply_rule_fix`` then runs the matching ``_fix_*`` method for each
	    checker name, feeding each fixer the output of the previous one.
	    """

	    # (pattern key, checker name) pairs, in the order detect_rules reports them.
	    _RULE_CHECKERS = (
	        ('comma_restriction', 'CommaChecker'),
	        ('placeholder_requirement', 'PlaceholderChecker'),
	        ('lowercase_requirement', 'LowercaseLettersEnglishChecker'),
	        ('capital_frequency', 'CapitalWordFrequencyChecker'),
	        ('quotation_requirement', 'QuotationChecker'),
	        ('json_format', 'JsonFormat'),
	        ('word_count', 'NumberOfWords'),
	        ('section_requirement', 'SectionChecker'),
	        ('ending_requirement', 'EndChecker'),
	        ('forbidden_words', 'ForbiddenWords'),
	        ('capital_letters_only', 'CapitalLettersEnglishChecker'),
	    )

	    # Body used for a section when the response has no paragraph left for it.
	    _SECTION_FILLER = "This section provides content here."

	    def __init__(self):
	        """Compile the case-insensitive detection pattern for every rule."""
	        flags = re.IGNORECASE
	        # Alternatives are separated by a bare `|` (an escaped `\|` would
	        # match a literal pipe character), and keywords are joined by `.*`
	        # so that intervening words or numbers ("less than 50 words") do
	        # not prevent a match.
	        self.rule_patterns = {
	            'comma_restriction': re.compile(
	                r'no.*comma|without.*comma', flags),
	            'placeholder_requirement': re.compile(
	                r'placeholder.*\[.*\]|square.*bracket', flags),
	            'lowercase_requirement': re.compile(
	                r'lowercase|no.*capital|all.*lowercase', flags),
	            'capital_frequency': re.compile(
	                r'capital.*letter.*less.*than|capital.*word.*frequency', flags),
	            'quotation_requirement': re.compile(
	                r'wrap.*quotation|double.*quote', flags),
	            'json_format': re.compile(
	                r'json.*format|JSON.*output|format.*json', flags),
	            'word_count': re.compile(
	                r'less.*than.*word|word.*limit|maximum.*word', flags),
	            'section_requirement': re.compile(
	                r'section.*start|SECTION.*X', flags),
	            'ending_requirement': re.compile(
	                r'finish.*exact.*phrase|end.*phrase', flags),
	            'forbidden_words': re.compile(
	                r'not.*allowed|forbidden.*word|without.*word', flags),
	            'capital_letters_only': re.compile(
	                r'all.*capital|CAPITAL.*letter', flags),
	        }

	    def detect_rules(self, instruction: str) -> list:
	        """Return the checker names whose pattern matches *instruction*.

	        Names come back in the fixed order of ``_RULE_CHECKERS``.
	        """
	        applicable_rules = [
	            checker
	            for key, checker in self._RULE_CHECKERS
	            if self.rule_patterns[key].search(instruction)
	        ]
	        # The all-capitals pattern also matches phrasings such as "no
	        # capital letters" or "capital letters ... less than N times".
	        # Upper-casing the whole response would undo the lowercase or
	        # frequency fix, so the all-capitals rule yields to those two.
	        overriding = {
	            'LowercaseLettersEnglishChecker',
	            'CapitalWordFrequencyChecker',
	        }
	        if overriding.intersection(applicable_rules):
	            applicable_rules = [
	                rule for rule in applicable_rules
	                if rule != 'CapitalLettersEnglishChecker'
	            ]
	        return applicable_rules

	    def apply_rule_fix(self, response: str, rules, instruction: str = "") -> str:
	        """Apply the fixer for each name in *rules*, in the order given.

	        Each fixer receives the output of the previous one. Names without
	        a registered fixer are skipped.
	        """
	        fixers = {
	            'CommaChecker': self._fix_commas,
	            'PlaceholderChecker': self._fix_placeholders,
	            # These two fixers take no instruction argument.
	            'LowercaseLettersEnglishChecker':
	                lambda text, _instruction: self._fix_lowercase(text),
	            'CapitalWordFrequencyChecker': self._fix_capital_frequency,
	            'QuotationChecker':
	                lambda text, _instruction: self._fix_quotations(text),
	            'JsonFormat': self._fix_json_format,
	            'NumberOfWords': self._fix_word_count,
	            'SectionChecker': self._fix_sections,
	            'EndChecker': self._fix_ending,
	            'ForbiddenWords': self._fix_forbidden_words,
	            'CapitalLettersEnglishChecker': self._fix_all_capitals,
	        }
	        for rule in rules:
	            fixer = fixers.get(rule)
	            if fixer is not None:
	                response = fixer(response, instruction)
	        return response

	    def _fix_commas(self, response: str, instruction: str) -> str:
	        """Remove every comma from *response*."""
	        return response.replace(',', '')

	    def _fix_placeholders(self, response: str, instruction: str) -> str:
	        """Bracket words until *response* has "at least N" ``[...]`` placeholders.

	        N is read from the first "at least <number>" in *instruction*. The
	        response is returned untouched when no number is found or when it
	        already holds enough placeholders. If the response runs out of
	        words, fewer than N placeholders may result.
	        """
	        num_match = re.search(r'at least (\d+)', instruction, re.IGNORECASE)
	        if not num_match:
	            return response
	        existing = len(re.findall(r'\[.*?\]', response))
	        missing = int(num_match.group(1)) - existing
	        if missing <= 0:
	            return response
	        # Splitting on a capturing group keeps the existing placeholders at
	        # the odd indices; only the text between them (even indices) is
	        # edited, so a placeholder is never wrapped a second time and the
	        # original whitespace is preserved.
	        pieces = re.split(r'(\[.*?\])', response)
	        for index in range(0, len(pieces), 2):
	            if missing <= 0:
	                break
	            pieces[index], wrapped = re.subn(
	                r'[^\s\[\]]+',
	                lambda found: f'[{found.group(0)}]',
	                pieces[index],
	                count=missing,
	            )
	            missing -= wrapped
	        return ''.join(pieces)

	    def _fix_lowercase(self, response: str) -> str:
	        """Return *response* converted to lowercase."""
	        return response.lower()

	    def _fix_capital_frequency(self, response: str, instruction: str) -> str:
	        """Lowercase all-caps words until fewer than N remain.

	        N is read from the first "less than <number>" in *instruction*;
	        "less than N" is strict, so at most N - 1 all-caps words are kept.
	        The earliest all-caps words are lowered first and whitespace is
	        left as it was.
	        """
	        max_match = re.search(r'less than (\d+)', instruction, re.IGNORECASE)
	        if not max_match:
	            return response
	        allowed = max(int(max_match.group(1)) - 1, 0)
	        # The capturing split keeps the whitespace runs as their own
	        # tokens, so joining the tokens reproduces the original layout.
	        tokens = re.split(r'(\s+)', response)
	        excess = sum(1 for token in tokens if token.isupper()) - allowed
	        if excess <= 0:
	            return response
	        for index, token in enumerate(tokens):
	            if excess <= 0:
	                break
	            if token.isupper():
	                tokens[index] = token.lower()
	                excess -= 1
	        return ''.join(tokens)

	    def _fix_quotations(self, response: str) -> str:
	        """Wrap *response* in double quotes unless it is already wrapped."""
	        trimmed = response.strip()
	        already_quoted = (
	            len(trimmed) >= 2
	            and trimmed.startswith('"')
	            and trimmed.endswith('"')
	        )
	        if already_quoted:
	            return response
	        return f'"{response}"'

	    def _fix_json_format(self, response: str, instruction: str) -> str:
	        """Return *response* as JSON text.

	        A response that already parses to a JSON object or array is
	        returned unchanged; anything else is wrapped as
	        ``{"response": <response>}`` with a two-space indent.
	        """
	        try:
	            parsed = json.loads(response)
	        except (TypeError, ValueError):
	            parsed = None
	        if isinstance(parsed, (dict, list)):
	            return response
	        return json.dumps({"response": response}, indent=2)

	    def _fix_word_count(self, response: str, instruction: str) -> str:
	        """Truncate *response* so it has fewer than N whitespace-separated words.

	        N is read from the first "less than <number>" in *instruction*;
	        "less than N" is strict, so at most N - 1 words are kept. A
	        response already within the limit is returned unchanged.
	        """
	        limit_match = re.search(r'less than (\d+)', instruction, re.IGNORECASE)
	        if not limit_match:
	            return response
	        allowed = max(int(limit_match.group(1)) - 1, 0)
	        words = response.split()
	        if len(words) <= allowed:
	            return response
	        return ' '.join(words[:allowed])

	    def _fix_sections(self, response: str, instruction: str) -> str:
	        """Lay *response* out as N blocks headed ``SECTION i:``.

	        N is read from the first "<number> section" in *instruction*. A
	        response that already contains at least N ``SECTION <number>``
	        markers is returned unchanged. Otherwise its paragraphs (split on
	        blank lines) are dealt out, in order, across the N sections;
	        sections left without a paragraph receive a filler sentence.
	        """
	        section_match = re.search(r'(\d+) section', instruction, re.IGNORECASE)
	        if not section_match:
	            return response
	        num_sections = int(section_match.group(1))
	        if len(re.findall(r'SECTION\s*\d+', response)) >= num_sections:
	            return response

	        paragraphs = [
	            chunk.strip()
	            for chunk in re.split(r'\n\s*\n', response)
	            if chunk.strip()
	        ]
	        total = len(paragraphs)
	        sections = []
	        for index in range(num_sections):
	            # Ceiling division hands the paragraphs to the earliest
	            # sections first when there are fewer paragraphs than sections.
	            start = (index * total + num_sections - 1) // num_sections
	            stop = ((index + 1) * total + num_sections - 1) // num_sections
	            body = '\n\n'.join(paragraphs[start:stop]) or self._SECTION_FILLER
	            sections.append(f"SECTION {index + 1}:")
	            sections.append(body)
	        return '\n\n'.join(sections)

	    def _fix_ending(self, response: str, instruction: str) -> str:
	        """Make *response* end with the phrase requested in *instruction*.

	        The phrase is the text following "finish ... with ... phrase" up to
	        the next ``.``, ``!`` or ``?``. The response is returned unchanged
	        when no phrase is found or when it already ends with the phrase
	        (ignoring trailing whitespace).
	        """
	        # Non-greedy gaps stop at the first "phrase", so a later mention of
	        # the word in the instruction cannot swallow the phrase itself.
	        end_match = re.search(
	            r'finish.*?with.*?phrase[:\s]*([^.!?]*)',
	            instruction,
	            re.IGNORECASE,
	        )
	        if not end_match:
	            return response
	        required_ending = end_match.group(1).strip()
	        if not required_ending:
	            return response
	        trimmed = response.rstrip()
	        if trimmed.endswith(required_ending):
	            return response
	        if not trimmed:
	            return required_ending
	        return f"{trimmed} {required_ending}"

	    def _fix_forbidden_words(self, response: str, instruction: str) -> str:
	        """Delete the words *instruction* forbids and strip the result.

	        The forbidden words are the comma-separated terms following
	        "without ... word(s)" up to the next ``.``, ``!`` or ``?``. Terms
	        are matched case-insensitively and only as whole words, so
	        forbidding "cat" leaves "concatenate" and "cats" intact. The
	        result is always stripped of leading and trailing whitespace.
	        """
	        forbidden_match = re.search(
	            r'without.*?words?[:\s]*([^.!?]*)',
	            instruction,
	            re.IGNORECASE,
	        )
	        if forbidden_match:
	            for raw_term in forbidden_match.group(1).split(','):
	                term = raw_term.strip().strip('"\'').strip()
	                if not term:
	                    continue
	                # The leading `[ \t]*` also removes the blanks before the
	                # word, so the deletion does not leave a double space.
	                pattern = r'[ \t]*(?<!\w)' + re.escape(term) + r'(?!\w)'
	                response = re.sub(pattern, '', response, flags=re.IGNORECASE)
	        return response.strip()

	    def _fix_all_capitals(self, response: str, instruction: str) -> str:
	        """Return *response* converted to uppercase."""
	        return response.upper()