# OUDOUD_148_28 / eval_helper.py
# Shinichie's picture
# Upload folder using huggingface_hub
# 7bd6c7e verified
import re
import json
class EvalHandler:
    """Detect formatting constraints in an instruction and patch a response.

    ``detect_rules`` scans an instruction with keyword regexes and returns the
    names of the checkers that appear to apply.  ``apply_rule_fix`` then runs
    one best-effort text transformation per checker name.  Detection is purely
    heuristic: the patterns are loose keyword matches, not a parser.
    """

    # (pattern key, checker name) pairs, in the order rules are reported.
    _RULE_CHECKERS = (
        ('comma_restriction', 'CommaChecker'),
        ('placeholder_requirement', 'PlaceholderChecker'),
        ('lowercase_requirement', 'LowercaseLettersEnglishChecker'),
        ('capital_frequency', 'CapitalWordFrequencyChecker'),
        ('quotation_requirement', 'QuotationChecker'),
        ('json_format', 'JsonFormat'),
        ('word_count', 'NumberOfWords'),
        ('section_requirement', 'SectionChecker'),
        ('ending_requirement', 'EndChecker'),
        ('forbidden_words', 'ForbiddenWords'),
        ('capital_letters_only', 'CapitalLettersEnglishChecker'),
    )

    # "no capital ..." forbids capitals, so it must never trigger all-caps.
    _NEGATED_CAPITALS = re.compile(r'\bno\s+capital', re.IGNORECASE)

    def __init__(self):
        """Compile the keyword patterns used to recognise each constraint."""
        self.rule_patterns = {
            'comma_restriction': re.compile(r'no.*comma|without.*comma', re.IGNORECASE),
            'placeholder_requirement': re.compile(r'placeholder.*\[.*\]|square.*bracket', re.IGNORECASE),
            'lowercase_requirement': re.compile(r'lowercase|no.*capital|all.*lowercase', re.IGNORECASE),
            'capital_frequency': re.compile(r'capital.*letter.*less.*than|capital.*word.*frequency', re.IGNORECASE),
            'quotation_requirement': re.compile(r'wrap.*quotation|double.*quote', re.IGNORECASE),
            'json_format': re.compile(r'json.*format|JSON.*output|format.*json', re.IGNORECASE),
            'word_count': re.compile(r'less.*than.*word|word.*limit|maximum.*word', re.IGNORECASE),
            'section_requirement': re.compile(r'section.*start|SECTION.*X', re.IGNORECASE),
            'ending_requirement': re.compile(r'finish.*exact.*phrase|end.*phrase', re.IGNORECASE),
            'forbidden_words': re.compile(r'not.*allowed|forbidden.*word|without.*word', re.IGNORECASE),
            'capital_letters_only': re.compile(r'all.*capital|CAPITAL.*letter', re.IGNORECASE),
        }

    def detect_rules(self, instruction):
        """Return the checker names whose keyword pattern matches *instruction*.

        The all-capitals pattern also matches phrases such as "no capital
        letters" and "words with all capital letters should appear less than
        N times".  Because the all-capitals fix would undo the lowercase or
        frequency fix, it is dropped when capitals are negated or when a
        capital-frequency rule was detected.

        Args:
            instruction: The instruction text to scan.

        Returns:
            A list of checker names, in the fixed order of ``_RULE_CHECKERS``.
        """
        applicable_rules = [
            checker
            for pattern_key, checker in self._RULE_CHECKERS
            if self.rule_patterns[pattern_key].search(instruction)
        ]
        all_caps = 'CapitalLettersEnglishChecker'
        if all_caps in applicable_rules and (
            'CapitalWordFrequencyChecker' in applicable_rules
            or self._NEGATED_CAPITALS.search(instruction)
        ):
            applicable_rules.remove(all_caps)
        return applicable_rules

    def apply_rule_fix(self, response, rules, instruction=""):
        """Apply the fixer for each name in *rules*, in the order given.

        Unknown rule names are ignored.  Fixers run sequentially, so the
        output of one is the input of the next; callers control the order.

        Args:
            response: The text to patch.
            rules: Iterable of checker names as returned by ``detect_rules``.
            instruction: The instruction the rules came from; several fixers
                read numeric limits or phrases out of it.

        Returns:
            The patched response.
        """
        fixers = {
            'CommaChecker': self._fix_commas,
            'PlaceholderChecker': self._fix_placeholders,
            'LowercaseLettersEnglishChecker': lambda text, _instr: self._fix_lowercase(text),
            'CapitalWordFrequencyChecker': self._fix_capital_frequency,
            'QuotationChecker': lambda text, _instr: self._fix_quotations(text),
            'JsonFormat': self._fix_json_format,
            'NumberOfWords': self._fix_word_count,
            'SectionChecker': self._fix_sections,
            'EndChecker': self._fix_ending,
            'ForbiddenWords': self._fix_forbidden_words,
            'CapitalLettersEnglishChecker': self._fix_all_capitals,
        }
        for rule in rules:
            fixer = fixers.get(rule)
            if fixer is not None:
                response = fixer(response, instruction)
        return response

    def _fix_commas(self, response, instruction):
        """Remove every comma from *response* (*instruction* is unused)."""
        return response.replace(',', '')

    def _fix_placeholders(self, response, instruction):
        """Ensure *response* has the "at least N" bracketed placeholders.

        Words outside existing ``[...]`` placeholders are wrapped in square
        brackets, left to right, until the target is reached.  Whitespace is
        preserved.  If the text runs out of words, generic ``[placeholderK]``
        tokens are appended.  Without an "at least N" phrase in the
        instruction the response is returned unchanged.
        """
        num_match = re.search(r'at least (\d+)', instruction, re.IGNORECASE)
        if not num_match:
            return response
        target_count = int(num_match.group(1))
        missing = target_count - len(re.findall(r'\[.*?\]', response))
        if missing <= 0:
            return response

        # Odd-indexed parts are existing placeholders; only even-indexed
        # parts (plain text) are eligible for wrapping, so "[name]" never
        # becomes "[[name]]".
        parts = re.split(r'(\[.*?\])', response)
        for index in range(0, len(parts), 2):
            if missing <= 0:
                break
            parts[index], wrapped = re.subn(
                r'[^\s\[\]]+',
                lambda found: f'[{found.group(0)}]',
                parts[index],
                count=missing,
            )
            missing -= wrapped
        patched = ''.join(parts)

        if missing > 0:
            extra = ' '.join(f'[placeholder{n}]' for n in range(1, missing + 1))
            patched = f'{patched} {extra}' if patched.strip() else extra
        return patched

    def _fix_lowercase(self, response):
        """Return *response* converted to lowercase."""
        return response.lower()

    def _fix_capital_frequency(self, response, instruction):
        """Lowercase all-caps words until fewer than the stated limit remain.

        The limit is read from a "less than N" phrase, so at most ``N - 1``
        all-caps words are kept.  The earliest offenders are lowercased first
        and the original whitespace is preserved.  Without such a phrase the
        response is returned unchanged.
        """
        max_match = re.search(r'less than (\d+)', instruction, re.IGNORECASE)
        if not max_match:
            return response
        allowed = max(int(max_match.group(1)) - 1, 0)

        # Keep the separators so joining the tokens restores the layout.
        tokens = re.split(r'(\s+)', response)
        excess = sum(1 for token in tokens if token.isupper()) - allowed
        if excess <= 0:
            return response
        for index, token in enumerate(tokens):
            if excess == 0:
                break
            if token.isupper():
                tokens[index] = token.lower()
                excess -= 1
        return ''.join(tokens)

    def _fix_quotations(self, response):
        """Wrap *response* in double quotes unless it is already wrapped."""
        stripped = response.strip()
        if len(stripped) > 1 and stripped[0] == '"' and stripped[-1] == '"':
            return response
        return f'"{response}"'

    def _fix_json_format(self, response, instruction):
        """Return *response* as JSON text (*instruction* is unused).

        A response that already parses as JSON is returned unchanged;
        anything else is wrapped as ``{"response": ...}``.
        """
        try:
            json.loads(response)
        except (TypeError, ValueError):
            return json.dumps({"response": response}, indent=2)
        return response

    def _fix_word_count(self, response, instruction):
        """Truncate *response* to fewer words than the "less than N" limit.

        At most ``N - 1`` whitespace-separated words are kept.  The text is
        cut right after the last kept word, so earlier line breaks survive.
        Without a "less than N" phrase the response is returned unchanged.
        """
        limit_match = re.search(r'less than (\d+)', instruction, re.IGNORECASE)
        if not limit_match:
            return response
        max_words = max(int(limit_match.group(1)) - 1, 0)
        words = list(re.finditer(r'\S+', response))
        if len(words) <= max_words:
            return response
        if max_words == 0:
            return ''
        return response[:words[max_words - 1].end()]

    def _fix_sections(self, response, instruction):
        """Build a response made of the requested number of sections.

        NOTE: when the instruction contains "<N> section", the incoming
        response is discarded and replaced by N generated "SECTION k:" blocks
        with fixed filler text.  Otherwise the response is returned unchanged.
        """
        section_match = re.search(r'(\d+) section', instruction, re.IGNORECASE)
        if not section_match:
            return response
        num_sections = int(section_match.group(1))
        sections = []
        for number in range(1, num_sections + 1):
            sections.append(f"SECTION {number}:")
            sections.append("This section provides content here.")
        return '\n\n'.join(sections)

    def _fix_ending(self, response, instruction):
        """Make *response* end with the phrase named in the instruction.

        The phrase is whatever follows "finish ... with ... phrase" up to the
        next ``.``, ``!`` or ``?``.  Trailing whitespace is ignored when
        checking, so a response that already ends with the phrase is returned
        unchanged instead of having the phrase appended a second time.
        """
        end_match = re.search(
            r'finish.*with.*phrase[:\s]*([^.!?]*)', instruction, re.IGNORECASE
        )
        if not end_match:
            return response
        required_ending = end_match.group(1).strip()
        if not required_ending:
            return response
        trimmed = response.rstrip()
        if trimmed.endswith(required_ending):
            return response
        return f'{trimmed} {required_ending}' if trimmed else required_ending

    def _fix_forbidden_words(self, response, instruction):
        """Remove the word(s) named after "without ... word(s)" from *response*.

        For the plural form the captured text is split on commas, "and" and
        "or" into separate terms; for the singular form it is one term.
        Surrounding quotes are dropped.  Terms are removed case-insensitively
        as whole words only, together with one preceding space or tab, so
        "bad" does not mangle "badly".  The result is always stripped.
        """
        forbidden_match = re.search(
            r'without.*word(s?)[:\s]*([^.!?]*)', instruction, re.IGNORECASE
        )
        if forbidden_match:
            plural, listed = forbidden_match.groups()
            if plural:
                candidates = re.split(
                    r',|\band\b|\bor\b', listed, flags=re.IGNORECASE
                )
            else:
                candidates = [listed]
            for candidate in candidates:
                term = candidate.strip().strip('\'"').strip()
                if not term:
                    continue
                # Lookarounds instead of \b so terms that start or end with
                # a non-word character (e.g. "c++") still match.
                pattern = r'[ \t]?(?<!\w)' + re.escape(term) + r'(?!\w)'
                response = re.sub(pattern, '', response, flags=re.IGNORECASE)
        return response.strip()

    def _fix_all_capitals(self, response, instruction):
        """Return *response* converted to uppercase (*instruction* is unused)."""
        return response.upper()