import re
import json


class EvalHandler:
    """Detect formatting constraints in an instruction and patch a response.

    ``detect_rules`` runs a set of keyword regexes over the instruction text
    and reports the names of the checkers that seem to apply.
    ``apply_rule_fix`` then rewrites a response with one best-effort fixer
    per reported checker name.

    Both steps are heuristics driven purely by regular expressions.

    NOTE(review): several detection patterns overlap.  For example the
    ``capital_letters_only`` pattern also matches text such as
    "capital letters less than 5", so ``detect_rules`` can report both
    ``CapitalWordFrequencyChecker`` and ``CapitalLettersEnglishChecker``
    for the same instruction.  Detection behaviour is deliberately left
    as-is here; confirm with callers before tightening the patterns.
    """

    # (key in ``rule_patterns``, checker name), in the order rules are reported.
    _RULE_CHECKERS = (
        ('comma_restriction', 'CommaChecker'),
        ('placeholder_requirement', 'PlaceholderChecker'),
        ('lowercase_requirement', 'LowercaseLettersEnglishChecker'),
        ('capital_frequency', 'CapitalWordFrequencyChecker'),
        ('quotation_requirement', 'QuotationChecker'),
        ('json_format', 'JsonFormat'),
        ('word_count', 'NumberOfWords'),
        ('section_requirement', 'SectionChecker'),
        ('ending_requirement', 'EndChecker'),
        ('forbidden_words', 'ForbiddenWords'),
        ('capital_letters_only', 'CapitalLettersEnglishChecker'),
    )

    # checker name -> (fixer method name, whether the fixer takes ``instruction``)
    _RULE_FIXERS = {
        'CommaChecker': ('_fix_commas', True),
        'PlaceholderChecker': ('_fix_placeholders', True),
        'LowercaseLettersEnglishChecker': ('_fix_lowercase', False),
        'CapitalWordFrequencyChecker': ('_fix_capital_frequency', True),
        'QuotationChecker': ('_fix_quotations', False),
        'JsonFormat': ('_fix_json_format', True),
        'NumberOfWords': ('_fix_word_count', True),
        'SectionChecker': ('_fix_sections', True),
        'EndChecker': ('_fix_ending', True),
        'ForbiddenWords': ('_fix_forbidden_words', True),
        'CapitalLettersEnglishChecker': ('_fix_all_capitals', True),
    }

    def __init__(self):
        """Compile the case-insensitive regexes used to detect each constraint."""
        self.rule_patterns = {
            'comma_restriction': re.compile(r'no.*comma|without.*comma', re.IGNORECASE),
            'placeholder_requirement': re.compile(r'placeholder.*\[.*\]|square.*bracket', re.IGNORECASE),
            'lowercase_requirement': re.compile(r'lowercase|no.*capital|all.*lowercase', re.IGNORECASE),
            'capital_frequency': re.compile(r'capital.*letter.*less.*than|capital.*word.*frequency', re.IGNORECASE),
            'quotation_requirement': re.compile(r'wrap.*quotation|double.*quote', re.IGNORECASE),
            'json_format': re.compile(r'json.*format|JSON.*output|format.*json', re.IGNORECASE),
            'word_count': re.compile(r'less.*than.*word|word.*limit|maximum.*word', re.IGNORECASE),
            'section_requirement': re.compile(r'section.*start|SECTION.*X', re.IGNORECASE),
            'ending_requirement': re.compile(r'finish.*exact.*phrase|end.*phrase', re.IGNORECASE),
            'forbidden_words': re.compile(r'not.*allowed|forbidden.*word|without.*word', re.IGNORECASE),
            'capital_letters_only': re.compile(r'all.*capital|CAPITAL.*letter', re.IGNORECASE),
        }

    def detect_rules(self, instruction: str) -> list:
        """Return the checker names whose detection regex matches *instruction*.

        Names are reported in the fixed order of ``_RULE_CHECKERS``; an
        instruction matching nothing yields an empty list.
        """
        return [
            checker
            for key, checker in self._RULE_CHECKERS
            if self.rule_patterns[key].search(instruction)
        ]

    def apply_rule_fix(self, response: str, rules, instruction: str = "") -> str:
        """Apply the fixer for each name in *rules*, in the order given.

        Each fixer receives the output of the previous one.  Rule names
        without a registered fixer are skipped silently.
        """
        for rule in rules:
            fixer = self._RULE_FIXERS.get(rule)
            if fixer is None:
                continue
            method_name, takes_instruction = fixer
            method = getattr(self, method_name)
            if takes_instruction:
                response = method(response, instruction)
            else:
                response = method(response)
        return response

    def _fix_commas(self, response: str, instruction: str) -> str:
        """Remove every comma from *response*."""
        return response.replace(',', '')

    def _fix_placeholders(self, response: str, instruction: str) -> str:
        """Ensure *response* holds the number of ``[...]`` placeholders asked for.

        The target is read from "at least N" in *instruction*; without it the
        response is returned untouched.  Missing placeholders are created by
        bracketing words from the start of the response.  Words that already
        belong to a placeholder are skipped (re-wrapping ``[name]`` as
        ``[[name]]`` would not add a placeholder), and generic
        ``[placeholder]`` tokens are appended if the words run out.

        When words are modified the result is re-joined with single spaces.
        """
        num_match = re.search(r'at least (\d+)', instruction, re.IGNORECASE)
        if not num_match:
            return response

        missing = int(num_match.group(1)) - len(re.findall(r'\[.*?\]', response))
        if missing <= 0:
            return response

        words = response.split()
        inside = False  # True while walking through a multi-word "[like this]"
        for i, word in enumerate(words):
            if missing == 0:
                break
            last_open, last_close = word.rfind('['), word.rfind(']')
            if last_open != -1 or last_close != -1:
                # The word carries a bracket: it is part of an existing
                # placeholder; remember whether that placeholder is still open.
                inside = last_open > last_close
                continue
            if inside:
                continue
            words[i] = f'[{word}]'
            missing -= 1

        # Not enough free words to convert: pad with generic placeholders.
        words.extend(['[placeholder]'] * missing)
        return ' '.join(words)

    def _fix_lowercase(self, response: str) -> str:
        """Return *response* converted to lowercase."""
        return response.lower()

    def _fix_capital_frequency(self, response: str, instruction: str) -> str:
        """Lower-case all-caps words until fewer than N remain.

        N is read from "less than N" in *instruction*; without it the
        response is returned untouched.  "Less than N" is strict, so at most
        ``N - 1`` all-caps words are kept.  The earliest all-caps words are
        lowered first.  When a change is made the result is re-joined with
        single spaces.
        """
        max_match = re.search(r'less than (\d+)', instruction, re.IGNORECASE)
        if not max_match:
            return response

        allowed = max(int(max_match.group(1)) - 1, 0)
        words = response.split()
        excess = sum(1 for word in words if word.isupper()) - allowed
        if excess <= 0:
            return response

        for i, word in enumerate(words):
            if excess == 0:
                break
            if word.isupper():
                words[i] = word.lower()
                excess -= 1
        return ' '.join(words)

    def _fix_quotations(self, response: str) -> str:
        """Wrap *response* in double quotation marks."""
        return f'"{response}"'

    def _fix_json_format(self, response: str, instruction: str) -> str:
        """Return *response* embedded in a JSON object under the key ``response``."""
        return json.dumps({"response": response}, indent=2)

    def _fix_word_count(self, response: str, instruction: str) -> str:
        """Truncate *response* so it has fewer than N whitespace-separated words.

        N is read from "less than N" in *instruction*; without it the
        response is returned untouched.  "Less than N" is strict, so at most
        ``N - 1`` words are kept.  A truncated result is joined with single
        spaces.
        """
        limit_match = re.search(r'less than (\d+)', instruction, re.IGNORECASE)
        if not limit_match:
            return response

        max_words = max(int(limit_match.group(1)) - 1, 0)
        words = response.split()
        if len(words) <= max_words:
            return response
        return ' '.join(words[:max_words])

    def _fix_sections(self, response: str, instruction: str) -> str:
        """Build an "N section" skeleton when *instruction* asks for N sections.

        NOTE: when "N section" is found the original *response* text is
        discarded and replaced by N ``SECTION i:`` headers, each followed by a
        fixed filler sentence.  Without a match the response is returned as-is.
        """
        section_match = re.search(r'(\d+) section', instruction, re.IGNORECASE)
        if not section_match:
            return response

        parts = []
        for number in range(1, int(section_match.group(1)) + 1):
            parts.append(f"SECTION {number}:")
            parts.append("This section provides content here.")
        return '\n\n'.join(parts)

    def _fix_ending(self, response: str, instruction: str) -> str:
        """Append the required closing phrase if *response* does not end with it.

        The phrase is whatever follows "finish ... with ... phrase" in
        *instruction*, up to the first ``.``, ``!`` or ``?``.  Trailing
        whitespace in *response* is ignored when checking, so a response that
        already ends with the phrase plus a newline is not given a duplicate.
        """
        end_match = re.search(
            r'finish.*with.*phrase[:\s]*([^.!?]*)', instruction, re.IGNORECASE
        )
        if not end_match:
            return response

        required_ending = end_match.group(1).strip()
        if not required_ending or response.rstrip().endswith(required_ending):
            return response
        return response + " " + required_ending

    def _fix_forbidden_words(self, response: str, instruction: str) -> str:
        """Remove the forbidden word named in *instruction* from *response*.

        The word (or phrase) is whatever follows "without ... word" in
        *instruction*, up to the first ``.``, ``!`` or ``?``.  Matching is
        case-insensitive and restricted to whole words, so forbidding "cat"
        does not damage "concatenate".  Spaces or tabs directly before a
        removed word are dropped with it to avoid leaving a double space.

        The result is always stripped of leading and trailing whitespace.
        """
        forbidden_match = re.search(
            r'without.*word[:\s]*([^.!?]*)', instruction, re.IGNORECASE
        )
        if forbidden_match:
            forbidden_word = forbidden_match.group(1).strip().lower()
            if forbidden_word:
                # Lookarounds instead of \b so phrases that start or end with
                # a non-word character still match correctly.
                pattern = r'[ \t]*(?<!\w)' + re.escape(forbidden_word) + r'(?!\w)'
                response = re.sub(pattern, '', response, flags=re.IGNORECASE)
        return response.strip()

    def _fix_all_capitals(self, response: str, instruction: str) -> str:
        """Return *response* converted to uppercase."""
        return response.upper()