| | import re |
| | from typing import Callable, Optional, Union |
| |
|
| | from opencompass.registry import TEXT_POSTPROCESSORS |
| |
|
| |
|
@TEXT_POSTPROCESSORS.register_module('general')
def general_postprocess(text: str) -> str:
    """Normalize a free-form answer into a compact comparison string.

    Args:
        text (str): Raw text to normalize.

    Returns:
        str: The part of ``text`` before the first newline, period or comma,
        with punctuation and the English articles ``a``/``an``/``the``
        removed and whitespace runs collapsed to single spaces.
    """
    # Keep only what precedes the first newline, period or comma.
    answer = re.split(r'[\n.,]', text, maxsplit=1)[0]
    # Drop everything that is neither a word character nor whitespace.
    answer = re.sub(r'[^\w\s]', '', answer)
    # Remove stand-alone English articles, case-insensitively.
    answer = re.sub(r'\b(a|an|the)\b', '', answer, flags=re.IGNORECASE)
    # Collapse the whitespace left behind and trim both ends.
    return re.sub(r'\s+', ' ', answer).strip()
| |
|
| |
|
@TEXT_POSTPROCESSORS.register_module('general_cn')
def general_cn_postprocess(text: str) -> str:
    """Normalize an answer and segment it into space-separated jieba tokens.

    The text is first cleaned the same way as the ``general`` postprocessor
    (truncate at the first newline/period/comma, strip punctuation, drop
    English articles, collapse whitespace) and the cleaned result is then
    segmented with ``jieba.cut``.

    Args:
        text (str): Raw text to normalize.

    Returns:
        str: The jieba tokens of the cleaned text joined by single spaces.
    """
    # Keep only what precedes the first newline, period or comma.
    truncated_text = re.split(r'[\n.,]', text, maxsplit=1)[0]

    # Drop everything that is neither a word character nor whitespace.
    no_punctuation = re.sub(r'[^\w\s]', '', truncated_text)

    # Remove stand-alone English articles, case-insensitively.
    no_articles = re.sub(r'\b(a|an|the)\b',
                         '',
                         no_punctuation,
                         flags=re.IGNORECASE)

    cleaned_text = re.sub(r'\s+', ' ', no_articles).strip()

    # Imported here so jieba is only required when this postprocessor runs.
    import jieba

    # Segment the *cleaned* text. Previously the raw ``text`` was segmented,
    # which silently discarded every cleaning step above.
    return ' '.join(jieba.cut(cleaned_text))
| |
|
| |
|
@TEXT_POSTPROCESSORS.register_module('first-capital')
def first_capital_postprocess(text: str) -> str:
    """Return the first uppercase character in ``text``.

    Args:
        text (str): Text to scan from left to right.

    Returns:
        str: The first character for which ``str.isupper`` is true, or an
        empty string when there is none.
    """
    return next((char for char in text if char.isupper()), '')
| |
|
| |
|
@TEXT_POSTPROCESSORS.register_module('last-capital')
def last_capital_postprocess(text: str) -> str:
    """Return the last uppercase character in ``text``.

    Args:
        text (str): Text to scan from right to left.

    Returns:
        str: The last character for which ``str.isupper`` is true, or an
        empty string when there is none.
    """
    return next((char for char in reversed(text) if char.isupper()), '')
| |
|
| |
|
def first_option_postprocess(text: str,
                             options: str,
                             cushion: bool = True) -> str:
    """Find the first valid option in ``text``.

    A prioritized list of regular expressions (Chinese and English answer
    phrasings) is tried in order. For the first pattern whose matched span
    contains one of the option characters, that option is returned.

    Args:
        text (str): Text to extract the option from.
        options (str): The valid option characters, e.g. ``'ABCD'``. They are
            interpolated into regex character classes as-is.
        cushion (bool): Whether to additionally try the loose fallback
            patterns (a bare option character anywhere in ``text``) after
            all specific patterns failed. Defaults to True.

    Returns:
        str: The selected option character, or an empty string when no
        pattern yields one.
    """
    # Raw f-strings keep regex escapes such as ``\s`` out of Python's own
    # escape processing. Order matters: earlier patterns take priority.
    patterns = [
        rf'答案是?\s?([{options}])',
        rf'答案是?\s?:([{options}])',
        # The source listed the ASCII-colon pattern twice; the second entry
        # is taken to be the full-width colon variant.
        rf'答案是?\s?：([{options}])',
        rf'答案应该?是\s?([{options}])',
        rf'答案应该?选\s?([{options}])',
        rf'答案为\s?([{options}])',
        rf'答案选\s?([{options}])',
        rf'选择?\s?([{options}])',
        # A missing comma here used to merge this pattern with the next one
        # through implicit string concatenation, disabling both.
        rf'故选?\s?([{options}])',
        rf'只有选?项?\s?([{options}])\s?是?对',
        rf'只有选?项?\s?([{options}])\s?是?错',
        rf'只有选?项?\s?([{options}])\s?不?正确',
        rf'只有选?项?\s?([{options}])\s?错误',
        rf'说法不?对选?项?的?是\s?([{options}])',
        rf'说法不?正确选?项?的?是\s?([{options}])',
        rf'说法错误选?项?的?是\s?([{options}])',
        rf'([{options}])\s?是正确的',
        rf'([{options}])\s?是正确答案',
        rf'选项\s?([{options}])\s?正确',
        rf'所以答\s?([{options}])',
        rf'所以\s?([{options}][.。$]?$)',
        rf'所有\s?([{options}][.。$]?$)',
        # Character classes below listed ':' and ',' twice each; the
        # duplicates are taken to be the full-width '：' and '，'.
        rf'[\s,：:，]([{options}])[。，,\.]?$',
        rf'[\s，,：:][故即]([{options}])[。\.]?$',
        rf'[\s，,：:]因此([{options}])[。\.]?$',
        rf'[是为。]\s?([{options}])[。\.]?$',
        rf'因此\s?([{options}])[。\.]?$',
        rf'显然\s?([{options}])[。\.]?$',
        rf'答案是\s?(\S+)(?:。|$)',
        rf'答案应该是\s?(\S+)(?:。|$)',
        rf'答案为\s?(\S+)(?:。|$)',
        rf'[Tt]he answer is ([{options}])',
        rf'[Tt]he answer is option ([{options}])',
        rf'[Tt]he correct answer is ([{options}])',
        rf'[Tt]he correct answer is option ([{options}])',
        rf'[Tt]he answer to the question is ([{options}])',
        rf'^选项\s?([{options}])',
        rf'^([{options}])\s?选?项',
        rf'(\s|^)[{options}][\s。，,：:\.$]',
        rf'(\s|^)[{options}](\s|$)',
        rf'1.\s?(.*?)$',
        rf'1.\s?([{options}])[.。$]?$',
    ]
    # Loose fallbacks, only used when ``cushion`` is enabled.
    cushion_patterns = [
        rf'([{options}]):',
        rf'[{options}]',
    ]

    if cushion:
        patterns.extend(cushion_patterns)
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            # The whole matched span (not just the capture group) is
            # scanned, and options are tested in ``options`` order.
            outputs = match.group(0)
            for option in options:
                if option in outputs:
                    return option
    return ''
| |
|
| |
|
@TEXT_POSTPROCESSORS.register_module('first-capital-multi')
def first_capital_postprocess_multi(text: str) -> str:
    """Return the first run of consecutive letters ``A``-``D`` in ``text``.

    Args:
        text (str): Text to search.

    Returns:
        str: The first maximal run of characters in the range ``A``-``D``,
        or an empty string when there is none.
    """
    found = re.search(r'([A-D]+)', text)
    return found.group(1) if found else ''
| |
|
| |
|
def last_option_postprocess(text: str, options: str) -> str:
    """Return the last option character that occurs in ``text``.

    Args:
        text (str): Text to search.
        options (str): The valid option characters, interpolated as-is into
            a regex character class.

    Returns:
        str: The last character of ``text`` matched by the option character
        class, or an empty string when nothing matches.
    """
    last_hit = ''
    # Walk every match and remember the most recent one.
    for hit in re.finditer(rf'([{options}])', text):
        last_hit = hit.group(1)
    return last_hit
| |
|
| |
|
def first_number_postprocess(text: str) -> Optional[float]:
    """Return the first number in a string.

    Args:
        text (str): Text to search.

    Returns:
        Optional[float]: The first integer or decimal number found in
        ``text`` (with an optional leading minus sign) converted to float,
        or None when ``text`` contains no digits.
    """
    # Optional sign, optional integer part, optional dot, then at least one
    # digit -- so '42', '-3.5' and '.5' are all accepted.
    pattern = r'(-?\d*\.?\d+)'

    # Only the first occurrence is of interest.
    match = re.search(pattern, text)

    # No number found is reported as None rather than raising.
    return float(match.group(1)) if match else None
| |
|
| |
|
@TEXT_POSTPROCESSORS.register_module('multiple-select')
def multiple_select_postprocess(text: str) -> str:
    """Collect the distinct uppercase characters of ``text`` in sorted order.

    Args:
        text (str): Text to scan.

    Returns:
        str: Every distinct character of ``text`` for which ``str.isupper``
        is true, sorted and concatenated without a separator.
    """
    unique_capitals = {char for char in text if char.isupper()}
    return ''.join(sorted(unique_capitals))
| |
|
| |
|
def general_eval_wrapper_postprocess(text: str,
                                     postprocess: Optional[Union[
                                         str, Callable]] = None,
                                     **kwargs) -> str:
    """Wrapper for eval text repr. Especially for chatglmpro.

    ``text`` is evaluated as a Python expression so that a quoted string
    repr (e.g. ``'"A"'``) is unwrapped to its content. If evaluation fails or
    does not produce a string, ``text`` is used unchanged.

    Args:
        text(str): Text to be postprocessed.
        postprocess(Callable, optional): Original post processing function,
            or the name under which it is registered in
            ``TEXT_POSTPROCESSORS``. Defaults to None.
        **kwargs: Other necessary kwargs for post processing function.

    Returns:
        The result of ``postprocess(text, **kwargs)`` when ``postprocess`` is
        given, otherwise the (possibly unwrapped) text.
    """
    # NOTE(review): eval() executes arbitrary Python found in ``text``. If
    # ``text`` is untrusted model output this is a code-execution risk, and
    # BaseException subclasses such as SystemExit are not caught below.
    # Consider ast.literal_eval, which handles string reprs safely.
    try:
        evaluated = eval(text)
    except Exception:
        # Not a valid Python expression: keep the text as it is.
        evaluated = text

    # Only adopt the result when it is a string. Input such as '123' or
    # 'len' evaluates to a non-str object, which would otherwise be handed
    # to str-only postprocessors or returned despite the ``-> str`` contract.
    if isinstance(evaluated, str):
        text = evaluated

    if postprocess:
        if isinstance(postprocess, str):
            # Resolve a registered postprocessor by name.
            postprocess = TEXT_POSTPROCESSORS.get(postprocess)
        return postprocess(text, **kwargs)
    return text
| |
|