| | --- |
| | pipeline_tag: summarization |
| |
|
| | --- |
| | ```python |
| | import random |
| | |
def add_spelling_errors(text: str, error_rate: float = 0.1) -> str:
    """Return a copy of *text* with random character-level noise injected.

    Each character is visited once and may be affected by one of these
    perturbations, every one gated by ``random.random() < error_rate``:

    * a character equal to one of '은', '는', '이', '가', '을', '를' is replaced
      by a random member of that same set (the pick may equal the original);
    * a character equal to '와' or '과' is replaced by a random one of the two;
    * any other character may (with a second, nested roll) get a random
      filler syllable inserted in front of it;
    * a space may be deleted;
    * a character may be swapped with its right-hand neighbour.

    Matching is done per character, so syllables that merely look like a
    particle inside an ordinary word are perturbed as well.

    Args:
        text: The clean input string.
        error_rate: Probability used for every random roll. ``0.0`` returns
            the text unchanged. Defaults to ``0.1``.

    Returns:
        The noisy string. Apart from deliberately deleted spaces and replaced
        particles, every input character appears in the output.
    """
    particles = ('은', '는', '이', '가', '을', '를')
    conjunctions = ('와', '과')
    fillers = ('하', '로', '니', '고', '었', '나')

    chars = list(text)
    result = []
    last_index = len(chars) - 1

    for i in range(len(chars)):
        if random.random() < error_rate:
            if chars[i] in particles:
                # Grammar error: emit a (possibly different) particle.  The
                # replacement must be appended, otherwise the particle is
                # silently deleted instead of being substituted.
                result.append(random.choice(particles))
                continue
            if chars[i] in conjunctions:
                # Grammar error: emit a (possibly different) conjunction.
                result.append(random.choice(conjunctions))
                continue
            if random.random() < error_rate:
                # Insert a random filler syllable before the current
                # character.  It goes straight into the output so the list
                # being iterated never grows and no trailing characters are
                # lost.
                result.append(random.choice(fillers))

        # Occasionally drop a space.
        if chars[i] == ' ' and random.random() < error_rate:
            continue

        # Occasionally swap the current character with the next one; the
        # displaced character is then handled by the following iteration.
        if random.random() < error_rate and i < last_index:
            chars[i], chars[i + 1] = chars[i + 1], chars[i]

        result.append(chars[i])

    return ''.join(result)
| | |
| | ``` |
| |
|
| |
|