Xcz2568
/

robustness_t5

text2text-generation

text-generation-inference

Model card Files Files and versions

robustness_t5 / README.md

Xcz2568's picture

Update README.md

da4d9e2 verified almost 2 years ago

|

history blame contribute delete

1.38 kB

	---
	pipeline_tag: summarization

	---
	```python
	import random

	def add_spelling_errors(text):
	    """Return a copy of ``text`` with random, Korean-oriented typing noise.

	    The text is processed one character at a time. At each position:

	    * with probability 0.1 a "noise" step runs:
	        - a case particle (은/는/이/가/을/를) is replaced by a random
	          particle from the same set (possibly itself);
	        - a conjunctive particle (와/과) is replaced by a random one
	          of the two;
	        - any other character gets, with a further probability 0.1,
	          a random filler syllable inserted in front of it;
	    * a space is dropped with probability 0.1;
	    * otherwise, with probability 0.1, the character is swapped with
	      the one that follows it (if any).

	    Randomness comes from the module-level ``random`` generator, so
	    call ``random.seed`` beforehand for reproducible output.

	    Args:
	        text: The input string.

	    Returns:
	        The noised string. An empty input yields an empty string.
	    """
	    # Candidate pools are defined once, outside the loop.
	    case_particles = ('은', '는', '이', '가', '를', '을')
	    conjunctive_particles = ('와', '과')
	    filler_syllables = ('하', '로', '니', '고', '었', '나')

	    noisy_text = list(text)
	    modified_text = []

	    # A while loop re-reads len(noisy_text) on every pass: insertions
	    # grow the list, and a fixed range(len(...)) would stop early and
	    # drop the trailing characters of the input.
	    i = 0
	    while i < len(noisy_text):
	        if random.random() < 0.1:
	            if noisy_text[i] in case_particles:
	                # Grammar noise: emit the substituted particle (it must be
	                # appended here, otherwise the character is lost).
	                modified_text.append(random.choice(case_particles))
	                i += 1
	                continue
	            if noisy_text[i] in conjunctive_particles:
	                # Grammar noise for the 와/과 pair.
	                modified_text.append(random.choice(conjunctive_particles))
	                i += 1
	                continue
	            if random.random() < 0.1:
	                # Insert a random syllable at the current position; the
	                # original character shifts to i + 1 and is handled on
	                # the next pass.
	                noisy_text.insert(i, random.choice(filler_syllables))

	        # Randomly delete a space.
	        if noisy_text[i] == ' ' and random.random() < 0.1:
	            i += 1
	            continue

	        # Randomly swap the current character with its successor.
	        if random.random() < 0.1 and i < len(noisy_text) - 1:
	            noisy_text[i], noisy_text[i + 1] = noisy_text[i + 1], noisy_text[i]

	        modified_text.append(noisy_text[i])
	        i += 1

	    return ''.join(modified_text)

	```