Spaces:
Sleeping
Sleeping
File size: 529 Bytes
5f2a5b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
import datasets
import re
def cut_span(record, max_words: int = 10) -> str:
    """Return the first words of a record's text, minus leading "@user" mentions.

    Args:
        record: Mapping-like object (for example a DataFrame row) whose
            ``'text'`` entry is a string.
        max_words: Maximum number of whitespace-separated words to keep.
            Defaults to 10.

    Returns:
        The kept words joined by single spaces; an empty string when no
        words remain after the prefix is removed.
    """
    text = record['text']
    # Remove leading "@user" prefixes (one or more)
    text = re.sub(r'^(@user\s*)+', '', text)
    # split() without arguments collapses every run of whitespace, so the
    # returned span is always normalized to single spaces.
    words = text.split()[:max_words]
    return ' '.join(words)
# Load the training split and continue with it as a pandas DataFrame.
data = datasets.load_dataset('cardiffnlp/x_sensitive', split='train')
df = data.to_pandas()
# Keep only the rows whose 'labels' entry is empty. The explicit .copy()
# makes the filtered frame independent of the original, so the column
# assignments below do not raise pandas' SettingWithCopyWarning.
df = df[df['labels'].apply(len) == 0].copy()
# Build the shortened text span for every remaining row.
df['span'] = df.apply(cut_span, axis=1)
# The boolean filter keeps the index labels of the unfiltered frame, so this
# column records where each row sat before filtering.
df['original_index'] = df.index
new_df = df[['original_index', 'span']].copy()
# Semicolon-separated output; the DataFrame index is not written because it
# is already stored in 'original_index'.
new_df.to_csv('none.csv', sep=';', index=False)