Spaces:
Sleeping
Sleeping
| import datasets | |
| import re | |
# One or more leading "@user" mentions, each optionally followed by whitespace.
# The word boundary keeps longer handles such as "@username" from being
# truncated to "name".
_LEADING_USER_MENTIONS = re.compile(r'^(?:@user\b\s*)+')


def cut_span(record, max_words=10):
    """Return the first ``max_words`` words of ``record['text']``.

    Any run of "@user" mentions at the very start of the text is removed
    before the words are counted. Words are whitespace-separated tokens and
    are re-joined with single spaces.

    Args:
        record: Mapping-like object (e.g. a DataFrame row) with a string
            stored under the ``'text'`` key.
        max_words: Maximum number of words to keep; expected to be
            non-negative. Defaults to 10.

    Returns:
        The truncated text as a single space-joined string (empty if the
        text has no words left after stripping).
    """
    text = _LEADING_USER_MENTIONS.sub('', record['text'])
    return ' '.join(text.split()[:max_words])
# Build 'none.csv': a short text span for every training example of
# cardiffnlp/x_sensitive whose 'labels' entry is empty.
data = datasets.load_dataset('cardiffnlp/x_sensitive', split='train')
df = data.to_pandas()

# Keep only rows whose 'labels' value has length 0 (presumably the examples
# with no annotated label -- confirm against the dataset card).
# .copy() makes the filtered frame independent of the original, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
df = df[df['labels'].apply(len) == 0].copy()

df['span'] = df.apply(cut_span, axis=1)
# The index still carries each row's position in the unfiltered frame;
# store it as a column so it ends up in the CSV.
df['original_index'] = df.index

new_df = df[['original_index', 'span']].copy()
new_df.to_csv('none.csv', sep=';', index=False)