| import pandas as pd |
|
|
|
|
def process_csv(input_file, output_file, sample_size=1000):
    """Reformat a question/answer CSV and save its leading rows.

    The input is loaded with pandas, every ``'.'`` in the ``question`` column
    is replaced by ``'0'``, the ``question`` and ``answer`` columns are
    renamed to ``quizzes`` and ``solutions``, and the first ``sample_size``
    rows of ``quizzes``/``solutions``/``rating`` are written to
    ``output_file`` without the index column.

    Args:
        input_file: Source CSV, passed straight to ``pd.read_csv``. It must
            contain the columns ``source``, ``question``, ``answer`` and
            ``rating``.
        output_file: Destination, passed straight to ``DataFrame.to_csv``.
        sample_size: Number of leading rows to keep. Rows are taken with
            ``DataFrame.head`` (file order), not drawn at random.

    Raises:
        ValueError: If any required column is absent from the input.
    """
    # Read the two text columns as strings. With dtype inference, cells that
    # all look numeric (e.g. "0042" or "1.3") would be parsed as int/float,
    # which drops leading zeros and makes the `.str` accessor below fail.
    text_columns = {'question': str, 'answer': str}
    df = pd.read_csv(input_file, dtype=text_columns)

    required_columns = ['source', 'question', 'answer', 'rating']
    missing = [c for c in required_columns if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

    # Literal replacement: with regex=False the '.' is not a wildcard.
    df['question'] = df['question'].str.replace('.', '0', regex=False)

    df = df.rename(columns={'question': 'quizzes', 'answer': 'solutions'})

    # Keep only the leading rows and the three output columns.
    df = df.head(sample_size)[['quizzes', 'solutions', 'rating']]

    df.to_csv(output_file, index=False)
    print(f"Saved {len(df)} rows to {output_file}")
|
|
|
|
if __name__ == "__main__":
    # Test split: convert test.csv and keep its first 5000 rows.
    input_csv, output_csv = "test.csv", "hard_test.csv"
    process_csv(input_csv, output_csv, sample_size=5000)

    # NOTE(review): the train paths below are assigned but never passed to
    # process_csv in the visible code -- confirm whether a call (and which
    # sample_size) is missing here.
    input_csv, output_csv = "train.csv", "hard_train.csv"
| |
|
|