Spaces:
Sleeping
Sleeping
| """ | |
Update system prompts in the training and validation datasets to use the improved version from the guide
| """ | |
| import json | |
| import logging | |
# Module-scoped logger, named after this module
logger = logging.getLogger(__name__)
# Configure root logging so INFO-level (and higher) records are emitted
logging.basicConfig(level=logging.INFO)
# Default persona prompt written into the system message of every example.
_IMPROVED_SYSTEM_PROMPT = """You are Iain Morris, a razor-sharp British writer with zero tolerance for BS. Your writing style is distinctive for:
PROVOCATIVE DOOM-LADEN OPENINGS:
- Always lead with conflict, failure, or impending disaster
- Use visceral, dramatic scenarios that grab readers by the throat
- Frame mundane topics as battles, collisions, or catastrophes
- Open with vivid imagery that establishes immediate tension
SIGNATURE DARK ANALOGIES:
- Compare situations to train wrecks, explosions, collisions
- Use physical, visceral metaphors for abstract problems
- Reference pop culture disasters and failures
- Turn simple concepts into dramatic, often dark imagery
CYNICAL WIT & EXPERTISE:
- Deliver insights with biting sarcasm and parenthetical snark
- Assume readers are intelligent but skeptical
- Quote figures, then immediately undercut them
- Use technical knowledge as a weapon of wit
DISTINCTIVE PHRASES:
- "What could possibly go wrong?"
- "kiss of death," "train wreck," "collision course"
- Parenthetical asides for extra snark
- British expressions and dry humor
Write with the assumption that everything is either failing, about to fail, or succeeding despite obvious flaws."""


def _load_json(path):
    """Parse and return the JSON document stored at *path* (read as UTF-8)."""
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _save_json(data, path):
    """Write *data* to *path* as 2-space-indented JSON (written as UTF-8)."""
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)


def _replace_system_prompts(examples, system_prompt):
    """Overwrite the first system message of each example in place; return how many changed."""
    updated = 0
    for example in examples:
        for message in example['messages']:
            if message['role'] == 'system':
                message['content'] = system_prompt
                updated += 1
                # Only the first system message of an example is rewritten.
                break
    return updated


def update_system_prompts(
    *,
    train_input_path='data/expanded_train_dataset.json',
    train_output_path='data/improved_train_dataset.json',
    val_input_path='data/val_dataset.json',
    val_output_path='data/improved_val_dataset.json',
    system_prompt=None,
):
    """Replace the system prompt in the training and validation datasets.

    Each dataset is a JSON list of examples; every example holds a
    ``'messages'`` list of dicts with ``'role'`` and ``'content'`` keys.
    For each example the content of the first message whose role is
    ``'system'`` is replaced. Examples without a system message are left
    unchanged (and reported in a warning). The input files are never
    modified; results are written to the separate output paths.

    Called with no arguments, the function reads and writes the same
    ``data/*.json`` files and uses the same prompt as it always has.

    Args:
        train_input_path: JSON file holding the training examples.
        train_output_path: Destination for the rewritten training examples.
        val_input_path: JSON file holding the validation examples.
        val_output_path: Destination for the rewritten validation examples.
        system_prompt: Text to store in each system message. ``None``
            selects the built-in improved prompt.

    Returns:
        A ``(train_count, val_count)`` tuple with the number of system
        prompts replaced in each dataset.

    Raises:
        OSError: If a dataset file cannot be opened for reading or writing.
        json.JSONDecodeError: If an input file is not valid JSON.
        KeyError: If an example lacks ``'messages'`` or a message lacks
            ``'role'``.
    """
    # Same object as the module-level logger; looked up here so the function
    # does not rely on a global being defined before it is called.
    log = logging.getLogger(__name__)
    prompt = _IMPROVED_SYSTEM_PROMPT if system_prompt is None else system_prompt

    # Training dataset
    log.info("Loading expanded training dataset...")
    training_data = _load_json(train_input_path)
    log.info("Loaded %s training examples", len(training_data))

    updated_count = _replace_system_prompts(training_data, prompt)
    log.info("Updated %s system prompts", updated_count)
    skipped = len(training_data) - updated_count
    if skipped:
        log.warning(
            "%s training examples had no system message and were left unchanged",
            skipped,
        )

    _save_json(training_data, train_output_path)
    log.info("Saved improved training dataset to %s", train_output_path)

    # Validation dataset
    log.info("Updating validation dataset...")
    val_data = _load_json(val_input_path)

    val_updated_count = _replace_system_prompts(val_data, prompt)
    log.info("Updated %s validation system prompts", val_updated_count)
    val_skipped = len(val_data) - val_updated_count
    if val_skipped:
        log.warning(
            "%s validation examples had no system message and were left unchanged",
            val_skipped,
        )

    _save_json(val_data, val_output_path)
    log.info("Saved improved validation dataset to %s", val_output_path)

    return updated_count, val_updated_count
if __name__ == "__main__":
    # Script entry point: rewrite both datasets, then report the totals on stdout.
    counts = update_system_prompts()
    train_count, val_count = counts
    print(f"Successfully updated {train_count} training examples and {val_count} validation examples")