Spaces:
Sleeping
Sleeping
| """ | |
| Validate the quality of generated training examples | |
| """ | |
| import json | |
| import re | |
| from typing import List, Dict, Tuple | |
def analyze_training_examples(filepath: str) -> Dict:
    """Analyze the quality and characteristics of training examples.

    Args:
        filepath: Path to a JSON file holding a list of chat-format
            examples; each valid example has a ``messages`` list whose
            third entry (``messages[2]['content']``) is the article text.

    Returns:
        Dict with the total example count, style-indicator percentages
        (0-100), average article length in characters, an overall
        style-consistency percentage, and up to 10 sample titles.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        examples = json.load(f)

    analysis = {
        'total_examples': len(examples),
        'provocative_titles': 0,
        'cynical_phrases': 0,
        'technical_content': 0,
        'negative_analogies': 0,
        'avg_article_length': 0,
        'style_consistency': 0,
        'sample_titles': []
    }

    # Style indicators
    provocative_words = [
        'disaster', 'catastrophe', 'crash', 'burn', 'fail', 'collapse', 'meltdown',
        'nightmare', 'fiasco', 'debacle', 'train wreck', 'explosion', 'implosion'
    ]
    cynical_phrases = [
        'of course', 'naturally', 'predictably', 'unsurprisingly', 'evidently',
        'clearly', 'obviously', 'needless to say'
    ]
    negative_analogies = [
        'train wreck', 'collision', 'explosion', 'disaster', 'catastrophe',
        'meltdown', 'implosion', 'crash', 'carnival barker', 'unicorn'
    ]
    technical_terms = [
        '5G', 'RAN', 'AI', 'edge computing', 'automation', 'cloud', 'network',
        'operator', 'vendor', 'infrastructure', 'deployment', 'integration'
    ]
    # Lowercase once, outside the loop, instead of per example per term.
    technical_terms_lower = [term.lower() for term in technical_terms]

    total_length = 0
    style_score = 0

    for example in examples:
        # Skip malformed entries: a usable example has at least three
        # messages, and the assistant message (index 2) is analyzed.
        if 'messages' not in example or len(example['messages']) < 3:
            continue
        content = example['messages'][2]['content']

        # First paragraph is the title; strip a leading markdown '# '.
        title_line = content.split('\n\n')[0]
        title = title_line[2:] if title_line.startswith('# ') else title_line

        # Collect sample titles (first 10 only)
        if len(analysis['sample_titles']) < 10:
            analysis['sample_titles'].append(title)

        title_lower = title.lower()
        content_lower = content.lower()

        # Evaluate each style element exactly once; the original computed
        # every `any(...)` twice (once for the count, once for the score).
        has_provocative = any(word in title_lower for word in provocative_words)
        has_cynical = any(phrase in content_lower for phrase in cynical_phrases)
        has_technical = any(term in content_lower for term in technical_terms_lower)
        has_analogy = any(analogy in content_lower for analogy in negative_analogies)

        # bool is an int subtype, so += True adds 1.
        analysis['provocative_titles'] += has_provocative
        analysis['cynical_phrases'] += has_cynical
        analysis['technical_content'] += has_technical
        analysis['negative_analogies'] += has_analogy

        total_length += len(content)

        # Style consistency score (0-4 based on presence of key elements)
        style_score += has_provocative + has_cynical + has_analogy + has_technical

    # Calculate averages and percentages (guarded against an empty dataset;
    # note: averages divide by the total example count, as in the original,
    # not by the number of well-formed examples).
    if examples:
        analysis['avg_article_length'] = total_length // len(examples)
        analysis['style_consistency'] = (style_score / (len(examples) * 4)) * 100

        # Convert counts to percentages
        for key in ('provocative_titles', 'cynical_phrases',
                    'technical_content', 'negative_analogies'):
            analysis[key] = (analysis[key] / len(examples)) * 100

    return analysis
| def print_analysis_report(analysis: Dict): | |
| """Print a detailed analysis report""" | |
| print("=" * 60) | |
| print("TRAINING EXAMPLES QUALITY ANALYSIS") | |
| print("=" * 60) | |
| print(f"Total Examples: {analysis['total_examples']}") | |
| print(f"Average Article Length: {analysis['avg_article_length']:,} characters") | |
| print() | |
| print("STYLE ANALYSIS:") | |
| print(f" Provocative Titles: {analysis['provocative_titles']:.1f}%") | |
| print(f" Cynical Phrases: {analysis['cynical_phrases']:.1f}%") | |
| print(f" Technical Content: {analysis['technical_content']:.1f}%") | |
| print(f" Negative Analogies: {analysis['negative_analogies']:.1f}%") | |
| print(f" Overall Style Consistency: {analysis['style_consistency']:.1f}%") | |
| print() | |
| print("SAMPLE TITLES:") | |
| for i, title in enumerate(analysis['sample_titles'], 1): | |
| print(f" {i:2d}. {title}") | |
| print() | |
| # Quality assessment | |
| quality_score = ( | |
| analysis['provocative_titles'] + | |
| analysis['cynical_phrases'] + | |
| analysis['technical_content'] + | |
| analysis['negative_analogies'] | |
| ) / 4 | |
| print("QUALITY ASSESSMENT:") | |
| if quality_score >= 80: | |
| print(" ✅ EXCELLENT - High-quality examples with strong style consistency") | |
| elif quality_score >= 60: | |
| print(" ✅ GOOD - Solid examples with good style elements") | |
| elif quality_score >= 40: | |
| print(" ⚠️ FAIR - Acceptable but could use improvement") | |
| else: | |
| print(" ❌ POOR - Needs significant improvement") | |
| print(f" Overall Quality Score: {quality_score:.1f}%") | |
| print() | |
def compare_datasets(original_file: str, new_file: str):
    """Compare original and new datasets"""
    print("DATASET COMPARISON:")
    print("-" * 40)

    base = analyze_training_examples(original_file)
    expanded = analyze_training_examples(new_file)

    base_total = base['total_examples']
    expanded_total = expanded['total_examples']
    print(f"Original Dataset: {base_total} examples")
    print(f"Expanded Dataset: {expanded_total} examples")
    print(f"New Examples Added: {expanded_total - base_total}")
    print()

    print("STYLE CONSISTENCY COMPARISON:")
    print(f"  Original: {base['style_consistency']:.1f}%")
    print(f"  Expanded: {expanded['style_consistency']:.1f}%")

    # Flag whether the aggregate style metric held up after expansion.
    held = expanded['style_consistency'] >= base['style_consistency']
    if held:
        print("  ✅ Style consistency maintained or improved")
    else:
        print("  ⚠️ Style consistency decreased")
    print()
def main():
    """Main validation function"""
    print("Validating training examples quality...\n")

    # Analyze the new examples
    print("ANALYZING NEW EXAMPLES:")
    print_analysis_report(
        analyze_training_examples('data/additional_training_examples.json'))

    # Analyze the expanded dataset
    print("ANALYZING EXPANDED DATASET:")
    print_analysis_report(
        analyze_training_examples('data/expanded_train_dataset.json'))

    # Compare with original; the baseline file may legitimately be absent.
    try:
        compare_datasets('data/train_dataset.json',
                         'data/expanded_train_dataset.json')
    except FileNotFoundError:
        print("Original dataset not found for comparison.")

    banner = "=" * 60
    print(banner)
    print("VALIDATION COMPLETE")
    print(banner)


if __name__ == "__main__":
    main()