from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.dml.color import RGBColor

# Create presentation
prs = Presentation()
prs.slide_width = Inches(10)
prs.slide_height = Inches(7.5)

def add_title_slide(title, subtitle):
    """Add a title slide"""
    slide = prs.slides.add_slide(prs.slide_layouts[6])  # Blank layout
    background = slide.background
    fill = background.fill
    fill.solid()
    fill.fore_color.rgb = RGBColor(51, 152, 219)  # Blue background
    
    # Add title
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(2.5), Inches(9), Inches(1.5))
    title_frame = title_box.text_frame
    title_frame.text = title
    title_frame.paragraphs[0].font.size = Pt(60)
    title_frame.paragraphs[0].font.bold = True
    title_frame.paragraphs[0].font.color.rgb = RGBColor(255, 255, 255)
    title_frame.paragraphs[0].alignment = PP_ALIGN.CENTER
    
    # Add subtitle
    subtitle_box = slide.shapes.add_textbox(Inches(0.5), Inches(4.2), Inches(9), Inches(1))
    subtitle_frame = subtitle_box.text_frame
    subtitle_frame.text = subtitle
    subtitle_frame.paragraphs[0].font.size = Pt(32)
    subtitle_frame.paragraphs[0].font.color.rgb = RGBColor(255, 255, 255)
    subtitle_frame.paragraphs[0].alignment = PP_ALIGN.CENTER

def add_content_slide(title, content_list):
    """Add a slide with title and bullet points"""
    slide = prs.slides.add_slide(prs.slide_layouts[6])  # Blank layout
    background = slide.background
    fill = background.fill
    fill.solid()
    fill.fore_color.rgb = RGBColor(240, 240, 240)
    
    # Add title
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(0.5), Inches(9), Inches(0.8))
    title_frame = title_box.text_frame
    title_frame.text = title
    title_frame.paragraphs[0].font.size = Pt(44)
    title_frame.paragraphs[0].font.bold = True
    title_frame.paragraphs[0].font.color.rgb = RGBColor(44, 62, 80)
    
    # Add content
    content_box = slide.shapes.add_textbox(Inches(1), Inches(1.5), Inches(8), Inches(5.5))
    text_frame = content_box.text_frame
    text_frame.word_wrap = True
    
    for i, item in enumerate(content_list):
        if i > 0:
            text_frame.add_paragraph()
        p = text_frame.paragraphs[i]
        p.text = item
        p.font.size = Pt(22)
        p.font.color.rgb = RGBColor(52, 73, 94)
        p.space_before = Pt(12)
        p.space_after = Pt(12)
        p.level = 0

# Slide 1: Title
add_title_slide("🐦 Bird Species Classifier", "Complete Project Logic & Architecture")

# Slide 2: Project Overview
add_content_slide("Project Overview", [
    "✓ AI-powered application for identifying bird species",
    "✓ Accepts TWO inputs: Bird images and bird sounds",
    "✓ Uses Deep Learning models to classify species",
    "✓ Available in TWO formats: Desktop (Tkinter) & Web (Flask)",
    "✓ Real-time predictions with high accuracy"
])

# Slide 3: Key Features
add_content_slide("Key Features", [
    "📷 Image Classification - Identify birds from photos",
    "🎵 Audio Classification - Identify birds from sound recordings",
    "🔄 Real-time Processing - Instant predictions",
    "🎨 User-friendly Interface - Easy to use for everyone",
    "⚡ Multi-threaded Loading - Smooth user experience",
    "☁️ Uses Pre-trained Models - No training needed"
])

# Slide 4: Architecture Overview
add_content_slide("System Architecture", [
    "INPUT LAYER",
    "  • Image File (.jpg, .png, .bmp)",
    "  • Audio File (.wav, .flac, .mp3)",
    "",
    "PROCESSING LAYER",
    "  • Image Processor / Audio Feature Extractor",
    "",
    "AI MODEL LAYER",
    "  • Vision Transformer (Image)",
    "  • wav2vec2 Model (Audio)",
    "",
    "OUTPUT LAYER",
    "  • Bird Species Prediction"
])

# Slide 5: Image Classification Logic
add_content_slide("Image Classification Pipeline", [
    "1. USER UPLOADS IMAGE",
    "   → Select bird photo file",
    "",
    "2. IMAGE PREPROCESSING",
    "   → Load image using PIL",
    "   → Convert to RGB format",
    "   → Resize & normalize using AutoImageProcessor",
    "",
    "3. MODEL INFERENCE",
    "   → Pass processed image to ViT/CNN model",
    "   → Extract feature embeddings",
    "",
    "4. PREDICTION",
    "   → Get logits (raw scores) from model",
    "   → Apply argmax to get highest probability class",
    "   → Return bird species name"
])

# Slide 6: Audio Classification Logic
add_content_slide("Audio Classification Pipeline", [
    "1. USER UPLOADS AUDIO FILE",
    "   → Select .wav, .flac, or .mp3 file",
    "",
    "2. AUDIO PREPROCESSING (librosa)",
    "   → Load audio file",
    "   → Resample to 16kHz (standard rate)",
    "   → Convert to MONO (single channel)",
    "   → Extract sound features",
    "",
    "3. FEATURE EXTRACTION (wav2vec2)",
    "   → Process audio through feature extractor",
    "   → Convert to numerical embeddings",
    "",
    "4. MODEL INFERENCE",
    "   → Pass embeddings to audio classification model",
    "   → Get prediction logits",
    "",
    "5. OUTPUT SPECIES",
    "   → argmax() gets highest probability"
])

# Slide 7: Models Deep Dive
add_content_slide("AI Models Used", [
    "IMAGE MODEL: chriamue/bird-species-classifier",
    "  • Type: Vision Transformer (ViT)",
    "  • Uses: Self-attention mechanism for images",
    "  • Divides image into patches → Analyzes relationships",
    "",
    "AUDIO MODEL: greenarcade/wav2vec2-vd-bird-sound",
    "  • Type: wav2vec2 (Self-supervised learning)",
    "  • Uses: Transformer architecture on audio",
    "  • Pre-trained on large unlabeled audio → Fine-tuned for birds"
])

# Slide 8: Technologies & Libraries
add_content_slide("Tech Stack", [
    "🐍 BACKEND",
    "  • Python 3.13",
    "  • PyTorch - Deep learning framework",
    "  • Transformers - Pre-trained models library",
    "  • librosa - Audio signal processing",
    "",
    "🖥️ FRONTEND",
    "  • Tkinter - Desktop GUI (main.py)",
    "  • Flask - Web framework (app.py)",
    "  • HTML/CSS/JavaScript - Web interface"
])

# Slide 9: Desktop App (Tkinter)
add_content_slide("Desktop Version (main.py)", [
    "UI Components:",
    "  • Upload Image Button → Opens file dialog",
    "  • Upload Audio Button → Opens file dialog",
    "  • Preview Area → Shows selected file",
    "  • Result Area → Shows prediction + species name",
    "  • Next Bird Button → Reset for new prediction",
    "",
    "Workflow:",
    "  1. App loads models on startup (background thread)",
    "  2. User selects image/audio",
    "  3. Processing happens in background",
    "  4. Result displays in GUI",
    "  5. User can predict again"
])

# Slide 10: Web App (Flask)
add_content_slide("Web Version (app.py)", [
    "Backend Endpoints:",
    "  • / → Serves HTML interface",
    "  • /status → Check if models are loaded",
    "  • /classify-image → Process image",
    "  • /classify-audio → Process audio",
    "",
    "Frontend (JavaScript):",
    "  • Polls /status until models load",
    "  • Sends files to Flask via POST requests",
    "  • Displays results in browser",
    "",
    "Run: python app.py",
    "Access: http://localhost:5000"
])

# Slide 11: Data Flow Diagram
add_content_slide("Complete Data Flow", [
    "USER INPUT (Image/Audio)",
    "         ↓",
    "FILE UPLOAD (Form/Dialog)",
    "         ↓",
    "PREPROCESSING (PIL/librosa)",
    "         ↓",
    "FEATURE EXTRACTION (AutoProcessor/AutoExtractor)",
    "         ↓",
    "DEEP LEARNING MODEL (ViT/wav2vec2)",
    "         ↓",
    "OUTPUT PREDICTIONS (Logits)",
    "         ↓",
    "ARGMAX (Get highest probability)",
    "         ↓",
    "SPECIES NAME (Display to user)"
])

# Slide 12: Key Concepts Explained
add_content_slide("Key AI Concepts", [
    "TRANSFORMER: Neural network architecture using attention",
    "  → Analyzes relationships between all input elements",
    "",
    "VISION TRANSFORMER (ViT): Applies transformers to images",
    "  → Treats image patches like words in sentences",
    "",
    "WAV2VEC2: Self-supervised audio representation learning",
    "  → Learns from unlabeled audio data",
    "  → Converts sound to meaningful features",
    "",
    "INFERENCE: Using trained model to make predictions",
    "  → No training happening, just prediction",
    "",
    "ARGMAX: Gets index of highest value in array",
    "  → Converts [0.1, 0.8, 0.1] → index 1 (bird species)"
])

# Slide 13: Performance Metrics
add_content_slide("Model Performance", [
    "IMAGE CLASSIFICATION:",
    "  • Models: Trained on thousands of bird images",
    "  • Accuracy: High confidence for clear photos",
    "  • Speed: ~1-2 seconds per image",
    "",
    "AUDIO CLASSIFICATION:",
    "  • Models: Fine-tuned on bird sound datasets",
    "  • Accuracy: Excellent for 5-30 second clips",
    "  • Speed: ~1-3 seconds per audio file",
    "",
    "NOTE: Quality of input affects accuracy",
    "  → Clear images = Better results",
    "  → Clean audio = Better results"
])

# Slide 14: Error Handling
add_content_slide("Error Handling & Edge Cases", [
    "Image Issues:",
    "  • Invalid format → Error message shown",
    "  • Corrupted file → Exception caught and reported",
    "",
    "Audio Issues:",
    "  • Unsupported format → librosa fails gracefully",
    "  • Empty audio → Validation check prevents crash",
    "  • Very short clips → Model may give low confidence",
    "",
    "Model Loading:",
    "  • Networks offline → Downloads cached models",
    "  • Large files → Streamed from Hugging Face",
    "  • Loading status → Status bar shows progress"
])

# Slide 15: Future Improvements
add_content_slide("Potential Enhancements", [
    "🔧 TECHNICAL",
    "  • Add confidence scores to predictions",
    "  • Support for batch processing (multiple files)",
    "  • Model quantization for faster inference",
    "  • GPU acceleration for speedup",
    "",
    "📊 FEATURES",
    "  • Database to store prediction history",
    "  • User authentication for web version",
    "  • Mobile app (React Native / Flutter)",
    "  • Real-time bird detection from camera/mic",
    "  • Multi-species probability rankings"
])

# Slide 16: Summary
add_content_slide("Summary", [
    "✅ TWO INPUT TYPES: Images & Audio recordings",
    "✅ TWO INTERFACES: Desktop (Tkinter) & Web (Flask)",
    "✅ STATE-OF-ART MODELS: Vision Transformer + wav2vec2",
    "✅ AUTOMATIC PREPROCESSING: Handles all formats",
    "✅ REAL-TIME PREDICTIONS: Instant results",
    "✅ USER-FRIENDLY: No ML knowledge needed",
    "✅ SCALABLE: Can serve multiple users (web version)"
])

# Slide 17: Questions
add_title_slide("Questions?", "🐦 Bird Species Classifier 🐦")

# Save presentation
prs.save('Bird_Species_Classifier_PPT.pptx')
print("✅ PowerPoint created: Bird_Species_Classifier_PPT.pptx")