# birdscanner-api / create_ppt.py
# thesoikindustries24's picture
# Prepare bird scanner for Render deployment
# 348da87
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.dml.color import RGBColor
# Module-level presentation that every slide helper below appends to,
# sized to a 10 x 7.5 inch canvas.
prs = Presentation()
prs.slide_width, prs.slide_height = Inches(10), Inches(7.5)
def add_title_slide(title, subtitle):
    """Append a title slide to the module-level ``prs`` presentation.

    The slide uses layout index 6 with a solid blue background and two
    centered, white text boxes: ``title`` at 60 pt bold and ``subtitle``
    at 32 pt underneath it.

    Args:
        title: Text for the large heading.
        subtitle: Text for the smaller line below the heading.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])  # Blank layout

    bg_fill = slide.background.fill
    bg_fill.solid()
    bg_fill.fore_color.rgb = RGBColor(51, 152, 219)  # Blue background

    def centered_white_line(text, top, height, size, bold):
        # Both text boxes share the same left edge and width; only the
        # vertical placement, font size and weight differ.
        frame = slide.shapes.add_textbox(Inches(0.5), top, Inches(9), height).text_frame
        frame.text = text
        first = frame.paragraphs[0]
        first.font.size = size
        if bold:
            first.font.bold = True
        first.font.color.rgb = RGBColor(255, 255, 255)
        first.alignment = PP_ALIGN.CENTER

    centered_white_line(title, Inches(2.5), Inches(1.5), Pt(60), True)
    centered_white_line(subtitle, Inches(4.2), Inches(1), Pt(32), False)
def add_content_slide(title, content_list):
    """Add a slide with a heading and one paragraph per content entry.

    The slide is appended to the module-level ``prs`` presentation, uses
    layout index 6 and gets a light grey background. ``title`` is written
    into a text box at the top (44 pt, bold); every entry of
    ``content_list`` becomes its own 22 pt paragraph in a word-wrapped
    text box below it.

    Args:
        title: Heading text for the slide.
        content_list: Iterable of strings, one per paragraph. Empty
            strings produce empty paragraphs; an empty iterable leaves
            the content box with its single initial empty paragraph.
    """
    slide = prs.slides.add_slide(prs.slide_layouts[6])  # Blank layout
    fill = slide.background.fill
    fill.solid()
    fill.fore_color.rgb = RGBColor(240, 240, 240)

    # Heading
    title_box = slide.shapes.add_textbox(Inches(0.5), Inches(0.5), Inches(9), Inches(0.8))
    title_frame = title_box.text_frame
    title_frame.text = title
    heading = title_frame.paragraphs[0]
    heading.font.size = Pt(44)
    heading.font.bold = True
    heading.font.color.rgb = RGBColor(44, 62, 80)

    # Body
    content_box = slide.shapes.add_textbox(Inches(1), Inches(1.5), Inches(8), Inches(5.5))
    text_frame = content_box.text_frame
    text_frame.word_wrap = True
    for i, item in enumerate(content_list):
        # The text frame starts with one paragraph, which takes the first
        # entry. Later entries use the paragraph returned by
        # add_paragraph() directly instead of looking it up again by index.
        p = text_frame.paragraphs[0] if i == 0 else text_frame.add_paragraph()
        p.text = item
        p.font.size = Pt(22)
        p.font.color.rgb = RGBColor(52, 73, 94)
        p.space_before = Pt(12)
        p.space_after = Pt(12)
        p.level = 0
# Deck content. Slides are appended to `prs` in the order below; the
# symbols and emoji in the strings are literal Unicode characters.

# Slide 1: Title
add_title_slide("🐦 Bird Species Classifier", "Complete Project Logic & Architecture")

# Slide 2: Project Overview
add_content_slide("Project Overview", [
    "✓ AI-powered application for identifying bird species",
    "✓ Accepts TWO inputs: Bird images and bird sounds",
    "✓ Uses Deep Learning models to classify species",
    "✓ Available in TWO formats: Desktop (Tkinter) & Web (Flask)",
    "✓ Real-time predictions with high accuracy",
])

# Slide 3: Key Features
add_content_slide("Key Features", [
    "📷 Image Classification - Identify birds from photos",
    "🎵 Audio Classification - Identify birds from sound recordings",
    "🔄 Real-time Processing - Instant predictions",
    "🎨 User-friendly Interface - Easy to use for everyone",
    "⚡ Multi-threaded Loading - Smooth user experience",
    "☁️ Uses Pre-trained Models - No training needed",
])

# Slide 4: Architecture Overview
add_content_slide("System Architecture", [
    "INPUT LAYER",
    " • Image File (.jpg, .png, .bmp)",
    " • Audio File (.wav, .flac, .mp3)",
    "",
    "PROCESSING LAYER",
    " • Image Processor / Audio Feature Extractor",
    "",
    "AI MODEL LAYER",
    " • Vision Transformer (Image)",
    " • wav2vec2 Model (Audio)",
    "",
    "OUTPUT LAYER",
    " • Bird Species Prediction",
])

# Slide 5: Image Classification Logic
add_content_slide("Image Classification Pipeline", [
    "1. USER UPLOADS IMAGE",
    " → Select bird photo file",
    "",
    "2. IMAGE PREPROCESSING",
    " → Load image using PIL",
    " → Convert to RGB format",
    " → Resize & normalize using AutoImageProcessor",
    "",
    "3. MODEL INFERENCE",
    " → Pass processed image to ViT/CNN model",
    " → Extract feature embeddings",
    "",
    "4. PREDICTION",
    " → Get logits (raw scores) from model",
    " → Apply argmax to get highest probability class",
    " → Return bird species name",
])

# Slide 6: Audio Classification Logic
add_content_slide("Audio Classification Pipeline", [
    "1. USER UPLOADS AUDIO FILE",
    " → Select .wav, .flac, or .mp3 file",
    "",
    "2. AUDIO PREPROCESSING (librosa)",
    " → Load audio file",
    " → Resample to 16kHz (standard rate)",
    " → Convert to MONO (single channel)",
    " → Extract sound features",
    "",
    "3. FEATURE EXTRACTION (wav2vec2)",
    " → Process audio through feature extractor",
    " → Convert to numerical embeddings",
    "",
    "4. MODEL INFERENCE",
    " → Pass embeddings to audio classification model",
    " → Get prediction logits",
    "",
    "5. OUTPUT SPECIES",
    " → argmax() gets highest probability",
])

# Slide 7: Models Deep Dive
add_content_slide("AI Models Used", [
    "IMAGE MODEL: chriamue/bird-species-classifier",
    " • Type: Vision Transformer (ViT)",
    " • Uses: Self-attention mechanism for images",
    " • Divides image into patches → Analyzes relationships",
    "",
    "AUDIO MODEL: greenarcade/wav2vec2-vd-bird-sound",
    " • Type: wav2vec2 (Self-supervised learning)",
    " • Uses: Transformer architecture on audio",
    " • Pre-trained on large unlabeled audio → Fine-tuned for birds",
])

# Slide 8: Technologies & Libraries
add_content_slide("Tech Stack", [
    "🐍 BACKEND",
    " • Python 3.13",
    " • PyTorch - Deep learning framework",
    " • Transformers - Pre-trained models library",
    " • librosa - Audio signal processing",
    "",
    "🖥️ FRONTEND",
    " • Tkinter - Desktop GUI (main.py)",
    " • Flask - Web framework (app.py)",
    " • HTML/CSS/JavaScript - Web interface",
])

# Slide 9: Desktop App (Tkinter)
add_content_slide("Desktop Version (main.py)", [
    "UI Components:",
    " • Upload Image Button → Opens file dialog",
    " • Upload Audio Button → Opens file dialog",
    " • Preview Area → Shows selected file",
    " • Result Area → Shows prediction + species name",
    " • Next Bird Button → Reset for new prediction",
    "",
    "Workflow:",
    " 1. App loads models on startup (background thread)",
    " 2. User selects image/audio",
    " 3. Processing happens in background",
    " 4. Result displays in GUI",
    " 5. User can predict again",
])

# Slide 10: Web App (Flask)
add_content_slide("Web Version (app.py)", [
    "Backend Endpoints:",
    " • / → Serves HTML interface",
    " • /status → Check if models are loaded",
    " • /classify-image → Process image",
    " • /classify-audio → Process audio",
    "",
    "Frontend (JavaScript):",
    " • Polls /status until models load",
    " • Sends files to Flask via POST requests",
    " • Displays results in browser",
    "",
    "Run: python app.py",
    "Access: http://localhost:5000",
])

# Slide 11: Data Flow Diagram
add_content_slide("Complete Data Flow", [
    "USER INPUT (Image/Audio)",
    " ↓",
    "FILE UPLOAD (Form/Dialog)",
    " ↓",
    "PREPROCESSING (PIL/librosa)",
    " ↓",
    "FEATURE EXTRACTION (AutoProcessor/AutoExtractor)",
    " ↓",
    "DEEP LEARNING MODEL (ViT/wav2vec2)",
    " ↓",
    "OUTPUT PREDICTIONS (Logits)",
    " ↓",
    "ARGMAX (Get highest probability)",
    " ↓",
    "SPECIES NAME (Display to user)",
])

# Slide 12: Key Concepts Explained
add_content_slide("Key AI Concepts", [
    "TRANSFORMER: Neural network architecture using attention",
    " → Analyzes relationships between all input elements",
    "",
    "VISION TRANSFORMER (ViT): Applies transformers to images",
    " → Treats image patches like words in sentences",
    "",
    "WAV2VEC2: Self-supervised audio representation learning",
    " → Learns from unlabeled audio data",
    " → Converts sound to meaningful features",
    "",
    "INFERENCE: Using trained model to make predictions",
    " → No training happening, just prediction",
    "",
    "ARGMAX: Gets index of highest value in array",
    " → Converts [0.1, 0.8, 0.1] → index 1 (bird species)",
])

# Slide 13: Performance Metrics
add_content_slide("Model Performance", [
    "IMAGE CLASSIFICATION:",
    " • Models: Trained on thousands of bird images",
    " • Accuracy: High confidence for clear photos",
    " • Speed: ~1-2 seconds per image",
    "",
    "AUDIO CLASSIFICATION:",
    " • Models: Fine-tuned on bird sound datasets",
    " • Accuracy: Excellent for 5-30 second clips",
    " • Speed: ~1-3 seconds per audio file",
    "",
    "NOTE: Quality of input affects accuracy",
    " → Clear images = Better results",
    " → Clean audio = Better results",
])

# Slide 14: Error Handling
add_content_slide("Error Handling & Edge Cases", [
    "Image Issues:",
    " • Invalid format → Error message shown",
    " • Corrupted file → Exception caught and reported",
    "",
    "Audio Issues:",
    " • Unsupported format → librosa fails gracefully",
    " • Empty audio → Validation check prevents crash",
    " • Very short clips → Model may give low confidence",
    "",
    "Model Loading:",
    " • Networks offline → Downloads cached models",
    " • Large files → Streamed from Hugging Face",
    " • Loading status → Status bar shows progress",
])

# Slide 15: Future Improvements
add_content_slide("Potential Enhancements", [
    "🔧 TECHNICAL",
    " • Add confidence scores to predictions",
    " • Support for batch processing (multiple files)",
    " • Model quantization for faster inference",
    " • GPU acceleration for speedup",
    "",
    "📊 FEATURES",
    " • Database to store prediction history",
    " • User authentication for web version",
    " • Mobile app (React Native / Flutter)",
    " • Real-time bird detection from camera/mic",
    " • Multi-species probability rankings",
])

# Slide 16: Summary
add_content_slide("Summary", [
    "✅ TWO INPUT TYPES: Images & Audio recordings",
    "✅ TWO INTERFACES: Desktop (Tkinter) & Web (Flask)",
    "✅ STATE-OF-ART MODELS: Vision Transformer + wav2vec2",
    "✅ AUTOMATIC PREPROCESSING: Handles all formats",
    "✅ REAL-TIME PREDICTIONS: Instant results",
    "✅ USER-FRIENDLY: No ML knowledge needed",
    "✅ SCALABLE: Can serve multiple users (web version)",
])

# Slide 17: Questions
add_title_slide("Questions?", "🐦 Bird Species Classifier 🐦")

# Save presentation (written to the current working directory)
prs.save('Bird_Species_Classifier_PPT.pptx')
print("✅ PowerPoint created: Bird_Species_Classifier_PPT.pptx")