Spaces:

thesoikindustries24
/

birdscanner-api

Sleeping

App Files Files Community

birdscanner-api / create_ppt.py

thesoikindustries24

Prepare bird scanner for Render deployment

348da87 28 days ago

raw

history blame contribute delete

11.1 kB

	from pptx import Presentation
	from pptx.util import Inches, Pt
	from pptx.enum.text import PP_ALIGN
	from pptx.dml.color import RGBColor

	# Module-level deck shared by the slide helpers below; sized 10 x 7.5 inches.
	prs = Presentation()
	prs.slide_width, prs.slide_height = Inches(10), Inches(7.5)

	def add_title_slide(title, subtitle):
	    """Append a blue slide with a large centered title and a subtitle.

	    The slide is added to the module-level ``prs`` using layout index 6
	    (the blank layout) and given a solid RGB(51, 152, 219) background.
	    Both text boxes are 9 inches wide, start 0.5 inches from the left
	    edge, and render white, center-aligned text.

	    Args:
	        title: Heading text, set at 60 pt bold.
	        subtitle: Secondary text placed below the heading, set at 32 pt.
	    """
	    white = RGBColor(255, 255, 255)

	    new_slide = prs.slides.add_slide(prs.slide_layouts[6])
	    bg_fill = new_slide.background.fill
	    bg_fill.solid()
	    bg_fill.fore_color.rgb = RGBColor(51, 152, 219)

	    # Heading: 2.5 inches from the top, 1.5 inches tall.
	    heading_frame = new_slide.shapes.add_textbox(
	        Inches(0.5), Inches(2.5), Inches(9), Inches(1.5)
	    ).text_frame
	    heading_frame.text = title
	    heading_par = heading_frame.paragraphs[0]
	    heading_font = heading_par.font
	    heading_font.size = Pt(60)
	    heading_font.bold = True
	    heading_font.color.rgb = white
	    heading_par.alignment = PP_ALIGN.CENTER

	    # Subtitle: 4.2 inches from the top, 1 inch tall.
	    tagline_frame = new_slide.shapes.add_textbox(
	        Inches(0.5), Inches(4.2), Inches(9), Inches(1)
	    ).text_frame
	    tagline_frame.text = subtitle
	    tagline_par = tagline_frame.paragraphs[0]
	    tagline_font = tagline_par.font
	    tagline_font.size = Pt(32)
	    tagline_font.color.rgb = white
	    tagline_par.alignment = PP_ALIGN.CENTER

	def add_content_slide(title, content_list):
	    """Append a light-grey slide with a heading and one paragraph per item.

	    The slide is added to the module-level ``prs`` using layout index 6
	    (the blank layout) with a solid RGB(240, 240, 240) background. The
	    heading is 44 pt bold; each entry of ``content_list`` becomes its own
	    22 pt paragraph in a word-wrapped text box below it.

	    Args:
	        title: Heading text shown at the top of the slide.
	        content_list: Sequence of strings, rendered top to bottom in order.
	    """
	    new_slide = prs.slides.add_slide(prs.slide_layouts[6])
	    bg_fill = new_slide.background.fill
	    bg_fill.solid()
	    bg_fill.fore_color.rgb = RGBColor(240, 240, 240)

	    # Heading box: 0.5 inches from the left and top, 9 x 0.8 inches.
	    heading_frame = new_slide.shapes.add_textbox(
	        Inches(0.5), Inches(0.5), Inches(9), Inches(0.8)
	    ).text_frame
	    heading_frame.text = title
	    heading_font = heading_frame.paragraphs[0].font
	    heading_font.size = Pt(44)
	    heading_font.bold = True
	    heading_font.color.rgb = RGBColor(44, 62, 80)

	    # Body box: 1 inch from the left, 1.5 inches from the top, 8 x 5.5 inches.
	    body_frame = new_slide.shapes.add_textbox(
	        Inches(1), Inches(1.5), Inches(8), Inches(5.5)
	    ).text_frame
	    body_frame.word_wrap = True

	    for position, line in enumerate(content_list):
	        # The first item reuses the paragraph the text frame starts with;
	        # every later item gets a newly appended paragraph.
	        if position == 0:
	            para = body_frame.paragraphs[0]
	        else:
	            para = body_frame.add_paragraph()
	        para.text = line
	        para_font = para.font
	        para_font.size = Pt(22)
	        para_font.color.rgb = RGBColor(52, 73, 94)
	        para.space_before = Pt(12)
	        para.space_after = Pt(12)
	        para.level = 0

	# Slide 1: opening title slide.
	add_title_slide("🐦 Bird Species Classifier", "Complete Project Logic & Architecture")

	# Slides 2-16: (heading, bullet lines) pairs, rendered in the order listed.
	_CONTENT_SLIDES = [
	    # Slide 2: Project Overview
	    ("Project Overview", [
	        "✓ AI-powered application for identifying bird species",
	        "✓ Accepts TWO inputs: Bird images and bird sounds",
	        "✓ Uses Deep Learning models to classify species",
	        "✓ Available in TWO formats: Desktop (Tkinter) & Web (Flask)",
	        "✓ Real-time predictions with high accuracy",
	    ]),
	    # Slide 3: Key Features
	    ("Key Features", [
	        "📷 Image Classification - Identify birds from photos",
	        "🎵 Audio Classification - Identify birds from sound recordings",
	        "🔄 Real-time Processing - Instant predictions",
	        "🎨 User-friendly Interface - Easy to use for everyone",
	        "⚡ Multi-threaded Loading - Smooth user experience",
	        "☁️ Uses Pre-trained Models - No training needed",
	    ]),
	    # Slide 4: Architecture Overview
	    ("System Architecture", [
	        "INPUT LAYER",
	        " • Image File (.jpg, .png, .bmp)",
	        " • Audio File (.wav, .flac, .mp3)",
	        "",
	        "PROCESSING LAYER",
	        " • Image Processor / Audio Feature Extractor",
	        "",
	        "AI MODEL LAYER",
	        " • Vision Transformer (Image)",
	        " • wav2vec2 Model (Audio)",
	        "",
	        "OUTPUT LAYER",
	        " • Bird Species Prediction",
	    ]),
	    # Slide 5: Image Classification Logic
	    ("Image Classification Pipeline", [
	        "1. USER UPLOADS IMAGE",
	        " → Select bird photo file",
	        "",
	        "2. IMAGE PREPROCESSING",
	        " → Load image using PIL",
	        " → Convert to RGB format",
	        " → Resize & normalize using AutoImageProcessor",
	        "",
	        "3. MODEL INFERENCE",
	        " → Pass processed image to ViT/CNN model",
	        " → Extract feature embeddings",
	        "",
	        "4. PREDICTION",
	        " → Get logits (raw scores) from model",
	        " → Apply argmax to get highest probability class",
	        " → Return bird species name",
	    ]),
	    # Slide 6: Audio Classification Logic
	    ("Audio Classification Pipeline", [
	        "1. USER UPLOADS AUDIO FILE",
	        " → Select .wav, .flac, or .mp3 file",
	        "",
	        "2. AUDIO PREPROCESSING (librosa)",
	        " → Load audio file",
	        " → Resample to 16kHz (standard rate)",
	        " → Convert to MONO (single channel)",
	        " → Extract sound features",
	        "",
	        "3. FEATURE EXTRACTION (wav2vec2)",
	        " → Process audio through feature extractor",
	        " → Convert to numerical embeddings",
	        "",
	        "4. MODEL INFERENCE",
	        " → Pass embeddings to audio classification model",
	        " → Get prediction logits",
	        "",
	        "5. OUTPUT SPECIES",
	        " → argmax() gets highest probability",
	    ]),
	    # Slide 7: Models Deep Dive
	    ("AI Models Used", [
	        "IMAGE MODEL: chriamue/bird-species-classifier",
	        " • Type: Vision Transformer (ViT)",
	        " • Uses: Self-attention mechanism for images",
	        " • Divides image into patches → Analyzes relationships",
	        "",
	        "AUDIO MODEL: greenarcade/wav2vec2-vd-bird-sound",
	        " • Type: wav2vec2 (Self-supervised learning)",
	        " • Uses: Transformer architecture on audio",
	        " • Pre-trained on large unlabeled audio → Fine-tuned for birds",
	    ]),
	    # Slide 8: Technologies & Libraries
	    ("Tech Stack", [
	        "🐍 BACKEND",
	        " • Python 3.13",
	        " • PyTorch - Deep learning framework",
	        " • Transformers - Pre-trained models library",
	        " • librosa - Audio signal processing",
	        "",
	        "🖥️ FRONTEND",
	        " • Tkinter - Desktop GUI (main.py)",
	        " • Flask - Web framework (app.py)",
	        " • HTML/CSS/JavaScript - Web interface",
	    ]),
	    # Slide 9: Desktop App (Tkinter)
	    ("Desktop Version (main.py)", [
	        "UI Components:",
	        " • Upload Image Button → Opens file dialog",
	        " • Upload Audio Button → Opens file dialog",
	        " • Preview Area → Shows selected file",
	        " • Result Area → Shows prediction + species name",
	        " • Next Bird Button → Reset for new prediction",
	        "",
	        "Workflow:",
	        " 1. App loads models on startup (background thread)",
	        " 2. User selects image/audio",
	        " 3. Processing happens in background",
	        " 4. Result displays in GUI",
	        " 5. User can predict again",
	    ]),
	    # Slide 10: Web App (Flask)
	    ("Web Version (app.py)", [
	        "Backend Endpoints:",
	        " • / → Serves HTML interface",
	        " • /status → Check if models are loaded",
	        " • /classify-image → Process image",
	        " • /classify-audio → Process audio",
	        "",
	        "Frontend (JavaScript):",
	        " • Polls /status until models load",
	        " • Sends files to Flask via POST requests",
	        " • Displays results in browser",
	        "",
	        "Run: python app.py",
	        "Access: http://localhost:5000",
	    ]),
	    # Slide 11: Data Flow Diagram
	    ("Complete Data Flow", [
	        "USER INPUT (Image/Audio)",
	        " ↓",
	        "FILE UPLOAD (Form/Dialog)",
	        " ↓",
	        "PREPROCESSING (PIL/librosa)",
	        " ↓",
	        "FEATURE EXTRACTION (AutoProcessor/AutoExtractor)",
	        " ↓",
	        "DEEP LEARNING MODEL (ViT/wav2vec2)",
	        " ↓",
	        "OUTPUT PREDICTIONS (Logits)",
	        " ↓",
	        "ARGMAX (Get highest probability)",
	        " ↓",
	        "SPECIES NAME (Display to user)",
	    ]),
	    # Slide 12: Key Concepts Explained
	    ("Key AI Concepts", [
	        "TRANSFORMER: Neural network architecture using attention",
	        " → Analyzes relationships between all input elements",
	        "",
	        "VISION TRANSFORMER (ViT): Applies transformers to images",
	        " → Treats image patches like words in sentences",
	        "",
	        "WAV2VEC2: Self-supervised audio representation learning",
	        " → Learns from unlabeled audio data",
	        " → Converts sound to meaningful features",
	        "",
	        "INFERENCE: Using trained model to make predictions",
	        " → No training happening, just prediction",
	        "",
	        "ARGMAX: Gets index of highest value in array",
	        " → Converts [0.1, 0.8, 0.1] → index 1 (bird species)",
	    ]),
	    # Slide 13: Performance Metrics
	    ("Model Performance", [
	        "IMAGE CLASSIFICATION:",
	        " • Models: Trained on thousands of bird images",
	        " • Accuracy: High confidence for clear photos",
	        " • Speed: ~1-2 seconds per image",
	        "",
	        "AUDIO CLASSIFICATION:",
	        " • Models: Fine-tuned on bird sound datasets",
	        " • Accuracy: Excellent for 5-30 second clips",
	        " • Speed: ~1-3 seconds per audio file",
	        "",
	        "NOTE: Quality of input affects accuracy",
	        " → Clear images = Better results",
	        " → Clean audio = Better results",
	    ]),
	    # Slide 14: Error Handling
	    ("Error Handling & Edge Cases", [
	        "Image Issues:",
	        " • Invalid format → Error message shown",
	        " • Corrupted file → Exception caught and reported",
	        "",
	        "Audio Issues:",
	        " • Unsupported format → librosa fails gracefully",
	        " • Empty audio → Validation check prevents crash",
	        " • Very short clips → Model may give low confidence",
	        "",
	        "Model Loading:",
	        " • Networks offline → Downloads cached models",
	        " • Large files → Streamed from Hugging Face",
	        " • Loading status → Status bar shows progress",
	    ]),
	    # Slide 15: Future Improvements
	    ("Potential Enhancements", [
	        "🔧 TECHNICAL",
	        " • Add confidence scores to predictions",
	        " • Support for batch processing (multiple files)",
	        " • Model quantization for faster inference",
	        " • GPU acceleration for speedup",
	        "",
	        "📊 FEATURES",
	        " • Database to store prediction history",
	        " • User authentication for web version",
	        " • Mobile app (React Native / Flutter)",
	        " • Real-time bird detection from camera/mic",
	        " • Multi-species probability rankings",
	    ]),
	    # Slide 16: Summary
	    ("Summary", [
	        "✅ TWO INPUT TYPES: Images & Audio recordings",
	        "✅ TWO INTERFACES: Desktop (Tkinter) & Web (Flask)",
	        "✅ STATE-OF-ART MODELS: Vision Transformer + wav2vec2",
	        "✅ AUTOMATIC PREPROCESSING: Handles all formats",
	        "✅ REAL-TIME PREDICTIONS: Instant results",
	        "✅ USER-FRIENDLY: No ML knowledge needed",
	        "✅ SCALABLE: Can serve multiple users (web version)",
	    ]),
	]

	for _heading, _bullets in _CONTENT_SLIDES:
	    add_content_slide(_heading, _bullets)

	# Slide 17: closing title slide.
	add_title_slide("Questions?", "🐦 Bird Species Classifier 🐦")

	# Write the finished deck to the current working directory and report it.
	prs.save('Bird_Species_Classifier_PPT.pptx')
	print("✅ PowerPoint created: Bird_Species_Classifier_PPT.pptx")