Spaces:
Build error
Build error
| #!/usr/bin/env python3 | |
| """ | |
| M4B Chapter Extractor | |
| A command-line tool to extract chapters from M4B audiobook files | |
| and save them as individual MP3 files. | |
| Requirements: | |
| - ffmpeg and ffprobe installed and accessible in PATH | |
| - Python 3.7+ (subprocess.run is called with capture_output/text) | |
| Usage: | |
| python m4b_chapter_extractor.py input.m4b -o output_folder | |
| """ | |
| import argparse | |
| import os | |
| import sys | |
| import subprocess | |
| import json | |
| import re | |
| from pathlib import Path | |
| from typing import List, Dict, Optional | |
class M4BChapterExtractor:
    """Split an M4B/M4A audiobook into one MP3 file per chapter.

    Chapter boundaries are read with ``ffprobe`` and every chapter is
    re-encoded with ``ffmpeg`` (libmp3lame), so both executables must be
    reachable through PATH.
    """

    def __init__(self, input_file: str, output_dir: str, quality: str = "192k"):
        """Validate the input path and create the output directory.

        Args:
            input_file: Path to the source ``.m4b`` / ``.m4a`` file.
            output_dir: Directory that receives the MP3 files; it is created
                (including parents) when missing.
            quality: Bitrate string passed to ffmpeg's ``-ab`` option.

        Raises:
            FileNotFoundError: If ``input_file`` does not exist.
            ValueError: If the file extension is not ``.m4b`` or ``.m4a``.
        """
        self.input_file = Path(input_file)
        self.output_dir = Path(output_dir)
        self.quality = quality

        # Validate the input before touching the filesystem.
        if not self.input_file.exists():
            raise FileNotFoundError(f"Input file not found: {input_file}")
        if self.input_file.suffix.lower() not in ('.m4b', '.m4a'):
            raise ValueError("Input file must be an M4B or M4A file")

        self.output_dir.mkdir(parents=True, exist_ok=True)

    def check_ffmpeg(self) -> bool:
        """Return True when both ``ffmpeg`` and ``ffprobe`` can be executed.

        ``ffprobe`` is checked as well because ``get_chapters`` depends on it.
        """
        for tool in ('ffmpeg', 'ffprobe'):
            try:
                result = subprocess.run([tool, '-version'],
                                        capture_output=True, text=True)
            except OSError:
                # Not installed, not executable, or not on PATH.
                return False
            if result.returncode != 0:
                return False
        return True

    def get_chapters(self) -> List[Dict]:
        """Read the chapter list of the input file via ``ffprobe``.

        Returns:
            The ``chapters`` array of ffprobe's JSON report, or an empty list
            when ffprobe cannot be run, fails, or prints invalid JSON.
        """
        cmd = [
            'ffprobe',
            '-v', 'quiet',
            '-print_format', 'json',
            '-show_chapters',
            str(self.input_file)
        ]
        try:
            # Decode explicitly as UTF-8: relying on the locale encoding
            # corrupts non-ASCII chapter titles on non-UTF-8 systems.
            result = subprocess.run(cmd, capture_output=True, text=True,
                                    encoding='utf-8', errors='replace',
                                    check=True)
            data = json.loads(result.stdout)
        except OSError as e:
            print(f"Error running ffprobe: {e}")
            return []
        except subprocess.CalledProcessError as e:
            print(f"Error getting chapters: {e}")
            return []
        except json.JSONDecodeError as e:
            print(f"Error parsing chapter data: {e}")
            return []
        return data.get('chapters', [])

    def sanitize_filename(self, filename: str) -> str:
        """Turn an arbitrary title into a safe file-name component.

        Whitespace runs are collapsed to single spaces, characters that are
        invalid in file names (``<>:"/\\|?*`` and control characters) are
        removed, and the result is limited to 200 characters.

        Returns:
            The cleaned name, or ``"Chapter"`` when nothing usable remains.
        """
        # Normalise whitespace first so tabs/newlines become spaces instead
        # of being deleted together with the other control characters.
        filename = re.sub(r'\s+', ' ', filename)
        filename = re.sub(r'[<>:"/\\|?*\x00-\x1f\x7f]', '', filename)
        # Removing characters can leave doubled spaces behind.
        filename = re.sub(r' {2,}', ' ', filename).strip()
        if len(filename) > 200:
            filename = filename[:200].strip()
        return filename or "Chapter"

    def format_time(self, seconds: float) -> str:
        """Format a number of seconds as ``HH:MM:SS.mmm``.

        The value is rounded to whole milliseconds before it is split into
        fields, so a value such as 59.9996 yields ``00:01:00.000`` rather
        than an out-of-range ``00:00:60.000``. Negative values are clamped
        to zero.
        """
        total_ms = max(0, int(round(seconds * 1000)))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"

    def extract_chapter(self, chapter: Dict, chapter_num: int, total_chapters: int) -> bool:
        """Encode one chapter of the input file to an MP3 file.

        Args:
            chapter: One entry of ffprobe's ``chapters`` array; must provide
                ``start_time`` and ``end_time`` and may provide
                ``tags.title``.
            chapter_num: 1-based chapter index, used for the file-name
                prefix and the ``track`` tag.
            total_chapters: Total number of chapters, used for the ``track``
                tag and the width of the numeric prefix.

        Returns:
            True when ffmpeg exits with status 0, False when the chapter
            data is unusable or ffmpeg fails.
        """
        raw_title = str((chapter.get('tags') or {}).get('title', f"Chapter {chapter_num:02d}"))
        file_title = self.sanitize_filename(raw_title)
        # The ID3 title keeps punctuation that is only illegal in file
        # names; just control characters are dropped.
        tag_title = re.sub(r'[\x00-\x1f\x7f]+', ' ', raw_title).strip() or file_title

        try:
            start_time = float(chapter['start_time'])
            end_time = float(chapter['end_time'])
        except (KeyError, TypeError, ValueError) as e:
            print(f"  ✗ Invalid chapter timing data: {e!r}")
            return False
        duration = end_time - start_time
        if duration <= 0:
            print(f"  ✗ Chapter {chapter_num} has a non-positive duration, skipping")
            return False

        # Widen the prefix for books with 100+ chapters so files sort correctly.
        width = max(2, len(str(total_chapters)))
        output_filename = f"{chapter_num:0{width}d} - {file_title}.mp3"
        output_path = self.output_dir / output_filename

        print(f"Extracting [{chapter_num}/{total_chapters}]: {tag_title}")
        print(f"  Duration: {self.format_time(duration)}")

        cmd = [
            'ffmpeg',
            # -ss before -i seeks in the input instead of decoding and
            # discarding everything up to the chapter start.
            '-ss', f"{start_time:.6f}",
            '-i', str(self.input_file),
            '-t', f"{duration:.6f}",
            '-acodec', 'libmp3lame',
            '-ab', self.quality,
            '-map_metadata', '0',
            '-id3v2_version', '3',
            '-metadata', f'title={tag_title}',
            '-metadata', f'track={chapter_num}/{total_chapters}',
            '-y',  # Overwrite output file
            str(output_path)
        ]
        try:
            # stdin is detached so ffmpeg cannot consume terminal input.
            result = subprocess.run(cmd, capture_output=True, text=True,
                                    encoding='utf-8', errors='replace',
                                    stdin=subprocess.DEVNULL)
        except OSError as e:
            print(f"  ✗ Exception during extraction: {e}")
            return False

        if result.returncode == 0:
            print(f"  ✓ Saved: {output_filename}")
            return True
        print(f"  ✗ Error extracting chapter: {result.stderr}")
        return False

    def extract_all_chapters(self) -> bool:
        """Extract every chapter of the input file and print a summary.

        Returns:
            True only when the tools are available, at least one chapter
            exists and every chapter was extracted successfully.
        """
        print(f"Processing: {self.input_file.name}")
        print(f"Output directory: {self.output_dir}")

        if not self.check_ffmpeg():
            print("Error: FFmpeg not found. Please install FFmpeg and ensure it's in your PATH.")
            return False

        chapters = self.get_chapters()
        if not chapters:
            print("No chapters found in the M4B file.")
            return False

        total = len(chapters)
        print(f"Found {total} chapters")
        print("-" * 50)

        success_count = 0
        for number, chapter in enumerate(chapters, 1):
            if self.extract_chapter(chapter, number, total):
                success_count += 1
            print()

        print("-" * 50)
        print(f"Extraction complete: {success_count}/{total} chapters extracted successfully")
        if success_count == total:
            print("All chapters extracted successfully!")
            return True
        print(f"Warning: {total - success_count} chapters failed to extract")
        return False
def main():
    """Command-line entry point: parse arguments, run the extraction, exit.

    The process exits with status 0 when every chapter was extracted and 1
    otherwise (invalid input, missing tools, failed chapters, user abort or
    an unexpected error). Error messages are written to stderr; with
    ``--verbose`` the traceback of an unexpected error is printed as well.
    """
    # Local import: only needed for the --verbose diagnostics below.
    import traceback

    parser = argparse.ArgumentParser(
        description="Extract chapters from M4B audiobook files as individual MP3 files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python m4b_chapter_extractor.py audiobook.m4b -o chapters/
  python m4b_chapter_extractor.py audiobook.m4b -o output/ -q 128k
  python m4b_chapter_extractor.py audiobook.m4b -o output/ --quality 256k

Requirements:
  - FFmpeg must be installed and accessible in PATH
  - Input file must be M4B or M4A format
        """
    )
    parser.add_argument(
        'input_file',
        help='Path to the input M4B audiobook file'
    )
    parser.add_argument(
        '-o', '--output',
        required=True,
        help='Output directory for extracted MP3 chapters'
    )
    parser.add_argument(
        '-q', '--quality',
        default='192k',
        help='MP3 audio quality/bitrate (default: 192k). Examples: 128k, 192k, 256k, 320k'
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Enable verbose output'
    )
    args = parser.parse_args()

    try:
        extractor = M4BChapterExtractor(
            input_file=args.input_file,
            output_dir=args.output,
            quality=args.quality
        )
        success = extractor.extract_all_chapters()
    except (FileNotFoundError, ValueError) as e:
        # Expected validation failures raised by the extractor constructor.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
    except KeyboardInterrupt:
        print("\nOperation cancelled by user", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report, optionally with the full traceback.
        print(f"Unexpected error: {e}", file=sys.stderr)
        if args.verbose:
            traceback.print_exc()
        sys.exit(1)

    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()