Spaces:

MazCodes
/

fragmenta

Sleeping

App Files

MazCodes committed on Apr 20

Commit

72f7156

verified ·

1 Parent(s): 44978ad

Upload folder using huggingface_hub

Browse files

Files changed (11) hide show

app/backend/app.py +92 -154
app/backend/data/simple_audio_processor.py +6 -13
app/core/config.py +5 -8
app/core/generation/audio_generator.py +0 -17
app/core/model_manager.py +8 -13
app/frontend/build/assets/index-RtS7dlIj.js +0 -0
app/frontend/build/index.html +1 -1
app/frontend/src/components/BulkAnnotatePanel.js +71 -8
app/frontend/src/components/HfAuthDialog.js +3 -11
utils/exceptions.py +0 -1
utils/logger.py +0 -9

app/backend/app.py CHANGED Viewed

@@ -58,13 +58,6 @@ def request_entity_too_large(error):
 DEBUG_MODE = os.environ.get('FRAGMENTA_DEBUG', 'false').lower() == 'true'
-# ---------------------------------------------------------------------------
-# Lazy-initialised backend components
-# ---------------------------------------------------------------------------
-# These are initialised on first real API request (not at import time) so that
-# the Flask server always starts — even when model files or heavy deps are
-# temporarily unavailable.  The /api/health endpoint works unconditionally.
-# ---------------------------------------------------------------------------
 config = None
 audio_processor = None
 generator = None
@@ -74,7 +67,6 @@ _init_error = None
 def _ensure_components():
-    """Initialise backend components on first use. Thread-safe."""
     global config, audio_processor, generator, model_manager
     global _components_initialised, _init_error
@@ -105,21 +97,18 @@ def _ensure_components():
 @app.before_request
 def lazy_init():
-    """Initialise heavy components before the first real API call."""
     if request.path == '/api/health':
-        return  # health endpoint must always work
     try:
         _ensure_components()
     except Exception as e:
         if request.path.startswith('/api/'):
             return jsonify({'error': f'Backend not ready: {e}'}), 503
-        # Static file / React routes — let them through even if init fails
         return None
 @app.route('/api/health')
 def health_check():
-    """Health check endpoint — always available, even when components fail."""
     import torch
     status = {
         'status': 'ok' if _components_initialised else 'degraded',
@@ -128,9 +117,8 @@ def health_check():
         'gpu_available': torch.cuda.is_available(),
         'gpu_name': torch.cuda.get_device_name(0) if torch.cuda.is_available() else None,
     }
-    code = 200 if _components_initialised else 503
     # Return 200 even in degraded mode so Docker HEALTHCHECK doesn't kill
-    # the container before components finish loading
     return jsonify(status), 200
@@ -208,18 +196,13 @@ def process_files():
         chunks_preview_data = []
         for filename, prompt in prompts_data:
-            chunks_preview_data.append([
-                filename,  # Original filename (not chunked)
-                filename,  # Source file
-                prompt,    # User's prompt
-                "original" # Not chunked
-            ])
-        # Do not overwrite the metadata! keeps dataset creation more sustainable
         json_path = Path(config.get_metadata_json_path())
         existing_metadata = []
-        # Load existing metadata if file exists
         if json_path.exists():
             try:
                 with open(json_path, 'r', encoding='utf-8') as f:
@@ -254,7 +237,7 @@ def process_files():
             'message': f'Files saved successfully! {len(saved_files)} original files saved to data folder',
             'saved_files': saved_files,
             'processed_count': len(saved_files),
-            'chunks_preview': chunks_preview_data,  # Show all files (no chunking)
             'data_folder': str(data_dir),
             'metadata_json': str(json_path),
             'approach': 'original_files_only'
@@ -366,7 +349,7 @@ def generate_audio():
         config_file = None
         model_file_path = None
-        # Priority: unwrapped_model_path > model_path > base model
         if unwrapped_model_path:
             model_file_path = Path(unwrapped_model_path)
             if not model_file_path.exists():
@@ -381,7 +364,7 @@ def generate_audio():
                     f"model_path:{model_name}", str(model_file_path))
             logger.debug(f"Using model path: {model_file_path}")
-        # Determine config based on file size or model name
         if model_file_path:
             file_size_gb = model_file_path.stat().st_size / (1024**3)
             config_file = "model_config_small.json" if file_size_gb < 2.0 else "model_config.json"
@@ -402,7 +385,6 @@ def generate_audio():
     logger.info(f"Starting generation with config: {config_file}")
     try:
         if determined_model_path and determined_model_path.exists():
-            # Use the determined model path
             output_path = generator.generate_audio(
                 prompt,
                 unwrapped_model_path=unwrapped_model_path if unwrapped_model_path else None,
@@ -411,7 +393,6 @@ def generate_audio():
                 duration=duration
             )
         elif model_name in ['stable-audio-open-small', 'stable-audio-open-1.0']:
-            # Handle base models
             model_file_mapping = {
                 'stable-audio-open-small': 'stable-audio-open-small-model.safetensors',
                 'stable-audio-open-1.0': 'stable-audio-open-model.safetensors'
@@ -548,10 +529,8 @@ def get_models():
                 has_checkpoint = len(checkpoint_files) > 0
                 has_config = len(config_files) > 0
-                # Create detailed checkpoint information
                 checkpoints = []
                 for ckpt_file in checkpoint_files:
-                    # Extract epoch and step from filename if possible
                     import re
                     name = ckpt_file.stem
                     epoch_match = re.search(r'epoch=(\d+)', name)
@@ -559,7 +538,6 @@ def get_models():
                     checkpoint_info = {
                         'name': name,
-                        # Use relative path
                         'path': str(ckpt_file.relative_to(config.project_root)),
                         'size_mb': round(ckpt_file.stat().st_size / (1024 * 1024), 1),
                         'created': ckpt_file.stat().st_mtime
@@ -572,45 +550,38 @@ def get_models():
                     checkpoints.append(checkpoint_info)
-                # Sort checkpoints by creation time (newest first)
                 checkpoints.sort(key=lambda x: x['created'], reverse=True)
-                # Get the latest checkpoint and config files
                 latest_checkpoint = max(checkpoint_files, key=lambda x: x.stat(
                 ).st_mtime) if checkpoint_files else None
                 latest_config = max(
                     config_files, key=lambda x: x.stat().st_mtime) if config_files else None
-                # Check for unwrapped models
                 unwrapped_dir = model_dir / "unwrapped"
                 unwrapped_models = []
                 if unwrapped_dir.exists():
                     for unwrapped_file in unwrapped_dir.glob("*.safetensors"):
                         unwrapped_models.append({
                             'name': unwrapped_file.stem,
-                            # Use relative path
                             'path': str(unwrapped_file.relative_to(config.project_root)),
                             'size_mb': round(unwrapped_file.stat().st_size / (1024 * 1024), 1),
                             'created': unwrapped_file.stat().st_mtime
                         })
-                    # Sort unwrapped models by creation time (newest first)
                     unwrapped_models.sort(
                         key=lambda x: x['created'], reverse=True)
-                # For fine-tuned models, use the base model's config
-                base_config_path = "models/config/model_config_small.json"  # Use relative path
                 models.append({
                     'name': model_dir.name,
-                    # Use relative path
                     'path': str(model_dir.relative_to(config.project_root)),
                     'has_checkpoint': has_checkpoint,
                     'has_config': has_config,
-                    # Use relative path
                     'ckpt_path': str(latest_checkpoint.relative_to(config.project_root)) if latest_checkpoint else None,
-                    'config_path': base_config_path,  # Use base model config for unwrapping
-                    'checkpoints': checkpoints,  # Detailed checkpoint list
                     'unwrapped_models': unwrapped_models,
                     'created': model_dir.stat().st_mtime if model_dir.exists() else None
                 })
@@ -622,7 +593,6 @@ def get_models():
 @app.route('/api/models/available', methods=['GET'])
 def get_available_models():
-    """Get list of available models from Hugging Face"""
     try:
         models = model_manager.get_available_models()
         return jsonify({'models': models})
@@ -632,7 +602,6 @@ def get_available_models():
 @app.route('/api/models/<model_id>/info', methods=['GET'])
 def get_model_info(model_id):
-    """Get information about a specific model"""
     try:
         model_info = model_manager.get_model_info(model_id)
         if not model_info:
@@ -644,7 +613,6 @@ def get_model_info(model_id):
 @app.route('/api/models/<model_id>/accept-terms', methods=['POST'])
 def accept_model_terms(model_id):
-    """Accept terms for a specific model"""
     try:
         success = model_manager.accept_terms(model_id)
         if success:
@@ -657,13 +625,10 @@ def accept_model_terms(model_id):
 @app.route('/api/models/<model_id>/download', methods=['POST'])
 def download_model(model_id):
-    """Download a model from Hugging Face"""
     try:
-        # Check if terms are accepted
         if not model_manager.is_terms_accepted(model_id):
             return jsonify({'error': 'Terms not accepted for this model'}), 400
-        # Start download
         success = model_manager.download_model(model_id)
         if success:
             return jsonify({
@@ -678,7 +643,6 @@ def download_model(model_id):
 @app.route('/api/hf-login', methods=['POST'])
 def hf_login():
-    """Login to Hugging Face with a token"""
     try:
         data = request.json
         token = data.get('token')
@@ -698,36 +662,33 @@ def hf_login():
 @app.route('/api/base-models/status', methods=['GET'])
 def get_base_models_status():
-    """Get the download status of base models"""
     try:
         import os
         from pathlib import Path
         base_models = {
             'stable-audio-open-1.0': {
                 'name': 'Stable Audio Open 1.0',
-                'path': 'models/pretrained',  # Updated to correct path
-                'file': 'stable-audio-open-model.safetensors',  # Specific file to check
                 'downloaded': False
             },
             'stable-audio-open-small': {
-                'name': 'Stable Audio Open Small',
-                'path': 'models/pretrained',  # Updated to correct path
-                'file': 'stable-audio-open-small-model.safetensors',  # Specific file to check
                 'downloaded': False
             }
         }
-        # Check if models are actually downloaded by looking for specific files
         for model_id, info in base_models.items():
             model_dir = Path(info['path'])
             model_file = model_dir / info['file']
-            # Check if the specific model file exists
             if model_file.exists() and model_file.is_file():
                 info['downloaded'] = True
             else:
-                # Fallback: check subdirectory structure (old format)
                 old_path = model_dir / model_id
                 if old_path.exists() and old_path.is_dir():
                     has_files = any([
@@ -746,7 +707,6 @@ def get_base_models_status():
 @app.route('/api/models/<model_id>/delete', methods=['DELETE'])
 def delete_model(model_id):
-    """Delete a downloaded model"""
     try:
         success = model_manager.delete_model(model_id)
         if success:
@@ -759,7 +719,6 @@ def delete_model(model_id):
 @app.route('/api/models/storage', methods=['GET'])
 def get_model_storage():
-    """Get storage information for models"""
     try:
         storage_info = model_manager.get_storage_info()
         return jsonify(storage_info)
@@ -769,21 +728,18 @@ def get_model_storage():
 @app.route('/api/start-fresh', methods=['POST'])
 def start_fresh():
-    """Delete all data and start fresh"""
     try:
         config = get_config()
         data_dir = config.get_path("data")
         config_dir = config.get_path("models_config")
-        # Delete all data files
         data_files_deleted = 0
         if data_dir.exists():
             for file_path in data_dir.glob("*"):
-                if file_path.is_file() and not file_path.name.endswith('.py'):  # Don't delete Python files
                     file_path.unlink()
                     data_files_deleted += 1
-        # Delete config metadata files (but keep the model configs)
         config_files_deleted = 0
         if config_dir.exists():
             for file_path in config_dir.glob("custom_metadata.py"):
@@ -791,7 +747,6 @@ def start_fresh():
                     file_path.unlink()
                     config_files_deleted += 1
-        # Recreate empty data directory
         data_dir.mkdir(exist_ok=True, parents=True)
         return jsonify({
@@ -806,7 +761,6 @@ def start_fresh():
 @app.route('/api/unwrap-model', methods=['POST'])
 def unwrap_model():
-    """Unwrap a specific model checkpoint"""
     try:
         data = request.json
         model_config = data.get('model_config')
@@ -816,34 +770,28 @@ def unwrap_model():
         if not model_config or not ckpt_path:
             return jsonify({'error': 'model_config and ckpt_path are required'}), 400
-        # Use the stable-audio-tools unwrap_model.py script directly for individual checkpoints
         import subprocess
         from pathlib import Path
-        # Get config to resolve relative paths
         config = get_config()
         repo_root = config.project_root
-        # Resolve paths relative to project root
         model_config_path = repo_root / \
             model_config if not Path(
                 model_config).is_absolute() else Path(model_config)
         ckpt_path_resolved = repo_root / \
             ckpt_path if not Path(ckpt_path).is_absolute() else Path(ckpt_path)
-        # Validate paths exist
         if not model_config_path.exists():
             return jsonify({'error': f'Model config not found: {model_config_path}'}), 400
         if not ckpt_path_resolved.exists():
             return jsonify({'error': f'Checkpoint not found: {ckpt_path_resolved}'}), 400
-        # Get the model directory and create unwrapped subdirectory
         model_dir = ckpt_path_resolved.parent
         unwrapped_dir = model_dir / "unwrapped"
         unwrapped_dir.mkdir(exist_ok=True)
         cmd = [
-            # Just the script name since we're running from stable-audio-tools dir
             sys.executable, 'unwrap_model.py',
             '--model-config', str(model_config_path),
             '--ckpt-path', str(ckpt_path_resolved),
@@ -851,17 +799,14 @@ def unwrap_model():
             '--use-safetensors'
         ]
-        # Run from repo root and set working directory to stable-audio-tools
         stable_audio_dir = repo_root / "stable-audio-tools"
         proc = subprocess.run(cmd, cwd=stable_audio_dir,
                               capture_output=True, text=True)
         if proc.returncode == 0:
-            # The unwrap_model.py script creates files in the stable-audio-tools directory
-            # We need to move them to the correct unwrapped directory
-            # Find the created file in stable-audio-tools directory
             import glob
             pattern = str(stable_audio_dir / f"{name}*.safetensors")
             created_files = glob.glob(pattern)
@@ -872,14 +817,12 @@ def unwrap_model():
                 target_path = unwrapped_dir / created_path.name
                 try:
-                    # Move the file to the unwrapped directory
                     created_path.rename(target_path)
                     moved_files.append(str(target_path))
                     print(f"Moved {created_path.name} to {target_path}")
                 except Exception as e:
                     print(f"Error moving {created_path}: {e}")
-            # Find all unwrapped files in the unwrapped directory
             unwrapped_files = list(unwrapped_dir.glob("*.safetensors"))
             return jsonify({
@@ -897,7 +840,6 @@ def unwrap_model():
 @app.route('/api/delete-checkpoint', methods=['POST'])
 def delete_checkpoint():
-    """Delete a specific checkpoint file"""
     try:
         data = request.json
         checkpoint_path = data.get('checkpoint_path')
@@ -905,11 +847,9 @@ def delete_checkpoint():
         if not checkpoint_path:
             return jsonify({'error': 'checkpoint_path is required'}), 400
-        # Get config to resolve relative paths
         config = get_config()
         repo_root = config.project_root
-        # Resolve path relative to project root
         ckpt_path_resolved = repo_root / \
             checkpoint_path if not Path(
                 checkpoint_path).is_absolute() else Path(checkpoint_path)
@@ -917,7 +857,7 @@ def delete_checkpoint():
         if not ckpt_path_resolved.exists():
             return jsonify({'error': f'Checkpoint file not found: {ckpt_path_resolved}'}), 404
-        # Ensure it's a .ckpt file for safety
         if not ckpt_path_resolved.suffix == '.ckpt':
             return jsonify({'error': f'Only .ckpt files can be deleted: {ckpt_path_resolved}'}), 400
@@ -937,7 +877,6 @@ def delete_checkpoint():
 @app.route('/api/delete-wrapped-checkpoint', methods=['POST'])
 def delete_wrapped_checkpoint():
-    """Delete wrapped checkpoint files for a specific model"""
     try:
         data = request.json
         model_name = data.get('model_name')
@@ -945,7 +884,6 @@ def delete_wrapped_checkpoint():
         if not model_name:
             return jsonify({'error': 'model_name is required'}), 400
-        # Find the model directory
         config = get_config()
         models_dir = config.get_path("models_fine_tuned")
         model_dir = models_dir / model_name
@@ -953,7 +891,6 @@ def delete_wrapped_checkpoint():
         if not model_dir.exists():
             return jsonify({'error': f'Model directory not found: {model_dir}'}), 404
-        # Find and delete wrapped checkpoint files (.ckpt)
         deleted_files = []
         for ckpt_file in model_dir.glob("*.ckpt"):
             try:
@@ -976,7 +913,6 @@ def delete_wrapped_checkpoint():
 @app.route('/api/free-gpu-memory', methods=['POST'])
 def free_gpu_memory():
-    """Free GPU memory by clearing cache and stopping training processes"""
     try:
         import subprocess
         import torch
@@ -985,23 +921,18 @@ def free_gpu_memory():
         print(" FREEING GPU MEMORY...")
-        # Clear PyTorch CUDA cache
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
             print("    Cleared PyTorch CUDA cache")
-        # Clear MPS cache if available
         if hasattr(torch, 'mps') and torch.backends.mps.is_available():
             torch.mps.empty_cache()
             print("    Cleared MPS cache")
-        # Get current process ID to avoid killing ourselves
         current_pid = os.getpid()
         print(f"     Current process PID: {current_pid}")
-        # Check for training processes and stop them safely
         try:
-            # Get all CUDA processes
             result = subprocess.run(['nvidia-smi', '--query-compute-apps=pid,used_memory,process_name', '--format=csv,noheader,nounits'],
                                     capture_output=True, text=True, timeout=10)
@@ -1016,32 +947,25 @@ def free_gpu_memory():
                                 pid_int = int(pid)
                                 mem_gb = float(mem_mb) / 1024
-                                # Skip our own process
                                 if pid_int == current_pid:
                                     print(
                                         f"     Skipping current process PID: {pid_int}")
                                     continue
-                                # Check if it's a Python process using significant memory
                                 if 'python' in process_name.lower() and mem_gb > 1.0:
                                     print(
                                         f"    Found Python process PID: {pid_int} using {mem_gb:.1f}GB")
                                     print(f"      Process: {process_name}")
-                                    # Try to gracefully stop the process
                                     try:
-                                        # Send SIGTERM first (graceful)
                                         subprocess.run(
                                             ['kill', '-TERM', str(pid_int)], check=False, timeout=5)
                                         print(
                                             f"    Sent SIGTERM to PID: {pid_int}")
-                                        # Wait a moment
                                         time.sleep(2)
-                                        # Check if process is still running
                                         try:
-                                            # Check if process exists
                                             os.kill(pid_int, 0)
                                             print(
                                                 f"     Process {pid_int} still running, sending SIGKILL")
@@ -1069,18 +993,14 @@ def free_gpu_memory():
         except Exception as e:
             print(f"     Could not check CUDA processes: {e}")
-        # Wait a moment for processes to stop
         time.sleep(3)
-        # Clear cache again after stopping processes
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
             print("    Cleared PyTorch CUDA cache again")
-        # Get memory info after clearing
         memory_info = {}
         if torch.cuda.is_available():
-            # Use the same improved memory detection as the status endpoint
             total_memory = torch.cuda.get_device_properties(
                 0).total_memory / (1024**3)
             torch.cuda.synchronize()
@@ -1088,7 +1008,6 @@ def free_gpu_memory():
             cached_memory = torch.cuda.memory_reserved(0) / (1024**3)
             free_memory = total_memory - allocated_memory
-            # Get nvidia-smi info
             try:
                 result = subprocess.run(['nvidia-smi', '--query-gpu=memory.used,memory.total', '--format=csv,noheader,nounits'],
                                         capture_output=True, text=True, timeout=5)
@@ -1107,7 +1026,7 @@ def free_gpu_memory():
                 nvidia_total_gb = total_memory
                 nvidia_free_gb = total_memory
-            # Use the most accurate reading
             if allocated_memory > 0:
                 final_allocated = allocated_memory
                 final_free = free_memory
@@ -1146,7 +1065,6 @@ def free_gpu_memory():
 @app.route('/api/toggle-debug', methods=['POST'])
 def toggle_debug():
-    """Toggle debug mode for GPU memory logging"""
     global DEBUG_MODE
     try:
         data = request.json
@@ -1166,14 +1084,12 @@ def toggle_debug():
 @app.route('/api/debug-status', methods=['GET'])
 def get_debug_status():
-    """Get current debug mode status"""
     return jsonify({
         'debug_mode': DEBUG_MODE,
         'message': f"Debug mode is {'enabled' if DEBUG_MODE else 'disabled'}"
     })
-# Add API call statistics for debugging
 _api_call_stats = {
     'gpu_memory_status': 0,
     'status': 0,
@@ -1183,18 +1099,15 @@ _api_call_stats = {
 def _log_api_call(endpoint):
-    """Log API call for debugging"""
     global _api_call_stats
     _api_call_stats[endpoint] = _api_call_stats.get(endpoint, 0) + 1
-    # Reset stats every hour
     if time.time() - _api_call_stats['last_reset'] > 3600:
         _api_call_stats = {endpoint: 1, 'last_reset': time.time()}
 @app.route('/api/debug-stats', methods=['GET'])
 def get_debug_stats():
-    """Get API call statistics for debugging"""
     return jsonify({
         'api_call_stats': _api_call_stats,
         'uptime_hours': (time.time() - _api_call_stats['last_reset']) / 3600,
@@ -1206,19 +1119,16 @@ def get_debug_stats():
     })
-# Add caching for GPU memory status to reduce overhead
 _gpu_memory_cache = {}
 _gpu_memory_cache_time = 0
-_gpu_memory_cache_duration = 2.0  # Cache for 2 seconds
-# Throttle memory warnings (only show every 30 seconds)
 _last_memory_warning_time = 0
-_memory_warning_interval = 30  # seconds
 @app.route('/api/open-output-folder', methods=['POST'])
 def open_output_folder():
-    """Open the output folder in the system file explorer"""
     try:
         import subprocess
         import platform
@@ -1241,7 +1151,6 @@ def open_output_folder():
 @app.route('/api/open-documentation', methods=['POST'])
 def open_documentation():
-    """Open selected public Fragmenta links in the system browser."""
     try:
         import webbrowser
@@ -1272,12 +1181,10 @@ def open_documentation():
         logger.error(f"Error opening documentation: {e}")
         return jsonify({"success": False, "error": str(e)}), 500
-# Global flag for welcome page state
 _welcome_page_closed = False
 @app.route('/api/welcome-page-closed', methods=['POST'])
 def welcome_page_closed():
-    """Signal that the welcome page has been closed"""
     global _welcome_page_closed
     try:
         _welcome_page_closed = True
@@ -1289,26 +1196,21 @@ def welcome_page_closed():
 @app.route('/api/welcome-page-status', methods=['GET'])
 def get_welcome_page_status():
-    """Check if welcome page has been closed"""
     global _welcome_page_closed
     return jsonify({"closed": _welcome_page_closed})
 @app.route('/api/license-info', methods=['GET'])
 def get_license_info():
-    """Get license and attribution information"""
     try:
         project_root = Path(__file__).parent.parent.parent
-        # Read LICENSE file
         license_file = project_root / "LICENSE"
         license_text = ""
         if license_file.exists():
             with open(license_file, 'r', encoding='utf-8') as f:
-                # Read first 50 lines for summary
                 lines = f.readlines()[:50]
                 license_text = ''.join(lines)
-        # Read NOTICE.md for attribution info
         notice_file = project_root / "NOTICE.md"
         notice_text = ""
         if notice_file.exists():
@@ -1332,7 +1234,6 @@ def get_license_info():
 @app.route('/api/models-status', methods=['GET'])
 def get_models_status():
-    """Check if required models exist and if auth dialog should be shown"""
     try:
         required_models = ['stable-audio-open-small', 'stable-audio-open-1.0']
         downloaded_models = [
@@ -1377,11 +1278,9 @@ def get_models_status():
 @app.route('/api/gpu-memory-status', methods=['GET'])
 def get_gpu_memory_status():
-    """Get current GPU memory status with caching to reduce overhead"""
     _log_api_call('gpu_memory_status')
     global _gpu_memory_cache, _gpu_memory_cache_time
-    # Check cache first
     current_time = time.time()
     if current_time - _gpu_memory_cache_time < _gpu_memory_cache_duration:
         return jsonify({'memory_info': _gpu_memory_cache})
@@ -1393,42 +1292,35 @@ def get_gpu_memory_status():
         memory_info = {}
         if torch.cuda.is_available():
-            # Get PyTorch memory info with better tracking
             total_memory = torch.cuda.get_device_properties(
                 0).total_memory / (1024**3)
-            # Force PyTorch to synchronize before reading memory
             torch.cuda.synchronize()
             allocated_memory = torch.cuda.memory_allocated(0) / (1024**3)
             cached_memory = torch.cuda.memory_reserved(0) / (1024**3)
             free_memory = total_memory - allocated_memory
-            # Get nvidia-smi info for comparison (only if PyTorch shows 0 usage)
             nvidia_used_gb = 0
             nvidia_total_gb = total_memory
             nvidia_free_gb = total_memory
             if allocated_memory == 0:
                 try:
                     result = subprocess.run(['nvidia-smi', '--query-gpu=memory.used,memory.total', '--format=csv,noheader,nounits'],
-                                            capture_output=True, text=True, timeout=1)  # Add timeout
                     if result.stdout.strip():
                         used_mb, total_mb = result.stdout.strip().split(', ')
                         nvidia_used_gb = float(used_mb) / 1024
                         nvidia_total_gb = float(total_mb) / 1024
                         nvidia_free_gb = nvidia_total_gb - nvidia_used_gb
                 except Exception as e:
-                    # Only log if there's an actual error, not just missing nvidia-smi
                     if "Could not get nvidia-smi info" not in str(e):
                         print(f"GPU Memory Error: {e}")
-            # Get CUDA capability and device info
             cuda_capability = torch.cuda.get_device_capability(0)
             device_name = torch.cuda.get_device_name(0)
-            # Use the most accurate memory reading
-            # If PyTorch shows 0 but nvidia-smi shows usage, use nvidia-smi
-            # If PyTorch shows usage, use PyTorch
             if allocated_memory > 0:
                 final_allocated = allocated_memory
                 final_cached = cached_memory
@@ -1436,7 +1328,7 @@ def get_gpu_memory_status():
                 memory_source = "PyTorch"
             else:
                 final_allocated = nvidia_used_gb
-                final_cached = cached_memory  # Keep PyTorch cached
                 final_free = nvidia_free_gb
                 memory_source = "nvidia-smi"
@@ -1453,19 +1345,17 @@ def get_gpu_memory_status():
                 'nvidia_used': nvidia_used_gb
             }
-            # Only log if there are significant issues AND enough time has passed
             global _last_memory_warning_time
             if (current_time - _last_memory_warning_time) > _memory_warning_interval:
-                if final_allocated > 10.0:  # More than 10GB used
                     print(
                         f"  High GPU Memory Usage: {final_allocated:.2f}GB allocated, {final_free:.2f}GB free")
                     _last_memory_warning_time = current_time
-                elif final_free < 1.0:  # Less than 1GB free
                     print(
                         f"  Low GPU Memory: {final_free:.2f}GB free, {final_allocated:.2f}GB allocated")
                     _last_memory_warning_time = current_time
         else:
-            # CPU fallback
             memory_info['cpu'] = {
                 'total': psutil.virtual_memory().total / (1024**3),
                 'available': psutil.virtual_memory().available / (1024**3),
@@ -1474,7 +1364,6 @@ def get_gpu_memory_status():
                 'type': 'cpu'
             }
-        # Update cache
         _gpu_memory_cache = memory_info
         _gpu_memory_cache_time = current_time
@@ -1484,9 +1373,6 @@ def get_gpu_memory_status():
         return jsonify({'error': str(e)}), 500
-# ---------------------------------------------------------------------------
-# Bulk auto-annotation
-# ---------------------------------------------------------------------------
 _annotate_job_lock = threading.Lock()
 _annotate_job = {
     'state': 'idle',   # idle | running | done | error
@@ -1514,9 +1400,64 @@ def _clap_ckpt_path():
     return clap_checkpoint_path(get_config().get_path('models_pretrained'))
 @app.route('/api/pick-folder', methods=['POST'])
 def pick_folder():
-    """Open a native folder-picker dialog on the host and return the chosen path."""
     import subprocess
     import shutil as _shutil
@@ -1747,10 +1688,8 @@ def bulk_annotate_unload_clap():
 @app.route('/shutdown', methods=['POST'])
 def shutdown():
-    """Shutdown the Flask server gracefully"""
     try:
         print(" Shutting down Flask server...")
-        # Use a function to shutdown the server
         func = request.environ.get('werkzeug.server.shutdown')
         if func is None:
             raise RuntimeError('Not running with the Werkzeug Server')
@@ -1761,7 +1700,6 @@ def shutdown():
 if __name__ == '__main__':
-    # 0.0.0.0: reachable at this machine's LAN/Tailscale IPs (e.g. http://100.122.31.32:5001).
     host = os.environ.get('FLASK_HOST', '0.0.0.0')
     port = int(os.environ.get('FLASK_PORT', '5001'))
     app.run(debug=True, host=host, port=port)

 DEBUG_MODE = os.environ.get('FRAGMENTA_DEBUG', 'false').lower() == 'true'
 config = None
 audio_processor = None
 generator = None
 def _ensure_components():
     global config, audio_processor, generator, model_manager
     global _components_initialised, _init_error
 @app.before_request
 def lazy_init():
     """Initialise backend components before a request is handled.

     Requests to ``/api/health`` return immediately without triggering
     initialisation. For every other path ``_ensure_components()`` is called;
     if it raises, ``/api/`` requests receive a 503 JSON error and all other
     paths fall through with ``None``.
     """
     if request.path == '/api/health':
+        return
     try:
         _ensure_components()
     except Exception as e:
         if request.path.startswith('/api/'):
             return jsonify({'error': f'Backend not ready: {e}'}), 503
         # Non-API paths: returning None lets Flask carry on to the view.
         return None
 @app.route('/api/health')
 def health_check():
     """Report initialisation state and CUDA availability as JSON.

     ``status`` is ``'ok'`` when ``_components_initialised`` is truthy and
     ``'degraded'`` otherwise. ``gpu_name`` is the name of CUDA device 0, or
     ``None`` when CUDA is unavailable. The HTTP status is always 200.
     """
     import torch
     status = {
         'status': 'ok' if _components_initialised else 'degraded',
         'gpu_available': torch.cuda.is_available(),
         'gpu_name': torch.cuda.get_device_name(0) if torch.cuda.is_available() else None,
     }
     # Return 200 even in degraded mode so Docker HEALTHCHECK doesn't kill
+    # the container before components finish loading.
     return jsonify(status), 200
         chunks_preview_data = []
         for filename, prompt in prompts_data:
+            chunks_preview_data.append([filename, filename, prompt, "original"])
+        # Merge into existing metadata instead of overwriting, so repeated
+        # uploads accumulate into one dataset.
         json_path = Path(config.get_metadata_json_path())
         existing_metadata = []
         if json_path.exists():
             try:
                 with open(json_path, 'r', encoding='utf-8') as f:
             'message': f'Files saved successfully! {len(saved_files)} original files saved to data folder',
             'saved_files': saved_files,
             'processed_count': len(saved_files),
+            'chunks_preview': chunks_preview_data,
             'data_folder': str(data_dir),
             'metadata_json': str(json_path),
             'approach': 'original_files_only'
         config_file = None
         model_file_path = None
+        # Priority: unwrapped_model_path > model_path > base model.
         if unwrapped_model_path:
             model_file_path = Path(unwrapped_model_path)
             if not model_file_path.exists():
                     f"model_path:{model_name}", str(model_file_path))
             logger.debug(f"Using model path: {model_file_path}")
+        # Small and full models use different configs; pick by file size when the name is ambiguous.
         if model_file_path:
             file_size_gb = model_file_path.stat().st_size / (1024**3)
             config_file = "model_config_small.json" if file_size_gb < 2.0 else "model_config.json"
     logger.info(f"Starting generation with config: {config_file}")
     try:
         if determined_model_path and determined_model_path.exists():
             output_path = generator.generate_audio(
                 prompt,
                 unwrapped_model_path=unwrapped_model_path if unwrapped_model_path else None,
                 duration=duration
             )
         elif model_name in ['stable-audio-open-small', 'stable-audio-open-1.0']:
             model_file_mapping = {
                 'stable-audio-open-small': 'stable-audio-open-small-model.safetensors',
                 'stable-audio-open-1.0': 'stable-audio-open-model.safetensors'
                 has_checkpoint = len(checkpoint_files) > 0
                 has_config = len(config_files) > 0
                 checkpoints = []
                 for ckpt_file in checkpoint_files:
                     import re
                     name = ckpt_file.stem
                     epoch_match = re.search(r'epoch=(\d+)', name)
                     checkpoint_info = {
                         'name': name,
                         'path': str(ckpt_file.relative_to(config.project_root)),
                         'size_mb': round(ckpt_file.stat().st_size / (1024 * 1024), 1),
                         'created': ckpt_file.stat().st_mtime
                     checkpoints.append(checkpoint_info)
                 checkpoints.sort(key=lambda x: x['created'], reverse=True)
                 latest_checkpoint = max(checkpoint_files, key=lambda x: x.stat(
                 ).st_mtime) if checkpoint_files else None
                 latest_config = max(
                     config_files, key=lambda x: x.stat().st_mtime) if config_files else None
                 unwrapped_dir = model_dir / "unwrapped"
                 unwrapped_models = []
                 if unwrapped_dir.exists():
                     for unwrapped_file in unwrapped_dir.glob("*.safetensors"):
                         unwrapped_models.append({
                             'name': unwrapped_file.stem,
                             'path': str(unwrapped_file.relative_to(config.project_root)),
                             'size_mb': round(unwrapped_file.stat().st_size / (1024 * 1024), 1),
                             'created': unwrapped_file.stat().st_mtime
                         })
                     unwrapped_models.sort(
                         key=lambda x: x['created'], reverse=True)
+                # Fine-tuned models reuse the base model's config for unwrapping.
+                base_config_path = "models/config/model_config_small.json"
                 models.append({
                     'name': model_dir.name,
                     'path': str(model_dir.relative_to(config.project_root)),
                     'has_checkpoint': has_checkpoint,
                     'has_config': has_config,
                     'ckpt_path': str(latest_checkpoint.relative_to(config.project_root)) if latest_checkpoint else None,
+                    'config_path': base_config_path,
+                    'checkpoints': checkpoints,
                     'unwrapped_models': unwrapped_models,
                     'created': model_dir.stat().st_mtime if model_dir.exists() else None
                 })
 @app.route('/api/models/available', methods=['GET'])
 def get_available_models():
     try:
         models = model_manager.get_available_models()
         return jsonify({'models': models})
 @app.route('/api/models/<model_id>/info', methods=['GET'])
 def get_model_info(model_id):
     try:
         model_info = model_manager.get_model_info(model_id)
         if not model_info:
 @app.route('/api/models/<model_id>/accept-terms', methods=['POST'])
 def accept_model_terms(model_id):
     try:
         success = model_manager.accept_terms(model_id)
         if success:
 @app.route('/api/models/<model_id>/download', methods=['POST'])
 def download_model(model_id):
     try:
         if not model_manager.is_terms_accepted(model_id):
             return jsonify({'error': 'Terms not accepted for this model'}), 400
         success = model_manager.download_model(model_id)
         if success:
             return jsonify({
 @app.route('/api/hf-login', methods=['POST'])
 def hf_login():
     try:
         data = request.json
         token = data.get('token')
 @app.route('/api/base-models/status', methods=['GET'])
 def get_base_models_status():
     try:
         import os
         from pathlib import Path
         base_models = {
             'stable-audio-open-1.0': {
                 'name': 'Stable Audio Open 1.0',
+                'path': 'models/pretrained',
+                'file': 'stable-audio-open-model.safetensors',
                 'downloaded': False
             },
             'stable-audio-open-small': {
+                'name': 'Stable Audio Open Small',
+                'path': 'models/pretrained',
+                'file': 'stable-audio-open-small-model.safetensors',
                 'downloaded': False
             }
         }
         for model_id, info in base_models.items():
             model_dir = Path(info['path'])
             model_file = model_dir / info['file']
             if model_file.exists() and model_file.is_file():
                 info['downloaded'] = True
             else:
+                # Legacy layout: model stored in a subdirectory.
                 old_path = model_dir / model_id
                 if old_path.exists() and old_path.is_dir():
                     has_files = any([
 @app.route('/api/models/<model_id>/delete', methods=['DELETE'])
 def delete_model(model_id):
     try:
         success = model_manager.delete_model(model_id)
         if success:
 @app.route('/api/models/storage', methods=['GET'])
 def get_model_storage():
     try:
         storage_info = model_manager.get_storage_info()
         return jsonify(storage_info)
 @app.route('/api/start-fresh', methods=['POST'])
 def start_fresh():
     try:
         config = get_config()
         data_dir = config.get_path("data")
         config_dir = config.get_path("models_config")
         data_files_deleted = 0
         if data_dir.exists():
             for file_path in data_dir.glob("*"):
+                if file_path.is_file() and not file_path.name.endswith('.py'):
                     file_path.unlink()
                     data_files_deleted += 1
         config_files_deleted = 0
         if config_dir.exists():
             for file_path in config_dir.glob("custom_metadata.py"):
                     file_path.unlink()
                     config_files_deleted += 1
         data_dir.mkdir(exist_ok=True, parents=True)
         return jsonify({
 @app.route('/api/unwrap-model', methods=['POST'])
 def unwrap_model():
     try:
         data = request.json
         model_config = data.get('model_config')
         if not model_config or not ckpt_path:
             return jsonify({'error': 'model_config and ckpt_path are required'}), 400
         import subprocess
         from pathlib import Path
         config = get_config()
         repo_root = config.project_root
         model_config_path = repo_root / \
             model_config if not Path(
                 model_config).is_absolute() else Path(model_config)
         ckpt_path_resolved = repo_root / \
             ckpt_path if not Path(ckpt_path).is_absolute() else Path(ckpt_path)
         if not model_config_path.exists():
             return jsonify({'error': f'Model config not found: {model_config_path}'}), 400
         if not ckpt_path_resolved.exists():
             return jsonify({'error': f'Checkpoint not found: {ckpt_path_resolved}'}), 400
         model_dir = ckpt_path_resolved.parent
         unwrapped_dir = model_dir / "unwrapped"
         unwrapped_dir.mkdir(exist_ok=True)
         cmd = [
             sys.executable, 'unwrap_model.py',
             '--model-config', str(model_config_path),
             '--ckpt-path', str(ckpt_path_resolved),
             '--use-safetensors'
         ]
+        # unwrap_model.py writes next to its CWD, so run from stable-audio-tools/.
         stable_audio_dir = repo_root / "stable-audio-tools"
         proc = subprocess.run(cmd, cwd=stable_audio_dir,
                               capture_output=True, text=True)
         if proc.returncode == 0:
             import glob
             pattern = str(stable_audio_dir / f"{name}*.safetensors")
             created_files = glob.glob(pattern)
                 target_path = unwrapped_dir / created_path.name
                 try:
                     created_path.rename(target_path)
                     moved_files.append(str(target_path))
                     print(f"Moved {created_path.name} to {target_path}")
                 except Exception as e:
                     print(f"Error moving {created_path}: {e}")
             unwrapped_files = list(unwrapped_dir.glob("*.safetensors"))
             return jsonify({
 @app.route('/api/delete-checkpoint', methods=['POST'])
 def delete_checkpoint():
     try:
         data = request.json
         checkpoint_path = data.get('checkpoint_path')
         if not checkpoint_path:
             return jsonify({'error': 'checkpoint_path is required'}), 400
         config = get_config()
         repo_root = config.project_root
         ckpt_path_resolved = repo_root / \
             checkpoint_path if not Path(
                 checkpoint_path).is_absolute() else Path(checkpoint_path)
         if not ckpt_path_resolved.exists():
             return jsonify({'error': f'Checkpoint file not found: {ckpt_path_resolved}'}), 404
+        # Restrict deletion to .ckpt to avoid accidental loss of unwrapped models.
         if not ckpt_path_resolved.suffix == '.ckpt':
             return jsonify({'error': f'Only .ckpt files can be deleted: {ckpt_path_resolved}'}), 400
 @app.route('/api/delete-wrapped-checkpoint', methods=['POST'])
 def delete_wrapped_checkpoint():
     try:
         data = request.json
         model_name = data.get('model_name')
         if not model_name:
             return jsonify({'error': 'model_name is required'}), 400
         config = get_config()
         models_dir = config.get_path("models_fine_tuned")
         model_dir = models_dir / model_name
         if not model_dir.exists():
             return jsonify({'error': f'Model directory not found: {model_dir}'}), 404
         deleted_files = []
         for ckpt_file in model_dir.glob("*.ckpt"):
             try:
 @app.route('/api/free-gpu-memory', methods=['POST'])
 def free_gpu_memory():
     try:
         import subprocess
         import torch
         print(" FREEING GPU MEMORY...")
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
             print("    Cleared PyTorch CUDA cache")
         if hasattr(torch, 'mps') and torch.backends.mps.is_available():
             torch.mps.empty_cache()
             print("    Cleared MPS cache")
         current_pid = os.getpid()
         print(f"     Current process PID: {current_pid}")
         try:
             result = subprocess.run(['nvidia-smi', '--query-compute-apps=pid,used_memory,process_name', '--format=csv,noheader,nounits'],
                                     capture_output=True, text=True, timeout=10)
                                 pid_int = int(pid)
                                 mem_gb = float(mem_mb) / 1024
                                 if pid_int == current_pid:
                                     print(
                                         f"     Skipping current process PID: {pid_int}")
                                     continue
                                 if 'python' in process_name.lower() and mem_gb > 1.0:
                                     print(
                                         f"    Found Python process PID: {pid_int} using {mem_gb:.1f}GB")
                                     print(f"      Process: {process_name}")
                                     try:
                                         subprocess.run(
                                             ['kill', '-TERM', str(pid_int)], check=False, timeout=5)
                                         print(
                                             f"    Sent SIGTERM to PID: {pid_int}")
                                         time.sleep(2)
                                         try:
                                             os.kill(pid_int, 0)
                                             print(
                                                 f"     Process {pid_int} still running, sending SIGKILL")
         except Exception as e:
             print(f"     Could not check CUDA processes: {e}")
         time.sleep(3)
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
             print("    Cleared PyTorch CUDA cache again")
         memory_info = {}
         if torch.cuda.is_available():
             total_memory = torch.cuda.get_device_properties(
                 0).total_memory / (1024**3)
             torch.cuda.synchronize()
             cached_memory = torch.cuda.memory_reserved(0) / (1024**3)
             free_memory = total_memory - allocated_memory
             try:
                 result = subprocess.run(['nvidia-smi', '--query-gpu=memory.used,memory.total', '--format=csv,noheader,nounits'],
                                         capture_output=True, text=True, timeout=5)
                 nvidia_total_gb = total_memory
                 nvidia_free_gb = total_memory
+            # PyTorch sometimes reports 0 for externally-allocated memory; fall back to nvidia-smi.
             if allocated_memory > 0:
                 final_allocated = allocated_memory
                 final_free = free_memory
 @app.route('/api/toggle-debug', methods=['POST'])
 def toggle_debug():
     global DEBUG_MODE
     try:
         data = request.json
 @app.route('/api/debug-status', methods=['GET'])
 def get_debug_status():
     """Return the current ``DEBUG_MODE`` flag plus a human-readable message."""
     return jsonify({
         'debug_mode': DEBUG_MODE,
         'message': f"Debug mode is {'enabled' if DEBUG_MODE else 'disabled'}"
     })
 _api_call_stats = {
     'gpu_memory_status': 0,
     'status': 0,
 def _log_api_call(endpoint):
     """Count one call to ``endpoint`` in the module-level ``_api_call_stats``.

     After incrementing, if more than 3600 seconds have passed since
     ``_api_call_stats['last_reset']``, the whole stats dict is replaced by a
     fresh one holding only this endpoint (count 1) and a new timestamp.
     """
     global _api_call_stats
     _api_call_stats[endpoint] = _api_call_stats.get(endpoint, 0) + 1
     # Hourly reset: all other endpoints' counters are discarded here.
     if time.time() - _api_call_stats['last_reset'] > 3600:
         _api_call_stats = {endpoint: 1, 'last_reset': time.time()}
 @app.route('/api/debug-stats', methods=['GET'])
 def get_debug_stats():
     """Return the API call counters and the age of the current stats window.

     ``uptime_hours`` is the time elapsed since ``_api_call_stats['last_reset']``
     in hours, so it restarts whenever the stats dict is reset rather than
     measuring time since process start.
     """
     return jsonify({
         'api_call_stats': _api_call_stats,
         'uptime_hours': (time.time() - _api_call_stats['last_reset']) / 3600,
     })
 # Last memory_info payload served by /api/gpu-memory-status and the
 # time.time() value at which it was stored.
 _gpu_memory_cache = {}
 _gpu_memory_cache_time = 0
 # Seconds a cached payload is reused before being recomputed.
+_gpu_memory_cache_duration = 2.0
 # time.time() of the last high/low GPU memory warning printed.
 _last_memory_warning_time = 0
 # Minimum seconds between two consecutive memory warnings.
+_memory_warning_interval = 30
 @app.route('/api/open-output-folder', methods=['POST'])
 def open_output_folder():
     try:
         import subprocess
         import platform
 @app.route('/api/open-documentation', methods=['POST'])
 def open_documentation():
     try:
         import webbrowser
         logger.error(f"Error opening documentation: {e}")
         return jsonify({"success": False, "error": str(e)}), 500
 _welcome_page_closed = False
 @app.route('/api/welcome-page-closed', methods=['POST'])
 def welcome_page_closed():
     global _welcome_page_closed
     try:
         _welcome_page_closed = True
 @app.route('/api/welcome-page-status', methods=['GET'])
 def get_welcome_page_status():
     """Return ``{"closed": bool}`` from the module-level ``_welcome_page_closed`` flag."""
     global _welcome_page_closed
     return jsonify({"closed": _welcome_page_closed})
 @app.route('/api/license-info', methods=['GET'])
 def get_license_info():
     try:
         project_root = Path(__file__).parent.parent.parent
         license_file = project_root / "LICENSE"
         license_text = ""
         if license_file.exists():
             with open(license_file, 'r', encoding='utf-8') as f:
                 lines = f.readlines()[:50]
                 license_text = ''.join(lines)
         notice_file = project_root / "NOTICE.md"
         notice_text = ""
         if notice_file.exists():
 @app.route('/api/models-status', methods=['GET'])
 def get_models_status():
     try:
         required_models = ['stable-audio-open-small', 'stable-audio-open-1.0']
         downloaded_models = [
 @app.route('/api/gpu-memory-status', methods=['GET'])
 def get_gpu_memory_status():
     _log_api_call('gpu_memory_status')
     global _gpu_memory_cache, _gpu_memory_cache_time
     current_time = time.time()
     if current_time - _gpu_memory_cache_time < _gpu_memory_cache_duration:
         return jsonify({'memory_info': _gpu_memory_cache})
         memory_info = {}
         if torch.cuda.is_available():
             total_memory = torch.cuda.get_device_properties(
                 0).total_memory / (1024**3)
             torch.cuda.synchronize()
             allocated_memory = torch.cuda.memory_allocated(0) / (1024**3)
             cached_memory = torch.cuda.memory_reserved(0) / (1024**3)
             free_memory = total_memory - allocated_memory
             nvidia_used_gb = 0
             nvidia_total_gb = total_memory
             nvidia_free_gb = total_memory
+            # PyTorch reports 0 when memory is held by other processes; ask nvidia-smi instead.
             if allocated_memory == 0:
                 try:
                     result = subprocess.run(['nvidia-smi', '--query-gpu=memory.used,memory.total', '--format=csv,noheader,nounits'],
+                                            capture_output=True, text=True, timeout=1)
                     if result.stdout.strip():
                         used_mb, total_mb = result.stdout.strip().split(', ')
                         nvidia_used_gb = float(used_mb) / 1024
                         nvidia_total_gb = float(total_mb) / 1024
                         nvidia_free_gb = nvidia_total_gb - nvidia_used_gb
                 except Exception as e:
                     if "Could not get nvidia-smi info" not in str(e):
                         print(f"GPU Memory Error: {e}")
             cuda_capability = torch.cuda.get_device_capability(0)
             device_name = torch.cuda.get_device_name(0)
             if allocated_memory > 0:
                 final_allocated = allocated_memory
                 final_cached = cached_memory
                 memory_source = "PyTorch"
             else:
                 final_allocated = nvidia_used_gb
+                final_cached = cached_memory
                 final_free = nvidia_free_gb
                 memory_source = "nvidia-smi"
                 'nvidia_used': nvidia_used_gb
             }
             global _last_memory_warning_time
             if (current_time - _last_memory_warning_time) > _memory_warning_interval:
+                if final_allocated > 10.0:
                     print(
                         f"  High GPU Memory Usage: {final_allocated:.2f}GB allocated, {final_free:.2f}GB free")
                     _last_memory_warning_time = current_time
+                elif final_free < 1.0:
                     print(
                         f"  Low GPU Memory: {final_free:.2f}GB free, {final_allocated:.2f}GB allocated")
                     _last_memory_warning_time = current_time
         else:
             memory_info['cpu'] = {
                 'total': psutil.virtual_memory().total / (1024**3),
                 'available': psutil.virtual_memory().available / (1024**3),
                 'type': 'cpu'
             }
         _gpu_memory_cache = memory_info
         _gpu_memory_cache_time = current_time
         return jsonify({'error': str(e)}), 500
 _annotate_job_lock = threading.Lock()
 _annotate_job = {
     'state': 'idle',   # idle | running | done | error
     return clap_checkpoint_path(get_config().get_path('models_pretrained'))
+@app.route('/api/environment', methods=['GET'])
+def environment():
+    """Report runtime environment flags as JSON.
+
+    ``docker`` is true only when the ``FRAGMENTA_DOCKER`` environment
+    variable is exactly ``'1'``; it defaults to false when unset.
+    """
+    return jsonify({
+        'docker': os.environ.get('FRAGMENTA_DOCKER', '0') == '1',
+    })
+@app.route('/api/upload-folder', methods=['POST'])
+def upload_folder():
+    """Save an uploaded folder of audio files into a new staging directory.
+
+    Reads two parallel multipart fields: ``files`` (the file parts) and
+    ``rel_paths`` (each file's path relative to the selected folder).
+    Files are written under ``<data>/uploads/<timestamp>-<folder>/`` keeping
+    their relative layout; entries with a non-audio extension or an unsafe
+    path are skipped silently.
+
+    Returns JSON ``{'path', 'file_count'}`` on success, or a 400 JSON error
+    when no files were sent, the two lists differ in length, or no audio
+    file was kept.
+    """
+    # Browser-native folder upload path for containerised deployments
+    # (e.g. HF Space) where no display server is available for a native dialog.
+    audio_exts = {'.wav', '.mp3', '.flac', '.m4a', '.ogg', '.aac'}
+    files = request.files.getlist('files')
+    rel_paths = request.form.getlist('rel_paths')
+    if not files:
+        return jsonify({'error': 'No files uploaded.'}), 400
+    if len(rel_paths) != len(files):
+        return jsonify({'error': 'rel_paths count does not match files count.'}), 400
+    # Label the staging directory after the top-level folder of the first
+    # relative path; fall back to 'folder' when that path has no directory part.
+    first_rel = (rel_paths[0] or '').replace('\\', '/').lstrip('/')
+    folder_name = first_rel.split('/', 1)[0] if '/' in first_rel else 'folder'
+    # Keep only alphanumerics, '-' and '_' so the label is a safe directory name.
+    safe_folder = ''.join(c for c in folder_name if c.isalnum() or c in '-_') or 'folder'
+    staging_root = get_config().get_path('data') / 'uploads'
+    staging_root.mkdir(parents=True, exist_ok=True)
+    # Whole-second timestamp prefix separates successive uploads.
+    target_dir = staging_root / f"{int(time.time())}-{safe_folder}"
+    target_dir.mkdir(parents=True, exist_ok=True)
+    saved = 0
+    for file_obj, rel in zip(files, rel_paths):
+        # Normalise separators and strip leading slashes so the path is relative.
+        rel_norm = (rel or file_obj.filename or '').replace('\\', '/').lstrip('/')
+        # Skip empty names and anything containing a '..' component.
+        if not rel_norm or '..' in rel_norm.split('/'):
+            continue
+        if Path(rel_norm).suffix.lower() not in audio_exts:
+            continue
+        # Second guard: the resolved destination must still be inside target_dir.
+        dest = (target_dir / rel_norm).resolve()
+        try:
+            dest.relative_to(target_dir.resolve())
+        except ValueError:
+            continue
+        dest.parent.mkdir(parents=True, exist_ok=True)
+        file_obj.save(dest)
+        saved += 1
+    if saved == 0:
+        # Nothing was kept: remove the staging directory before reporting the error.
+        import shutil
+        shutil.rmtree(target_dir, ignore_errors=True)
+        return jsonify({'error': 'No audio files found in the selected folder.'}), 400
+    return jsonify({'path': str(target_dir), 'file_count': saved})
 @app.route('/api/pick-folder', methods=['POST'])
 def pick_folder():
     import subprocess
     import shutil as _shutil
 @app.route('/shutdown', methods=['POST'])
 def shutdown():
     try:
         print(" Shutting down Flask server...")
         func = request.environ.get('werkzeug.server.shutdown')
         if func is None:
             raise RuntimeError('Not running with the Werkzeug Server')
 if __name__ == '__main__':
     # Script entry point: bind address and port come from FLASK_HOST /
     # FLASK_PORT, defaulting to all interfaces on port 5001.
     host = os.environ.get('FLASK_HOST', '0.0.0.0')
     port = int(os.environ.get('FLASK_PORT', '5001'))
     # NOTE(review): debug=True turns on Werkzeug's interactive debugger and
     # reloader; with the default 0.0.0.0 bind the debugger is reachable from
     # other hosts. Confirm this entry point is only used on trusted networks.
     app.run(debug=True, host=host, port=port)

app/backend/data/simple_audio_processor.py CHANGED Viewed

@@ -9,7 +9,6 @@ logger = logging.getLogger(__name__)
 def fast_scandir(dir_path, ext_list):
     import os
     subfolders, files = [], []
-    # add starting period to extensions if needed
     ext_list = ['.'+x if x[0] != '.' else x for x in ext_list]
     try:
@@ -39,8 +38,7 @@ class SimpleAudioProcessor:
     def __init__(self, model_config_path: Optional[Path] = None):
         self.audio_extensions = (".wav", ".mp3", ".flac", ".m4a")
-        # Load model config for info only
         if model_config_path and model_config_path.exists():
             with open(model_config_path, 'r') as f:
                 model_config = json.load(f)
@@ -48,7 +46,6 @@ class SimpleAudioProcessor:
             self.sample_rate = model_config.get("sample_rate", 44100)
             self.audio_channels = model_config.get("audio_channels", 2)
         else:
-            # Defaults
             self.sample_size = 2097152
             self.sample_rate = 44100
             self.audio_channels = 2
@@ -72,7 +69,6 @@ class SimpleAudioProcessor:
         output_dir: Path,
         prompts_file: Optional[Path] = None
     ) -> Dict[str, Any]:
-        # Find audio files
         audio_files = []
         for ext in self.audio_extensions:
             _, files = fast_scandir(str(input_dir), [ext[1:]])
@@ -83,37 +79,34 @@ class SimpleAudioProcessor:
         logger.info(f"Found {len(audio_files)} audio files")
-        # Create output directory
         output_dir.mkdir(exist_ok=True, parents=True)
-        # Copy files to output directory (only if different directories)
         if input_dir != output_dir:
             import shutil
             for audio_file in audio_files:
                 src_path = Path(audio_file)
                 dst_path = output_dir / src_path.name
                 if not dst_path.exists() or dst_path.stat().st_size != src_path.stat().st_size:
                     shutil.copy2(src_path, dst_path)
                     logger.info(f"Copied {src_path.name}")
         else:
             logger.info("Input and output directories are the same - no copying needed")
-        # Create simple dataset config
         dataset_config = {
             "dataset_type": "audio_dir",
             "datasets": [
                 {
-                    "id": "custom_dataset",
                     "path": str(output_dir),
                     "custom_metadata_module": "custom_metadata"
                 }
             ],
-            "random_crop": True,  # CRITICAL - enables random cropping during training
             "drop_last": True
         }
-        # Save prompts if provided
         if prompts_file and prompts_file.exists():
             prompts = self.load_prompts(prompts_file)
             if prompts:

 def fast_scandir(dir_path, ext_list):
     import os
     subfolders, files = [], []
     ext_list = ['.'+x if x[0] != '.' else x for x in ext_list]
     try:
     def __init__(self, model_config_path: Optional[Path] = None):
         self.audio_extensions = (".wav", ".mp3", ".flac", ".m4a")
         if model_config_path and model_config_path.exists():
             with open(model_config_path, 'r') as f:
                 model_config = json.load(f)
             self.sample_rate = model_config.get("sample_rate", 44100)
             self.audio_channels = model_config.get("audio_channels", 2)
         else:
             self.sample_size = 2097152
             self.sample_rate = 44100
             self.audio_channels = 2
         output_dir: Path,
         prompts_file: Optional[Path] = None
     ) -> Dict[str, Any]:
         audio_files = []
         for ext in self.audio_extensions:
             _, files = fast_scandir(str(input_dir), [ext[1:]])
         logger.info(f"Found {len(audio_files)} audio files")
         output_dir.mkdir(exist_ok=True, parents=True)
         if input_dir != output_dir:
             import shutil
             for audio_file in audio_files:
                 src_path = Path(audio_file)
                 dst_path = output_dir / src_path.name
                 if not dst_path.exists() or dst_path.stat().st_size != src_path.stat().st_size:
                     shutil.copy2(src_path, dst_path)
                     logger.info(f"Copied {src_path.name}")
         else:
             logger.info("Input and output directories are the same - no copying needed")
         dataset_config = {
             "dataset_type": "audio_dir",
             "datasets": [
                 {
+                    "id": "custom_dataset",
                     "path": str(output_dir),
                     "custom_metadata_module": "custom_metadata"
                 }
             ],
+            # random_crop is required: without it, training always sees file start.
+            "random_crop": True,
             "drop_last": True
         }
         if prompts_file and prompts_file.exists():
             prompts = self.load_prompts(prompts_file)
             if prompts:

app/core/config.py CHANGED Viewed

@@ -8,18 +8,15 @@ class ProjectConfig:
     def __init__(self, project_root: Optional[Path] = None) -> None:
         if getattr(sys, 'frozen', False):
-            # Running in PyInstaller bundle
             self.frozen = True
-            # sys._MEIPASS is where PyInstaller unpacks the bundle
             self.project_root = Path(sys._MEIPASS)
-            # For writable data, use a user directory
             if sys.platform == "win32":
                 self.user_data_dir = Path(os.environ["APPDATA"]) / "FragmentaDesktop"
             elif sys.platform == "darwin":
                 self.user_data_dir = Path.home() / "Library" / "Application Support" / "FragmentaDesktop"
             else:
-                # Linux/Unix
                 self.user_data_dir = Path.home() / ".local" / "share" / "FragmentaDesktop"
             self.user_data_dir.mkdir(parents=True, exist_ok=True)
@@ -44,8 +41,9 @@ class ProjectConfig:
             self.project_root: Path = Path(project_root).resolve()
             self.user_data_dir = self.project_root
         self.paths: Dict[str, Path] = {
-            # Writable paths - go to user_data_dir in frozen mode
             "models": self.user_data_dir / "models",
             "models_config": self.user_data_dir / "models" / "config",
             "models_pretrained": self.user_data_dir / "models" / "pretrained",
@@ -53,8 +51,7 @@ class ProjectConfig:
             "data": self.user_data_dir / "data",
             "logs": self.user_data_dir / "logs",
             "output": self.user_data_dir / "output",
-            # Read-only attributes/codebase - stay in project_root
             "application": self.project_root,
             "backend": self.project_root / "app" / "backend",
             "frontend": self.project_root / "app" / "frontend",

     def __init__(self, project_root: Optional[Path] = None) -> None:
         if getattr(sys, 'frozen', False):
             self.frozen = True
+            # PyInstaller unpacks the bundle to sys._MEIPASS; writable data lives elsewhere.
             self.project_root = Path(sys._MEIPASS)
             if sys.platform == "win32":
                 self.user_data_dir = Path(os.environ["APPDATA"]) / "FragmentaDesktop"
             elif sys.platform == "darwin":
                 self.user_data_dir = Path.home() / "Library" / "Application Support" / "FragmentaDesktop"
             else:
                 self.user_data_dir = Path.home() / ".local" / "share" / "FragmentaDesktop"
             self.user_data_dir.mkdir(parents=True, exist_ok=True)
             self.project_root: Path = Path(project_root).resolve()
             self.user_data_dir = self.project_root
+        # Writable paths live under user_data_dir (diverges from project_root in frozen mode);
+        # read-only code/assets stay under project_root.
         self.paths: Dict[str, Path] = {
             "models": self.user_data_dir / "models",
             "models_config": self.user_data_dir / "models" / "config",
             "models_pretrained": self.user_data_dir / "models" / "pretrained",
             "data": self.user_data_dir / "data",
             "logs": self.user_data_dir / "logs",
             "output": self.user_data_dir / "output",
             "application": self.project_root,
             "backend": self.project_root / "app" / "backend",
             "frontend": self.project_root / "app" / "frontend",

app/core/generation/audio_generator.py CHANGED Viewed

@@ -155,23 +155,6 @@ class AudioGenerator:
         seed: int = -1,
         output_path: Optional[Path] = None
     ) -> Path:
-        """
-        Generate audio from a text prompt
-        Args:
-            prompt: Text description of the audio to generate
-            model_path: Path to fine-tuned model directory
-            unwrapped_model_path: Path to unwrapped .safetensors file
-            config_file: Model config file to use (small or large)
-            duration: Duration in seconds
-            cfg_scale: Classifier-free guidance scale
-            steps: Number of diffusion steps
-            seed: Random seed (-1 for random)
-            output_path: Optional path to save the generated audio
-        Returns:
-            Path to the generated audio file
-        """
         print(f"\nAUDIO GENERATOR: generate_audio called")
         print(f"   - Prompt: '{prompt}'")
         print(f"   - Duration: {duration}s")

         seed: int = -1,
         output_path: Optional[Path] = None
     ) -> Path:
         print(f"\nAUDIO GENERATOR: generate_audio called")
         print(f"   - Prompt: '{prompt}'")
         print(f"   - Duration: {duration}s")

app/core/model_manager.py CHANGED Viewed

@@ -223,31 +223,28 @@ class ModelManager:
                 import shutil
                 from tqdm import tqdm
                 import sys
-                # Redirect tqdm to capture progress
                 class TqdmToCallback:
                     def __init__(self, callback, file_index, total_files):
                         self.callback = callback
                         self.file_index = file_index
                         self.total_files = total_files
                         self.last_percent = 0
                     def __call__(self, t):
-                        """Returns a callback function for tqdm"""
                         def inner(bytes_amount=1):
                             if t.total:
-                                # Calculate progress: 20-90% range for all files
                                 file_progress = (t.n / t.total)
                                 overall_progress = (self.file_index + file_progress) / self.total_files
                                 percent = 20 + int(overall_progress * 70)
                                 if percent != self.last_percent:
                                     self.last_percent = percent
                                     downloaded_mb = t.n / (1024 * 1024)
                                     total_mb = t.total / (1024 * 1024)
                                     if self.callback:
                                         self.callback(
-                                            percent,
                                             f"Downloading: {downloaded_mb:.1f}MB / {total_mb:.1f}MB"
                                         )
                         return inner
@@ -273,15 +270,14 @@ class ModelManager:
                         else:
                             final_filename = f"{model_id}-{file_pattern}"
-                        # Use custom tqdm callback to intercept progress
                         tqdm_callback = TqdmToCallback(progress_callback, i, total_files)
-                        # Monkey-patch tqdm for this download
                         original_tqdm_init = tqdm.__init__
                         def patched_tqdm_init(self, *args, **kwargs):
                             original_tqdm_init(self, *args, **kwargs)
-                            # Hook into tqdm updates
                             original_update = self.update
                             def new_update(n=1):
                                 result = original_update(n)
@@ -307,7 +303,6 @@ class ModelManager:
                                 resume_download=True
                             )
                         finally:
-                            # Restore original tqdm
                             tqdm.__init__ = original_tqdm_init
                         downloaded_path = Path(downloaded_file)

                 import shutil
                 from tqdm import tqdm
                 import sys
                 class TqdmToCallback:
                     """Translate a tqdm bar's state into ``callback(percent, message)`` calls.
                     ``file_index`` / ``total_files`` position the current file within the batch
                     (presumably a zero-based index - confirm against the caller's loop), and
                     ``last_percent`` holds the last integer percentage that was reported.
                     """
                     def __init__(self, callback, file_index, total_files):
                         self.callback = callback
                         self.file_index = file_index
                         self.total_files = total_files
                         self.last_percent = 0
                     def __call__(self, t):
                         # Returns a closure bound to the bar ``t``; progress is read from
                         # ``t.n`` and ``t.total`` each time the closure is invoked.
                         def inner(bytes_amount=1):
                             # ``bytes_amount`` is accepted but never used below.
                             # A falsy ``t.total`` skips the update (and avoids dividing by zero).
                             if t.total:
                                 file_progress = (t.n / t.total)
                                 overall_progress = (self.file_index + file_progress) / self.total_files
                                 # Scale the 0..1 overall fraction onto the 20..90 percent band.
                                 percent = 20 + int(overall_progress * 70)
                                 # Only report when the integer percentage actually changes.
                                 if percent != self.last_percent:
                                     self.last_percent = percent
                                     downloaded_mb = t.n / (1024 * 1024)
                                     total_mb = t.total / (1024 * 1024)
                                     if self.callback:
                                         self.callback(
+                                            percent,
                                             f"Downloading: {downloaded_mb:.1f}MB / {total_mb:.1f}MB"
                                         )
                         return inner
                         else:
                             final_filename = f"{model_id}-{file_pattern}"
                         tqdm_callback = TqdmToCallback(progress_callback, i, total_files)
+                        # hf_hub_download drives its own tqdm — monkey-patch its init/update so we
+                        # forward byte progress to progress_callback without a second progress bar.
                         original_tqdm_init = tqdm.__init__
                         def patched_tqdm_init(self, *args, **kwargs):
                             original_tqdm_init(self, *args, **kwargs)
                             original_update = self.update
                             def new_update(n=1):
                                 result = original_update(n)
                                 resume_download=True
                             )
                         finally:
                             tqdm.__init__ = original_tqdm_init
                         downloaded_path = Path(downloaded_file)

app/frontend/build/assets/index-RtS7dlIj.js ADDED Viewed

The diff for this file is too large to render. See raw diff

app/frontend/build/index.html CHANGED Viewed

@@ -26,7 +26,7 @@
     </style>
     <title>Fragmenta Desktop</title>
-    <script type="module" crossorigin src="/assets/index-D-qgc0vE.js"></script>
   </head>
   <body>
     <noscript>You need to enable JavaScript to run this app.</noscript>

     </style>
     <title>Fragmenta Desktop</title>
+    <script type="module" crossorigin src="/assets/index-RtS7dlIj.js"></script>
   </head>
   <body>
     <noscript>You need to enable JavaScript to run this app.</noscript>

app/frontend/src/components/BulkAnnotatePanel.js CHANGED Viewed

@@ -9,6 +9,7 @@ import {
     CloudDownload as CloudDownloadIcon,
     Save as SaveIcon,
     FolderOpen as FolderOpenIcon,
 } from 'lucide-react';
 import api from '../api';
@@ -24,7 +25,16 @@ export default function BulkAnnotatePanel({ onCommitted }) {
     const [message, setMessage] = useState('');
     const [error, setError] = useState('');
     const [committing, setCommitting] = useState(false);
     const pollRef = useRef(null);
     const stopPolling = useCallback(() => {
         if (pollRef.current) {
@@ -101,6 +111,37 @@ export default function BulkAnnotatePanel({ onCommitted }) {
         }
     };
     const downloadClap = async () => {
         setError('');
         try {
@@ -173,14 +214,36 @@ export default function BulkAnnotatePanel({ onCommitted }) {
                     disabled={isRunning}
                     InputProps={{ readOnly: true }}
                 />
-                <Button
-                    variant="outlined"
-                    onClick={pickFolder}
-                    startIcon={<FolderOpenIcon size={16} />}
-                    disabled={isRunning}
-                >
-                    Browse
-                </Button>
                 <FormControl size="small" sx={{ minWidth: 140 }} disabled={isRunning}>
                     <InputLabel id="tier-label">Tier</InputLabel>
                     <Select

     CloudDownload as CloudDownloadIcon,
     Save as SaveIcon,
     FolderOpen as FolderOpenIcon,
+    Upload as UploadIcon,
 } from 'lucide-react';
 import api from '../api';
     const [message, setMessage] = useState('');
     const [error, setError] = useState('');
     const [committing, setCommitting] = useState(false);
+    const [isDocker, setIsDocker] = useState(false);
+    const [uploading, setUploading] = useState(false);
     const pollRef = useRef(null);
+    const folderInputRef = useRef(null);
+    useEffect(() => {
+        api.get('/api/environment')
+            .then(({ data }) => setIsDocker(!!data?.docker))
+            .catch(() => {});
+    }, []);
     const stopPolling = useCallback(() => {
         if (pollRef.current) {
         }
     };
+    const openFolderUpload = () => {
         // Clears any previous error, then opens the picker of the input behind folderInputRef (if mounted).
+        setError('');
+        if (folderInputRef.current) {
             // Reset the value before clicking - presumably so choosing the same folder again still fires onChange.
+            folderInputRef.current.value = '';
+            folderInputRef.current.click();
+        }
+    };
+    const handleFolderSelected = async (event) => {
         // Change handler: posts the selected files to /api/upload-folder and, if the
         // response carries a path, stores it via setFolderPath.
+        const fileList = Array.from(event.target.files || []);
         // Nothing selected - leave all state untouched.
+        if (fileList.length === 0) return;
+        setError('');
+        setUploading(true);
+        try {
+            const form = new FormData();
             // 'files' and 'rel_paths' are appended in the same pass, so entry i of each
             // field refers to the same file. Falls back to file.name when
             // webkitRelativePath is empty.
+            fileList.forEach((file) => {
+                form.append('files', file);
+                form.append('rel_paths', file.webkitRelativePath || file.name);
+            });
+            const { data } = await api.post('/api/upload-folder', form, {
+                headers: { 'Content-Type': 'multipart/form-data' },
+            });
             // Only update the folder path when the response actually contains one.
+            if (data?.path) setFolderPath(data.path);
+        } catch (exc) {
             // Prefer the `error` field of the response body; otherwise use the exception message.
+            setError(exc.response?.data?.error || exc.message);
+        } finally {
             // Runs on success and failure alike, so the uploading flag is always cleared.
+            setUploading(false);
+        }
+    };
     const downloadClap = async () => {
         setError('');
         try {
                     disabled={isRunning}
                     InputProps={{ readOnly: true }}
                 />
+                {isDocker ? (
+                    <>
+                        <input
+                            ref={folderInputRef}
+                            type="file"
+                            webkitdirectory=""
+                            directory=""
+                            multiple
+                            style={{ display: 'none' }}
+                            onChange={handleFolderSelected}
+                        />
+                        <Button
+                            variant="outlined"
+                            onClick={openFolderUpload}
+                            startIcon={uploading ? <CircularProgress size={16} /> : <UploadIcon size={16} />}
+                            disabled={isRunning || uploading}
+                        >
+                            {uploading ? 'Uploading…' : 'Upload Folder'}
+                        </Button>
+                    </>
+                ) : (
+                    <Button
+                        variant="outlined"
+                        onClick={pickFolder}
+                        startIcon={<FolderOpenIcon size={16} />}
+                        disabled={isRunning}
+                    >
+                        Browse
+                    </Button>
+                )}
                 <FormControl size="small" sx={{ minWidth: 140 }} disabled={isRunning}>
                     <InputLabel id="tier-label">Tier</InputLabel>
                     <Select

app/frontend/src/components/HfAuthDialog.js CHANGED Viewed

@@ -34,7 +34,6 @@ const HfAuthDialog = ({ open, onClose, onModelsDownloaded }) => {
         if (open) {
             checkModelStatus();
         } else {
-            // Reset state on close
             setActiveStep(0);
             setError(null);
             setToken('');
@@ -55,7 +54,6 @@ const HfAuthDialog = ({ open, onClose, onModelsDownloaded }) => {
             setMissingModels(missing);
             if (missing.length === 0) {
-                // All models exist
                 setActiveStep(3);
             } else {
                 setActiveStep(1);
@@ -77,8 +75,7 @@ const HfAuthDialog = ({ open, onClose, onModelsDownloaded }) => {
         setError(null);
         try {
             await api.post('/api/hf-login', { token: token.trim() });
-            // If login successful, move to download
             setActiveStep(2);
             startDownloads();
         } catch (err) {
@@ -91,14 +88,10 @@ const HfAuthDialog = ({ open, onClose, onModelsDownloaded }) => {
         try {
             for (const model of missingModels) {
                 setDownloadingModel(model.name);
-                // Record terms acceptance
                 await api.post(`/api/models/${model.id}/accept-terms`);
                 await api.post(`/api/models/${model.id}/download`);
             }
-            // All done
             setActiveStep(3);
             if (onModelsDownloaded) {
                 onModelsDownloaded();
@@ -113,10 +106,9 @@ const HfAuthDialog = ({ open, onClose, onModelsDownloaded }) => {
     const handleClose = () => {
         if (isProcessing && activeStep === 2) {
-            // Cannot close while downloading
             return;
         }
-        onClose(activeStep === 3); // return true if finished successfully
     };
     const getStepContent = (stepIndex) => {

         if (open) {
             checkModelStatus();
         } else {
             setActiveStep(0);
             setError(null);
             setToken('');
             setMissingModels(missing);
             if (missing.length === 0) {
                 setActiveStep(3);
             } else {
                 setActiveStep(1);
         setError(null);
         try {
             await api.post('/api/hf-login', { token: token.trim() });
             setActiveStep(2);
             startDownloads();
         } catch (err) {
         try {
             for (const model of missingModels) {
                 setDownloadingModel(model.name);
                 await api.post(`/api/models/${model.id}/accept-terms`);
                 await api.post(`/api/models/${model.id}/download`);
             }
             setActiveStep(3);
             if (onModelsDownloaded) {
                 onModelsDownloaded();
     const handleClose = () => {
         // Ignore close requests while step 2 (the download step) is still processing.
         if (isProcessing && activeStep === 2) {
             return;
         }
         // Tell the parent whether the dialog had reached the final step (3) when it closed.
+        onClose(activeStep === 3);
     };
     const getStepContent = (stepIndex) => {

utils/exceptions.py CHANGED Viewed

@@ -115,7 +115,6 @@ class TrainingError(FragmentaError):
         super().__init__(message, details)
-# Exception mapping for common errors
 def map_common_exception(exception: Exception, context: str = None) -> FragmentaError:
     if isinstance(exception, FileNotFoundError):

         super().__init__(message, details)
 def map_common_exception(exception: Exception, context: str = None) -> FragmentaError:
     if isinstance(exception, FileNotFoundError):

utils/logger.py CHANGED Viewed

@@ -1,8 +1,3 @@
-"""
-Centralized Logging System for Fragmenta Desktop
-Replaces scattered print statements with structured logging
-"""
 import logging
 import sys
 from pathlib import Path
@@ -10,8 +5,6 @@ from typing import Optional
 from datetime import datetime
 import os
-# Color codes for console output
 class Colors:
     RESET = '\033[0m'
@@ -25,8 +18,6 @@ class Colors:
 class ColoredFormatter(logging.Formatter):
-    """Custom formatter that adds colors to log levels"""
     COLORS = {
         'DEBUG': Colors.CYAN,
         'INFO': Colors.GREEN,

 import logging
 import sys
 from pathlib import Path
 from datetime import datetime
 import os
 class Colors:
     RESET = '\033[0m'
 class ColoredFormatter(logging.Formatter):
     COLORS = {
         'DEBUG': Colors.CYAN,
         'INFO': Colors.GREEN,