Spaces:

krislette
/

bach-or-bot

Sleeping

App Files Files Community

krislette committed on Oct 11

Commit

253a78c

1 Parent(s): 7633e2f

Auto-deploy from GitHub: b571980d762eb701851962e3b915d9447fe4e6de

Browse files

Files changed (5) hide show

app/server.py +67 -40
app/utils.py +51 -1
poetry.lock +23 -1
pyproject.toml +2 -1
scripts/explain.py +7 -1

app/server.py CHANGED Viewed

@@ -2,10 +2,6 @@
 from fastapi import Depends, FastAPI, File, Form, HTTPException, UploadFile
 from fastapi.middleware.cors import CORSMiddleware
-# Processing imports
-import librosa
-import io
 # Utils/schemas imports
 from app.schemas import (
     ErrorResponse,
@@ -14,12 +10,17 @@ from app.schemas import (
     PredictionXAIResponse,
     WelcomeResponse,
 )
-from app.utils import load_config
 # Model/XAI-related imports
 from scripts.explain import musiclime
 from scripts.predict import predict_pipeline
 # Load config at startup
 config = load_config()
@@ -43,45 +44,63 @@ app.add_middleware(
 )
-async def validate_audio_file(audio_file: UploadFile = File(...)):
-    """Validate audio file type and size."""
-    # Check file size
-    audio_content = await audio_file.read()
-    if len(audio_content) > MAX_FILE_SIZE:
         raise HTTPException(
             status_code=400,
-            detail=f"File too large. Maximum size is {MAX_FILE_SIZE // (1024*1024)}MB.",
         )
-    # Check file type
-    if audio_file.content_type not in ALLOWED_AUDIO_TYPES:
         raise HTTPException(
             status_code=400,
-            detail=f"Invalid file type. Supported formats: {', '.join(ALLOWED_AUDIO_TYPES)}",
         )
-    # Reset file pointer for later use
-    audio_file.file.seek(0)
-    return audio_file, audio_content
-def validate_lyrics(lyrics: str = Form(...)):
-    """Validate lyrics length and content."""
-    if len(lyrics) > MAX_LYRICS_LENGTH:
         raise HTTPException(
             status_code=400,
-            detail=f"Lyrics too long. Maximum length is {MAX_LYRICS_LENGTH} characters.",
         )
-    # Basic sanitization, remove excessive whitespace
-    lyrics = lyrics.strip()
-    if not lyrics:
         raise HTTPException(
             status_code=400,
-            detail="Lyrics cannot be empty.",
         )
-    return lyrics
 @app.get("/", response_model=WelcomeResponse, tags=["Root"])
@@ -108,32 +127,36 @@ def root():
     responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}},
 )
 async def predict_music(
-    lyrics: str = Depends(validate_lyrics), audio_file_data=Depends(validate_audio_file)
 ):
     """
     Endpoint to predict whether a music sample is human-composed or AI-generated.
     """
     try:
-        # Get the audio file and content from sanitized and cleaned audio file
-        audio_file, audio_content = audio_file_data
-        # Load audio from uploaded file with error handling for corrupted files
         try:
             audio_data, sr = librosa.load(io.BytesIO(audio_content))
         except Exception as e:
             raise HTTPException(status_code=400, detail=f"Invalid audio file: {str(e)}")
-        # Call MLP predict runner script to get results
         results = predict_pipeline(audio_data, lyrics)
         return PredictionResponse(
             status="success",
             lyrics=lyrics,
-            audio_file_name=audio_file.filename,
-            audio_content_type=audio_file.content_type,
             audio_file_size=len(audio_content),
             results=results,
         )
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
@@ -144,32 +167,36 @@ async def predict_music(
     responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}},
 )
 async def predict_music_with_xai(
-    lyrics: str = Depends(validate_lyrics), audio_file_data=Depends(validate_audio_file)
 ):
     """
     Endpoint to predict whether a music sample is human-composed or AI-generated with explainability.
     """
     try:
-        # Get the audio file and content from sanitized and cleaned audio file
-        audio_file, audio_content = audio_file_data
-        # Load audio from uploaded file with error handling for corrupted files
         try:
             audio_data, sr = librosa.load(io.BytesIO(audio_content))
         except Exception as e:
             raise HTTPException(status_code=400, detail=f"Invalid audio file: {str(e)}")
-        # Call musiclime runner script to get results
         results = musiclime(audio_data, lyrics)
         return PredictionXAIResponse(
             status="success",
             lyrics=lyrics,
-            audio_file_name=audio_file.filename,
-            audio_content_type=audio_file.content_type,
             audio_file_size=len(audio_content),
             results=results,
         )
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

 from fastapi import Depends, FastAPI, File, Form, HTTPException, UploadFile
 from fastapi.middleware.cors import CORSMiddleware
 # Utils/schemas imports
 from app.schemas import (
     ErrorResponse,
     PredictionXAIResponse,
     WelcomeResponse,
 )
+from app.utils import load_config, download_youtube_audio
 # Model/XAI-related imports
 from scripts.explain import musiclime
 from scripts.predict import predict_pipeline
+# Other imports
+import io
+import librosa
+from typing import Optional, Tuple
 # Load config at startup
 config = load_config()
 )
+def validate_lyrics(lyrics: str = Form(...)):
+    """
+    Validate the submitted lyrics and return them with surrounding whitespace removed.
+    Raises HTTPException(400) when the stripped lyrics are empty or longer
+    than MAX_LYRICS_LENGTH characters.
+    """
+    # Normalize first so leading/trailing whitespace does not count
+    # against the length limit
+    lyrics = lyrics.strip()
+    if not lyrics:
         raise HTTPException(
             status_code=400,
+            detail="Lyrics cannot be empty.",
         )
+    if len(lyrics) > MAX_LYRICS_LENGTH:
         raise HTTPException(
             status_code=400,
+            detail=f"Lyrics too long. Maximum length is {MAX_LYRICS_LENGTH} characters.",
         )
+    return lyrics
+async def validate_audio_source(
+    audio_file: Optional[UploadFile] = File(None),
+    youtube_url: Optional[str] = Form(None),
+) -> Tuple[Optional[bytes], str, str]:
+    """
+    Validate and process audio source (either file or YouTube URL).
+    Exactly one of audio_file and youtube_url must be provided.
+    Returns: (audio_content, file_name, content_type)
+    Raises: HTTPException(400) when neither or both sources are given, when
+    the upload's content type is not in ALLOWED_AUDIO_TYPES, or when the
+    upload is larger than MAX_FILE_SIZE. Errors raised by
+    download_youtube_audio propagate unchanged.
+    """
+    import asyncio
+
+    if not audio_file and not youtube_url:
+        raise HTTPException(
+            status_code=400, detail="Either audio_file or youtube_url must be provided"
+        )
+    if audio_file and youtube_url:
+        raise HTTPException(
+            status_code=400, detail="Provide either audio_file or youtube_url, not both"
+        )
+    # Process YouTube URL
+    if youtube_url:
+        # The download helper is synchronous; run it in a worker thread so
+        # this async dependency does not block the event loop meanwhile
+        audio_content = await asyncio.to_thread(download_youtube_audio, youtube_url)
+        # NOTE(review): MAX_FILE_SIZE is not applied to downloaded audio;
+        # confirm whether that is intended
+        # download_youtube_audio extracts MP3, so describe the payload as MP3
+        return audio_content, "youtube_audio.mp3", "audio/mpeg"
+    # Process uploaded file: check the declared type before reading the body
+    if audio_file.content_type not in ALLOWED_AUDIO_TYPES:
         raise HTTPException(
             status_code=400,
+            detail=f"Invalid file type. Supported formats: {', '.join(ALLOWED_AUDIO_TYPES)}",
         )
+    audio_content = await audio_file.read()
+    if len(audio_content) > MAX_FILE_SIZE:
         raise HTTPException(
             status_code=400,
+            detail=f"File too large. Maximum size is {MAX_FILE_SIZE // (1024*1024)}MB.",
         )
+    return audio_content, audio_file.filename, audio_file.content_type
 @app.get("/", response_model=WelcomeResponse, tags=["Root"])
     responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}},
 )
 async def predict_music(
+    lyrics: str = Depends(validate_lyrics),
+    audio_data_tuple: Tuple = Depends(validate_audio_source),
 ):
     """
     Endpoint to predict whether a music sample is human-composed or AI-generated.
+    Accepts either an audio file upload or a YouTube URL.
     """
     try:
+        # Unpack validated data
+        audio_content, audio_file_name, audio_content_type = audio_data_tuple
+        # Load audio with librosa
         try:
             audio_data, sr = librosa.load(io.BytesIO(audio_content))
         except Exception as e:
             raise HTTPException(status_code=400, detail=f"Invalid audio file: {str(e)}")
+        # Call MLP predict runner script
         results = predict_pipeline(audio_data, lyrics)
         return PredictionResponse(
             status="success",
             lyrics=lyrics,
+            audio_file_name=audio_file_name,
+            audio_content_type=audio_content_type,
             audio_file_size=len(audio_content),
             results=results,
         )
+    except HTTPException:
+        raise
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
     responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}},
 )
 async def predict_music_with_xai(
+    lyrics: str = Depends(validate_lyrics),
+    audio_data_tuple: Tuple = Depends(validate_audio_source),
 ):
     """
     Endpoint to predict whether a music sample is human-composed or AI-generated with explainability.
+    Accepts either an audio file upload or a YouTube URL.
     """
     try:
+        # Unpack validated data
+        audio_content, audio_file_name, audio_content_type = audio_data_tuple
+        # Load audio with librosa
         try:
             audio_data, sr = librosa.load(io.BytesIO(audio_content))
         except Exception as e:
             raise HTTPException(status_code=400, detail=f"Invalid audio file: {str(e)}")
+        # Call musiclime runner script
         results = musiclime(audio_data, lyrics)
         return PredictionXAIResponse(
             status="success",
             lyrics=lyrics,
+            audio_file_name=audio_file_name,
+            audio_content_type=audio_content_type,
             audio_file_size=len(audio_content),
             results=results,
         )
+    except HTTPException:
+        raise
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

app/utils.py CHANGED Viewed

@@ -1,5 +1,12 @@
-from pathlib import Path
 import yaml
 def load_config():
@@ -14,3 +21,46 @@ def load_config():
     with open(config_path, "r") as file:
         return yaml.safe_load(file)

+import io
+import tempfile
+import os
 import yaml
+import yt_dlp
+from fastapi import HTTPException
+from pathlib import Path
+from yt_dlp.utils import DownloadError
 def load_config():
     with open(config_path, "r") as file:
         return yaml.safe_load(file)
+def download_youtube_audio(youtube_url: str) -> bytes:
+    """
+    Download audio from YouTube URL and return it as MP3 bytes.
+    Raises HTTPException(400) when yt-dlp reports a DownloadError and
+    HTTPException(500) for any other failure while downloading or reading
+    the converted file.
+    """
+    # NOTE(review): the URL is passed to yt-dlp without checking that it
+    # points at YouTube; confirm whether callers are expected to validate it
+    try:
+        # Work inside a temporary directory that is removed on exit
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # The output template carries no extension; the converted file
+            # is expected next to it as audio.mp3
+            output_stem = os.path.join(temp_dir, "audio")
+            output_path = output_stem + ".mp3"
+            # yt-dlp options for best audio quality
+            ydl_opts = {
+                "format": "bestaudio/best",
+                "postprocessors": [
+                    {
+                        "key": "FFmpegExtractAudio",
+                        "preferredcodec": "mp3",
+                        "preferredquality": "192",
+                    }
+                ],
+                "outtmpl": output_stem,
+                # For a video URL that also names a playlist, fetch only the video
+                "noplaylist": True,
+                "quiet": True,
+                "no_warnings": True,
+            }
+            # Download the audio
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                ydl.download([youtube_url])
+            # Read the converted file before the directory is cleaned up
+            with open(output_path, "rb") as file:
+                return file.read()
+    except DownloadError as e:
+        raise HTTPException(
+            status_code=400, detail=f"Failed to download YouTube video: {str(e)}"
+        ) from e
+    except Exception as e:
+        raise HTTPException(
+            status_code=500, detail=f"Error processing YouTube URL: {str(e)}"
+        ) from e

poetry.lock CHANGED Viewed

@@ -4438,7 +4438,29 @@ idna = ">=2.0"
 multidict = ">=4.0"
 propcache = ">=0.2.1"
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.11,<3.14"
-content-hash = "06967d22db236bf08c9130a919b700b8aa709d1230a9802840d58cf31e92ea9e"

 multidict = ">=4.0"
 propcache = ">=0.2.1"
+[[package]]
+name = "yt-dlp"
+version = "2025.9.26"
+description = "A feature-rich command-line audio/video downloader"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "yt_dlp-2025.9.26-py3-none-any.whl", hash = "sha256:36f5fbc153600f759abd48d257231f0e0a547a115ac7ffb05d5b64e5c7fdf8a2"},
+    {file = "yt_dlp-2025.9.26.tar.gz", hash = "sha256:c148ae8233ac4ce6c5fbf6f70fcc390f13a00f59da3776d373cf88c5370bda86"},
+]
+[package.extras]
+build = ["build", "hatchling (>=1.27.0)", "pip", "setuptools (>=71.0.2,<81)", "wheel"]
+curl-cffi = ["curl-cffi (>=0.5.10,<0.6.dev0 || >=0.10.dev0,<0.14) ; implementation_name == \"cpython\""]
+default = ["brotli ; implementation_name == \"cpython\"", "brotlicffi ; implementation_name != \"cpython\"", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=2.0.2,<3)", "websockets (>=13.0)"]
+dev = ["autopep8 (>=2.0,<3.0)", "pre-commit", "pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)", "ruff (>=0.13.0,<0.14.0)"]
+pyinstaller = ["pyinstaller (>=6.13.0)"]
+secretstorage = ["cffi", "secretstorage"]
+static-analysis = ["autopep8 (>=2.0,<3.0)", "ruff (>=0.13.0,<0.14.0)"]
+test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.11,<3.14"
+content-hash = "f59e83025d6119da2ef43d5e6155b4246015a8233230422a2992c17e31d71194"

pyproject.toml CHANGED Viewed

@@ -35,7 +35,8 @@ dependencies = [
     "pytest (>=8.4.2,<9.0.0)",
     "python-multipart (>=0.0.20,<0.0.21)",
     "python-dotenv (>=1.1.1,<2.0.0)",
-    "numpy (>=1.24.0,<2.0.0)"
 ]

     "pytest (>=8.4.2,<9.0.0)",
     "python-multipart (>=0.0.20,<0.0.21)",
     "python-dotenv (>=1.1.1,<2.0.0)",
+    "numpy (>=1.24.0,<2.0.0)",
+    "yt-dlp (>=2025.9.26,<2026.0.0)"
 ]

scripts/explain.py CHANGED Viewed

@@ -26,7 +26,13 @@ def musiclime(audio_data, lyrics_text):
     explainer = MusicLIMEExplainer()
     predictor = MusicLIMEPredictor()
-    # Generate explanations
     explanation = explainer.explain_instance(
         audio=audio_data,
         lyrics=lyrics_text,

     explainer = MusicLIMEExplainer()
     predictor = MusicLIMEPredictor()
+    # Truncate raw audio to 2 minutes before any processing
+    target_samples = int(2 * 60 * 22050)
+    if len(audio_data) > target_samples:
+        # Keep first 2 minutes
+        audio_data = audio_data[:target_samples]
+    # Then generate explanations
     explanation = explainer.explain_instance(
         audio=audio_data,
         lyrics=lyrics_text,