krislette committed on
Commit
253a78c
·
1 Parent(s): 7633e2f

Auto-deploy from GitHub: b571980d762eb701851962e3b915d9447fe4e6de

Browse files
Files changed (5) hide show
  1. app/server.py +67 -40
  2. app/utils.py +51 -1
  3. poetry.lock +23 -1
  4. pyproject.toml +2 -1
  5. scripts/explain.py +7 -1
app/server.py CHANGED
@@ -2,10 +2,6 @@
2
  from fastapi import Depends, FastAPI, File, Form, HTTPException, UploadFile
3
  from fastapi.middleware.cors import CORSMiddleware
4
 
5
- # Processing imports
6
- import librosa
7
- import io
8
-
9
  # Utils/schemas imports
10
  from app.schemas import (
11
  ErrorResponse,
@@ -14,12 +10,17 @@ from app.schemas import (
14
  PredictionXAIResponse,
15
  WelcomeResponse,
16
  )
17
- from app.utils import load_config
18
 
19
  # Model/XAI-related imports
20
  from scripts.explain import musiclime
21
  from scripts.predict import predict_pipeline
22
 
 
 
 
 
 
23
 
24
  # Load config at startup
25
  config = load_config()
@@ -43,45 +44,63 @@ app.add_middleware(
43
  )
44
 
45
 
46
- async def validate_audio_file(audio_file: UploadFile = File(...)):
47
- """Validate audio file type and size."""
48
- # Check file size
49
- audio_content = await audio_file.read()
50
- if len(audio_content) > MAX_FILE_SIZE:
51
  raise HTTPException(
52
  status_code=400,
53
- detail=f"File too large. Maximum size is {MAX_FILE_SIZE // (1024*1024)}MB.",
54
  )
55
 
56
- # Check file type
57
- if audio_file.content_type not in ALLOWED_AUDIO_TYPES:
 
58
  raise HTTPException(
59
  status_code=400,
60
- detail=f"Invalid file type. Supported formats: {', '.join(ALLOWED_AUDIO_TYPES)}",
61
  )
62
 
63
- # Reset file pointer for later use
64
- audio_file.file.seek(0)
65
- return audio_file, audio_content
66
 
67
 
68
- def validate_lyrics(lyrics: str = Form(...)):
69
- """Validate lyrics length and content."""
70
- if len(lyrics) > MAX_LYRICS_LENGTH:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  raise HTTPException(
72
  status_code=400,
73
- detail=f"Lyrics too long. Maximum length is {MAX_LYRICS_LENGTH} characters.",
74
  )
75
 
76
- # Basic sanitization, remove excessive whitespace
77
- lyrics = lyrics.strip()
78
- if not lyrics:
79
  raise HTTPException(
80
  status_code=400,
81
- detail="Lyrics cannot be empty.",
82
  )
83
 
84
- return lyrics
85
 
86
 
87
  @app.get("/", response_model=WelcomeResponse, tags=["Root"])
@@ -108,32 +127,36 @@ def root():
108
  responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}},
109
  )
110
  async def predict_music(
111
- lyrics: str = Depends(validate_lyrics), audio_file_data=Depends(validate_audio_file)
 
112
  ):
113
  """
114
  Endpoint to predict whether a music sample is human-composed or AI-generated.
 
115
  """
116
  try:
117
- # Get the audio file and content from sanitized and cleaned audio file
118
- audio_file, audio_content = audio_file_data
119
 
120
- # Load audio from uploaded file with error handling for corrupted files
121
  try:
122
  audio_data, sr = librosa.load(io.BytesIO(audio_content))
123
  except Exception as e:
124
  raise HTTPException(status_code=400, detail=f"Invalid audio file: {str(e)}")
125
 
126
- # Call MLP predict runner script to get results
127
  results = predict_pipeline(audio_data, lyrics)
128
 
129
  return PredictionResponse(
130
  status="success",
131
  lyrics=lyrics,
132
- audio_file_name=audio_file.filename,
133
- audio_content_type=audio_file.content_type,
134
  audio_file_size=len(audio_content),
135
  results=results,
136
  )
 
 
137
  except Exception as e:
138
  raise HTTPException(status_code=500, detail=str(e))
139
 
@@ -144,32 +167,36 @@ async def predict_music(
144
  responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}},
145
  )
146
  async def predict_music_with_xai(
147
- lyrics: str = Depends(validate_lyrics), audio_file_data=Depends(validate_audio_file)
 
148
  ):
149
  """
150
  Endpoint to predict whether a music sample is human-composed or AI-generated with explainability.
 
151
  """
152
  try:
153
- # Get the audio file and content from sanitized and cleaned audio file
154
- audio_file, audio_content = audio_file_data
155
 
156
- # Load audio from uploaded file with error handling for corrupted files
157
  try:
158
  audio_data, sr = librosa.load(io.BytesIO(audio_content))
159
  except Exception as e:
160
  raise HTTPException(status_code=400, detail=f"Invalid audio file: {str(e)}")
161
 
162
- # Call musiclime runner script to get results
163
  results = musiclime(audio_data, lyrics)
164
 
165
  return PredictionXAIResponse(
166
  status="success",
167
  lyrics=lyrics,
168
- audio_file_name=audio_file.filename,
169
- audio_content_type=audio_file.content_type,
170
  audio_file_size=len(audio_content),
171
  results=results,
172
  )
 
 
173
  except Exception as e:
174
  raise HTTPException(status_code=500, detail=str(e))
175
 
 
2
  from fastapi import Depends, FastAPI, File, Form, HTTPException, UploadFile
3
  from fastapi.middleware.cors import CORSMiddleware
4
 
 
 
 
 
5
  # Utils/schemas imports
6
  from app.schemas import (
7
  ErrorResponse,
 
10
  PredictionXAIResponse,
11
  WelcomeResponse,
12
  )
13
+ from app.utils import load_config, download_youtube_audio
14
 
15
  # Model/XAI-related imports
16
  from scripts.explain import musiclime
17
  from scripts.predict import predict_pipeline
18
 
19
+ # Other imports
20
+ import io
21
+ import librosa
22
+ from typing import Optional, Tuple
23
+
24
 
25
  # Load config at startup
26
  config = load_config()
 
44
  )
45
 
46
 
47
def validate_lyrics(lyrics: str = Form(...)):
    """
    Validate and normalize the submitted lyrics.

    Surrounding whitespace is stripped before any check, so padding does not
    count toward the length limit and a whitespace-only submission is
    reported as empty.

    Returns: the stripped lyrics text.
    Raises: HTTPException (400) if the lyrics are empty after stripping or
    longer than MAX_LYRICS_LENGTH characters.
    """
    # Normalize first: measuring the raw value would reject lyrics whose
    # actual content fits but which arrive with leading/trailing padding
    lyrics = lyrics.strip()
    if not lyrics:
        raise HTTPException(
            status_code=400,
            detail="Lyrics cannot be empty.",
        )

    if len(lyrics) > MAX_LYRICS_LENGTH:
        raise HTTPException(
            status_code=400,
            detail=f"Lyrics too long. Maximum length is {MAX_LYRICS_LENGTH} characters.",
        )

    return lyrics
 
 
64
 
65
 
66
async def validate_audio_source(
    audio_file: Optional[UploadFile] = File(None),
    youtube_url: Optional[str] = Form(None),
) -> Tuple[Optional[bytes], str, str]:
    """
    Validate and process audio source (either file or YouTube URL).

    Exactly one of audio_file or youtube_url must be supplied.

    Returns: (audio_content, file_name, content_type)
    Raises: HTTPException (400) when neither or both sources are given, when
    an uploaded file has a content type outside ALLOWED_AUDIO_TYPES, or when
    an uploaded file is larger than MAX_FILE_SIZE. HTTPExceptions raised by
    download_youtube_audio propagate unchanged.
    """
    import asyncio

    if not audio_file and not youtube_url:
        raise HTTPException(
            status_code=400, detail="Either audio_file or youtube_url must be provided"
        )

    if audio_file and youtube_url:
        raise HTTPException(
            status_code=400, detail="Provide either audio_file or youtube_url, not both"
        )

    # Process YouTube URL
    if youtube_url:
        # NOTE(review): youtube_url is untrusted form input and is passed to
        # yt-dlp as-is; consider restricting it to YouTube hosts. This branch
        # is also not subject to MAX_FILE_SIZE - confirm that is intended.
        # The download performs blocking network and ffmpeg work, so run it in
        # a worker thread instead of stalling the event loop.
        audio_content = await asyncio.to_thread(download_youtube_audio, youtube_url)
        # download_youtube_audio transcodes to MP3, so label the payload as MP3
        return audio_content, "youtube_audio.mp3", "audio/mpeg"

    # Process uploaded file: reject unsupported types before reading the body
    if audio_file.content_type not in ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid file type. Supported formats: {', '.join(ALLOWED_AUDIO_TYPES)}",
        )

    audio_content = await audio_file.read()
    if len(audio_content) > MAX_FILE_SIZE:
        raise HTTPException(
            status_code=400,
            detail=f"File too large. Maximum size is {MAX_FILE_SIZE // (1024*1024)}MB.",
        )

    return audio_content, audio_file.filename, audio_file.content_type
104
 
105
 
106
  @app.get("/", response_model=WelcomeResponse, tags=["Root"])
 
127
  responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}},
128
  )
129
  async def predict_music(
130
+ lyrics: str = Depends(validate_lyrics),
131
+ audio_data_tuple: Tuple = Depends(validate_audio_source),
132
  ):
133
  """
134
  Endpoint to predict whether a music sample is human-composed or AI-generated.
135
+ Accepts either an audio file upload or a YouTube URL.
136
  """
137
  try:
138
+ # Unpack validated data
139
+ audio_content, audio_file_name, audio_content_type = audio_data_tuple
140
 
141
+ # Load audio with librosa
142
  try:
143
  audio_data, sr = librosa.load(io.BytesIO(audio_content))
144
  except Exception as e:
145
  raise HTTPException(status_code=400, detail=f"Invalid audio file: {str(e)}")
146
 
147
+ # Call MLP predict runner script
148
  results = predict_pipeline(audio_data, lyrics)
149
 
150
  return PredictionResponse(
151
  status="success",
152
  lyrics=lyrics,
153
+ audio_file_name=audio_file_name,
154
+ audio_content_type=audio_content_type,
155
  audio_file_size=len(audio_content),
156
  results=results,
157
  )
158
+ except HTTPException:
159
+ raise
160
  except Exception as e:
161
  raise HTTPException(status_code=500, detail=str(e))
162
 
 
167
  responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}},
168
  )
169
  async def predict_music_with_xai(
170
+ lyrics: str = Depends(validate_lyrics),
171
+ audio_data_tuple: Tuple = Depends(validate_audio_source),
172
  ):
173
  """
174
  Endpoint to predict whether a music sample is human-composed or AI-generated with explainability.
175
+ Accepts either an audio file upload or a YouTube URL.
176
  """
177
  try:
178
+ # Unpack validated data
179
+ audio_content, audio_file_name, audio_content_type = audio_data_tuple
180
 
181
+ # Load audio with librosa
182
  try:
183
  audio_data, sr = librosa.load(io.BytesIO(audio_content))
184
  except Exception as e:
185
  raise HTTPException(status_code=400, detail=f"Invalid audio file: {str(e)}")
186
 
187
+ # Call musiclime runner script
188
  results = musiclime(audio_data, lyrics)
189
 
190
  return PredictionXAIResponse(
191
  status="success",
192
  lyrics=lyrics,
193
+ audio_file_name=audio_file_name,
194
+ audio_content_type=audio_content_type,
195
  audio_file_size=len(audio_content),
196
  results=results,
197
  )
198
+ except HTTPException:
199
+ raise
200
  except Exception as e:
201
  raise HTTPException(status_code=500, detail=str(e))
202
 
app/utils.py CHANGED
@@ -1,5 +1,12 @@
1
- from pathlib import Path
 
 
2
  import yaml
 
 
 
 
 
3
 
4
 
5
  def load_config():
@@ -14,3 +21,46 @@ def load_config():
14
 
15
  with open(config_path, "r") as file:
16
  return yaml.safe_load(file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import tempfile
3
+ import os
4
  import yaml
5
+ import yt_dlp
6
+
7
+ from fastapi import HTTPException
8
+ from pathlib import Path
9
+ from yt_dlp.utils import DownloadError
10
 
11
 
12
  def load_config():
 
21
 
22
  with open(config_path, "r") as file:
23
  return yaml.safe_load(file)
24
+
25
+
26
def download_youtube_audio(youtube_url: str) -> bytes:
    """
    Download audio from YouTube URL and return as bytes.

    The best available audio stream is fetched with yt-dlp into a temporary
    directory and converted to MP3 (preferred quality 192) by the
    FFmpegExtractAudio post-processor. The temporary directory is removed
    when the function returns.

    Returns: the raw bytes of the resulting MP3 file.
    Raises: HTTPException (400) when yt-dlp reports a DownloadError, and
    HTTPException (500) for any other failure; the original exception is
    attached as the cause.
    """
    try:
        # Create a temporary directory for download
        with tempfile.TemporaryDirectory() as temp_dir:
            download_dir = Path(temp_dir)

            # yt-dlp options for best audio quality
            ydl_opts = {
                "format": "bestaudio/best",
                "postprocessors": [
                    {
                        "key": "FFmpegExtractAudio",
                        "preferredcodec": "mp3",
                        "preferredquality": "192",
                    }
                ],
                # Let yt-dlp fill in the source extension; the post-processor
                # then writes audio.mp3 next to it
                "outtmpl": str(download_dir / "audio.%(ext)s"),
                # A watch URL that also carries a playlist id must not fan out
                # into the whole playlist
                "noplaylist": True,
                "quiet": True,
                "no_warnings": True,
            }

            # Download the audio
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([youtube_url])

            # Read the converted file before the temporary directory is removed
            return (download_dir / "audio.mp3").read_bytes()
    except DownloadError as e:
        raise HTTPException(
            status_code=400, detail=f"Failed to download YouTube video: {str(e)}"
        ) from e
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error processing YouTube URL: {str(e)}"
        ) from e
poetry.lock CHANGED
@@ -4438,7 +4438,29 @@ idna = ">=2.0"
4438
  multidict = ">=4.0"
4439
  propcache = ">=0.2.1"
4440
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4441
  [metadata]
4442
  lock-version = "2.1"
4443
  python-versions = ">=3.11,<3.14"
4444
- content-hash = "06967d22db236bf08c9130a919b700b8aa709d1230a9802840d58cf31e92ea9e"
 
4438
  multidict = ">=4.0"
4439
  propcache = ">=0.2.1"
4440
 
4441
+ [[package]]
4442
+ name = "yt-dlp"
4443
+ version = "2025.9.26"
4444
+ description = "A feature-rich command-line audio/video downloader"
4445
+ optional = false
4446
+ python-versions = ">=3.9"
4447
+ groups = ["main"]
4448
+ files = [
4449
+ {file = "yt_dlp-2025.9.26-py3-none-any.whl", hash = "sha256:36f5fbc153600f759abd48d257231f0e0a547a115ac7ffb05d5b64e5c7fdf8a2"},
4450
+ {file = "yt_dlp-2025.9.26.tar.gz", hash = "sha256:c148ae8233ac4ce6c5fbf6f70fcc390f13a00f59da3776d373cf88c5370bda86"},
4451
+ ]
4452
+
4453
+ [package.extras]
4454
+ build = ["build", "hatchling (>=1.27.0)", "pip", "setuptools (>=71.0.2,<81)", "wheel"]
4455
+ curl-cffi = ["curl-cffi (>=0.5.10,<0.6.dev0 || >=0.10.dev0,<0.14) ; implementation_name == \"cpython\""]
4456
+ default = ["brotli ; implementation_name == \"cpython\"", "brotlicffi ; implementation_name != \"cpython\"", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=2.0.2,<3)", "websockets (>=13.0)"]
4457
+ dev = ["autopep8 (>=2.0,<3.0)", "pre-commit", "pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)", "ruff (>=0.13.0,<0.14.0)"]
4458
+ pyinstaller = ["pyinstaller (>=6.13.0)"]
4459
+ secretstorage = ["cffi", "secretstorage"]
4460
+ static-analysis = ["autopep8 (>=2.0,<3.0)", "ruff (>=0.13.0,<0.14.0)"]
4461
+ test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
4462
+
4463
  [metadata]
4464
  lock-version = "2.1"
4465
  python-versions = ">=3.11,<3.14"
4466
+ content-hash = "f59e83025d6119da2ef43d5e6155b4246015a8233230422a2992c17e31d71194"
pyproject.toml CHANGED
@@ -35,7 +35,8 @@ dependencies = [
35
  "pytest (>=8.4.2,<9.0.0)",
36
  "python-multipart (>=0.0.20,<0.0.21)",
37
  "python-dotenv (>=1.1.1,<2.0.0)",
38
- "numpy (>=1.24.0,<2.0.0)"
 
39
  ]
40
 
41
 
 
35
  "pytest (>=8.4.2,<9.0.0)",
36
  "python-multipart (>=0.0.20,<0.0.21)",
37
  "python-dotenv (>=1.1.1,<2.0.0)",
38
+ "numpy (>=1.24.0,<2.0.0)",
39
+ "yt-dlp (>=2025.9.26,<2026.0.0)"
40
  ]
41
 
42
 
scripts/explain.py CHANGED
@@ -26,7 +26,13 @@ def musiclime(audio_data, lyrics_text):
26
  explainer = MusicLIMEExplainer()
27
  predictor = MusicLIMEPredictor()
28
 
29
- # Generate explanations
 
 
 
 
 
 
30
  explanation = explainer.explain_instance(
31
  audio=audio_data,
32
  lyrics=lyrics_text,
 
26
  explainer = MusicLIMEExplainer()
27
  predictor = MusicLIMEPredictor()
28
 
29
+ # Truncate raw audio to 2 minutes before any processing
30
+ target_samples = int(2 * 60 * 22050)
31
+ if len(audio_data) > target_samples:
32
+ # Keep first 2 minutes
33
+ audio_data = audio_data[:target_samples]
34
+
35
+ # Then generate explanations
36
  explanation = explainer.explain_instance(
37
  audio=audio_data,
38
  lyrics=lyrics_text,