Spaces:
Sleeping
Sleeping
| # fetch_gaia_audio.py | |
| import os | |
| import re | |
| import requests | |
# Base URL of the scoring service; main() requests "<base>/questions" from it.
DEFAULT_API_URL: str = "https://agents-course-unit4-scoring.hf.space"
# File the downloaded (or converted) audio is written to by download_audio().
OUT_PATH: str = "/mnt/data/test.wav"
def main():
    """Fetch the GAIA question list and download the first audio URL it references.

    The questions are requested from ``{DEFAULT_API_URL}/questions``. Every
    question is first checked for a media URL in its ``attachments``,
    ``attachment`` or ``audio`` field (a single string or a list of values).
    Only if no question yields one is the ``question`` text of each entry
    scanned for an ``http(s)`` URL ending in ``.mp3`` or ``.wav``. The first
    URL found is handed to ``download_audio`` and the function returns; if
    nothing is found a warning is printed and nothing is downloaded.

    Raises:
        requests.HTTPError: if the questions endpoint answers with an error
            status (via ``raise_for_status``).
    """
    # 1) Fetch GAIA questions
    resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
    resp.raise_for_status()
    payload = resp.json()

    # The payload is external JSON: keep only object entries so that the
    # ``.get`` calls below cannot fail on strings, numbers or nulls.
    if isinstance(payload, list):
        questions = [q for q in payload if isinstance(q, dict)]
    else:
        questions = []

    # 2) Try attachments field first
    for q in questions:
        for field in ("attachments", "attachment", "audio"):
            urls = q.get(field)
            if not urls:
                continue
            if isinstance(urls, str):
                urls = [urls]
            elif not isinstance(urls, (list, tuple)):
                # Neither a URL string nor a list of candidates; ignore it.
                continue
            for url in urls:
                # List items may be dicts/None/numbers; only strings can be
                # URLs, and the regex check would raise on anything else.
                if isinstance(url, str) and is_media_url(url):
                    return download_audio(url)

    # 3) Fallback: regex scan in question text
    pattern = re.compile(r"(https?://\S+\.(?:mp3|wav))", re.IGNORECASE)
    for q in questions:
        text = q.get("question")
        if not isinstance(text, str):
            # Missing or null question text: nothing to scan.
            continue
        match = pattern.search(text)
        if match:
            return download_audio(match.group(1))

    print("⚠️ No .mp3/.wav URL found in GAIA payload; skipping download.")
def is_media_url(url: str) -> bool:
    """Return True if *url* is an http(s) URL whose text ends in .mp3 or .wav.

    The match is case-insensitive and anchored at the end of the string, so a
    URL that continues after the extension (for example with a query string)
    is not accepted.

    Values that are not ``str`` (e.g. ``None`` or a dict taken from a JSON
    payload) yield ``False`` instead of raising ``TypeError`` inside ``re``.
    """
    if not isinstance(url, str):
        return False
    return bool(re.match(r"^https?://.*\.(?:mp3|wav)$", url, re.IGNORECASE))
def download_audio(url: str):
    """Download the audio at *url* and store it at ``OUT_PATH``.

    Behaviour by file extension (taken from the URL path, case-insensitive):

    * ``.mp3`` with ``pydub`` importable: the bytes are written to a
      temporary ``tmp.mp3`` next to ``OUT_PATH``, converted to WAV at
      ``OUT_PATH``, and the temporary file is removed afterwards.
    * ``.mp3`` without ``pydub``: the raw bytes are saved next to
      ``OUT_PATH`` with ``.wav`` replaced by ``.mp3``.
    * anything else (including ``.wav``): the raw bytes are written to
      ``OUT_PATH`` unchanged.

    Args:
        url: HTTP(S) address of the audio file.

    Returns:
        None. Progress and results are reported via ``print``.

    Raises:
        requests.HTTPError: if the download answers with an error status
            (via ``raise_for_status``).
    """
    from urllib.parse import urlparse

    print(f"Downloading audio from {url}")
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    content = r.content

    # Use only the URL path for the extension so that a query string or
    # fragment (e.g. "clip.mp3?token=...") does not hide the real file type.
    ext = os.path.splitext(urlparse(url).path)[1].lower()

    # Make sure the destination directory exists before opening any file.
    out_dir = os.path.dirname(OUT_PATH)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    if ext == ".mp3":
        # try to convert to wav if pydub installed
        try:
            from pydub import AudioSegment
        except ImportError:
            # fallback: write raw mp3 bytes
            mp3_out = OUT_PATH.replace(".wav", ".mp3")
            with open(mp3_out, "wb") as f:
                f.write(content)
            print(f"⚠ pydub not installed; saved MP3 to {mp3_out}")
            return

        # pydub reads from a file, so stage the bytes next to the output.
        mp3_path = os.path.join(out_dir, "tmp.mp3")
        try:
            with open(mp3_path, "wb") as f:
                f.write(content)
            audio = AudioSegment.from_mp3(mp3_path)
            audio.export(OUT_PATH, format="wav")
        finally:
            # Do not leave the intermediate file behind, whether or not the
            # conversion succeeded.
            if os.path.exists(mp3_path):
                os.remove(mp3_path)
        print(f"✔ Saved WAV to {OUT_PATH}")
        return

    # if it's .wav or any other, write directly
    with open(OUT_PATH, "wb") as f:
        f.write(content)
    print(f"✔ Saved WAV to {OUT_PATH}")
# Run the fetch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()