# fetch_gaia_audio.py
# Listing metadata: ab62c9e, file size 2,321 bytes.
import os
import re
import requests
# Base URL of the scoring service; main() requests its "/questions" endpoint.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Destination file written by download_audio(); the ".wav" suffix is swapped
# for ".mp3" when an MP3 cannot be converted.
OUT_PATH = "/mnt/data/test.wav"
def main() -> None:
    """Fetch the GAIA question list and download the first audio URL found.

    The questions are requested from ``{DEFAULT_API_URL}/questions``. Each
    question is first inspected for an ``attachments``, ``attachment`` or
    ``audio`` field holding a URL (or a collection of URLs) accepted by
    ``is_media_url``. If none is found, the ``question`` text of every entry
    is scanned for an ``http(s)://....mp3`` / ``.wav`` link. The first hit is
    handed to ``download_audio`` and the function returns; when nothing
    matches, a warning is printed and nothing is downloaded.

    Assumes the endpoint returns a JSON list of objects -- TODO confirm
    against the service.

    Raises:
        requests.HTTPError: if the questions endpoint (or the audio
            download) answers with an error status.
    """
    # 1) Fetch GAIA questions
    resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
    resp.raise_for_status()
    questions = resp.json()

    # 2) Try attachments field first
    for q in questions:
        for field in ("attachments", "attachment", "audio"):
            urls = q.get(field)
            if not urls:
                continue
            if isinstance(urls, str):
                urls = [urls]
            elif not isinstance(urls, (list, tuple, dict)):
                # A bare number/bool cannot be iterated for URLs.
                continue
            for url in urls:
                # JSON entries may be dicts or numbers; only strings can be
                # URLs, and the regex check would raise on anything else.
                if isinstance(url, str) and is_media_url(url):
                    return download_audio(url)

    # 3) Fallback: regex scan in question text
    pattern = re.compile(r"(https?://\S+\.(?:mp3|wav))", re.IGNORECASE)
    for q in questions:
        text = q.get("question")
        if not isinstance(text, str):
            # Missing or null question text: nothing to scan.
            continue
        match = pattern.search(text)
        if match:
            return download_audio(match.group(1))

    print("⚠️ No .mp3/.wav URL found in GAIA payload; skipping download.")
    return
def is_media_url(url: str) -> bool:
    """Return True when *url* is an http(s) URL ending in ``.mp3`` or ``.wav``.

    The match is case-insensitive and anchored at both ends, so a URL with a
    query string or fragment after the extension is rejected.

    Args:
        url: Candidate value, typically taken from a JSON payload.

    Returns:
        True for a matching URL string; False otherwise, including for any
        non-string value (which would make ``re.match`` raise ``TypeError``).
    """
    if not isinstance(url, str):
        return False
    return bool(re.match(r"^https?://.*\.(?:mp3|wav)$", url, re.IGNORECASE))
def _ensure_parent_dir(path: str) -> None:
    """Create the directory that will contain *path* if it is missing."""
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)


def download_audio(url: str) -> None:
    """Download the audio at *url* and store it under ``OUT_PATH``.

    * ``.mp3`` URLs are converted to WAV with ``pydub`` when it can be
      imported; otherwise the raw MP3 bytes are saved next to ``OUT_PATH``
      with the ``.wav`` suffix replaced by ``.mp3``.
    * Any other extension is written unchanged to ``OUT_PATH``.

    Args:
        url: Direct link to the audio file.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    from urllib.parse import urlsplit

    print(f"Downloading audio from {url}")
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    content = r.content

    # Read the extension from the URL *path* only, so a query string or
    # fragment (".mp3?token=...") does not hide the real file type.
    ext = os.path.splitext(urlsplit(url).path)[1].lower()

    if ext == ".mp3":
        # try to convert to wav if pydub installed
        try:
            from pydub import AudioSegment
        except ImportError:
            # fallback: write raw mp3 bytes
            mp3_out = OUT_PATH.replace(".wav", ".mp3")
            _ensure_parent_dir(mp3_out)
            with open(mp3_out, "wb") as f:
                f.write(content)
            print(f"⚠ pydub not installed; saved MP3 to {mp3_out}")
            return

        mp3_path = "/mnt/data/tmp.mp3"
        _ensure_parent_dir(mp3_path)
        _ensure_parent_dir(OUT_PATH)
        try:
            with open(mp3_path, "wb") as f:
                f.write(content)
            audio = AudioSegment.from_mp3(mp3_path)
            audio.export(OUT_PATH, format="wav")
        finally:
            # The scratch MP3 is only an intermediate; do not leave it behind,
            # whether or not the conversion succeeded.
            try:
                os.remove(mp3_path)
            except FileNotFoundError:
                pass
        print(f"✔ Saved WAV to {OUT_PATH}")
        return

    # if it's .wav or any other, write directly
    _ensure_parent_dir(OUT_PATH)
    with open(OUT_PATH, "wb") as f:
        f.write(content)
    print(f"✔ Saved WAV to {OUT_PATH}")
# Run the fetch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()