Upload folder using huggingface_hub
Browse files
- app.py +36 -62
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -1478,51 +1478,48 @@ def generate_image(prompt: str) -> dict:
|
|
| 1478 |
return retrieve_image(prompt)
|
| 1479 |
|
| 1480 |
|
| 1481 |
-
def
|
| 1482 |
-
"""Generate ambient audio via
|
| 1483 |
-
|
| 1484 |
-
|
| 1485 |
-
|
| 1486 |
-
result = retrieve_audio(prompt)
|
| 1487 |
-
result["generation_unavailable"] = True
|
| 1488 |
-
result["sfx_error"] = "ELEVENLABS_API_KEY not set"
|
| 1489 |
-
return result
|
| 1490 |
-
|
| 1491 |
-
# --- ElevenLabs Sound Effects (actual ambient sounds, NOT speech) ---
|
| 1492 |
try:
|
| 1493 |
-
|
| 1494 |
-
|
| 1495 |
-
|
| 1496 |
-
|
| 1497 |
-
|
| 1498 |
-
|
| 1499 |
-
|
| 1500 |
-
|
| 1501 |
-
"text": prompt,
|
| 1502 |
-
"duration_seconds": 8.0,
|
| 1503 |
-
"prompt_influence": 0.5,
|
| 1504 |
-
},
|
| 1505 |
-
timeout=120,
|
| 1506 |
)
|
| 1507 |
-
|
| 1508 |
-
|
| 1509 |
-
|
| 1510 |
-
|
| 1511 |
-
tmp.flush()
|
| 1512 |
-
return {
|
| 1513 |
-
"path": tmp.name, "backend": "generative",
|
| 1514 |
-
"model": "ElevenLabs-SFX", "failed": False,
|
| 1515 |
-
}
|
| 1516 |
-
err = f"HTTP {resp.status_code}: {resp.text[:200]}"
|
| 1517 |
-
logger.warning("ElevenLabs SFX failed: %s", err)
|
| 1518 |
except Exception as e:
|
| 1519 |
-
|
| 1520 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1521 |
|
| 1522 |
# --- Fallback: CLAP retrieval ---
|
|
|
|
| 1523 |
result = retrieve_audio(prompt)
|
| 1524 |
result["generation_unavailable"] = True
|
| 1525 |
-
result["sfx_error"] = err
|
| 1526 |
return result
|
| 1527 |
|
| 1528 |
|
|
@@ -1664,7 +1661,7 @@ def main():
|
|
| 1664 |
}
|
| 1665 |
if backend == "generative":
|
| 1666 |
img_info = "Pollinations FLUX / Stable Horde (free)"
|
| 1667 |
-
aud_info = "
|
| 1668 |
else:
|
| 1669 |
img_info = "CLIP retrieval (57 images)"
|
| 1670 |
aud_info = "CLAP retrieval (104 clips)"
|
|
@@ -1681,29 +1678,6 @@ def main():
|
|
| 1681 |
f'CLAP HTSAT-unfused (coherence eval)'
|
| 1682 |
f'</div>', unsafe_allow_html=True)
|
| 1683 |
|
| 1684 |
-
# --- Audio API diagnostic ---
|
| 1685 |
-
with st.expander("Audio API diagnostic"):
|
| 1686 |
-
el_key = os.environ.get("ELEVENLABS_API_KEY", "")
|
| 1687 |
-
if el_key:
|
| 1688 |
-
st.success(f"ELEVENLABS_API_KEY set ({el_key[:8]}...)")
|
| 1689 |
-
if st.button("Test ElevenLabs SFX"):
|
| 1690 |
-
try:
|
| 1691 |
-
test_resp = _requests.post(
|
| 1692 |
-
ELEVENLABS_SFX_URL,
|
| 1693 |
-
headers={"xi-api-key": el_key, "Content-Type": "application/json"},
|
| 1694 |
-
json={"text": "gentle rain on leaves", "duration_seconds": 3, "prompt_influence": 0.5},
|
| 1695 |
-
timeout=60,
|
| 1696 |
-
)
|
| 1697 |
-
if test_resp.status_code == 200 and len(test_resp.content) > 500:
|
| 1698 |
-
st.success(f"OK! {len(test_resp.content)} bytes generated")
|
| 1699 |
-
st.audio(test_resp.content, format="audio/mp3")
|
| 1700 |
-
else:
|
| 1701 |
-
st.error(f"HTTP {test_resp.status_code}: {test_resp.text[:300]}")
|
| 1702 |
-
except Exception as e:
|
| 1703 |
-
st.error(f"Error: {e}")
|
| 1704 |
-
else:
|
| 1705 |
-
st.error("ELEVENLABS_API_KEY is NOT set!")
|
| 1706 |
-
|
| 1707 |
# Apply CSS based on mode
|
| 1708 |
if kid_mode:
|
| 1709 |
st.markdown(KID_CSS, unsafe_allow_html=True) # kid theme (includes all needed overrides)
|
|
|
|
| 1478 |
return retrieve_image(prompt)
|
| 1479 |
|
| 1480 |
|
| 1481 |
+
def _stable_audio_generate(prompt: str, duration: float = 8.0) -> Optional[str]:
    """Ask the hosted Stable Audio Open Gradio Space to synthesize audio.

    The request is sent to the ``artificialguybr/Stable-Audio-Open-Zero``
    Space through ``gradio_client``; no API key is passed.

    Args:
        prompt: Text description of the sound to generate.
        duration: Requested clip length, forwarded as ``seconds_total``.

    Returns:
        The value returned by the Space when it is a path that exists on
        disk, otherwise ``None``. Any exception raised along the way
        (including a missing ``gradio_client`` package) is logged as a
        warning and also results in ``None``.
    """
    try:
        # Imported inside the try so an ImportError is handled like any
        # other generation failure and the caller can fall back.
        from gradio_client import Client as GradioClient

        space = GradioClient("artificialguybr/Stable-Audio-Open-Zero", verbose=False)
        audio_path = space.predict(
            prompt=prompt,
            seconds_total=duration,
            steps=50,
            cfg_scale=7,
            api_name="/predict",
        )
        # Guard clause: reject empty results and paths that are not on disk.
        if not (audio_path and os.path.exists(audio_path)):
            logger.warning("Stable Audio returned invalid path: %s", audio_path)
            return None
        logger.info(
            "Stable Audio generated: %s (%d bytes)",
            audio_path,
            os.path.getsize(audio_path),
        )
        return audio_path
    except Exception as exc:
        # Best-effort boundary: never let a remote failure propagate.
        logger.warning("Stable Audio failed: %s", exc)
        return None
|
| 1503 |
+
|
| 1504 |
+
|
| 1505 |
+
def generate_audio(prompt: str) -> dict:
    """Return ambient audio for *prompt*: generate first, retrieve as fallback.

    Generation is attempted through ``_stable_audio_generate`` with a fixed
    8-second duration. When that yields no path, the result of
    ``retrieve_audio(prompt)`` (the CLAP retrieval fallback) is returned
    instead, tagged with ``generation_unavailable = True``.

    Args:
        prompt: Text description of the desired ambient sound.

    Returns:
        On successful generation, a dict with the keys ``path``,
        ``backend`` (``"generative"``), ``model`` (``"Stable-Audio-Open"``)
        and ``failed`` (``False``). Otherwise the mapping produced by
        ``retrieve_audio`` with ``generation_unavailable`` set to ``True``.
    """
    generated = _stable_audio_generate(prompt, duration=8.0)

    # --- Fallback: CLAP retrieval ---
    if not generated:
        logger.info("Audio generation unavailable — using CLAP retrieval")
        fallback = retrieve_audio(prompt)
        fallback["generation_unavailable"] = True
        return fallback

    # --- Generation succeeded ---
    return {
        "path": generated,
        "backend": "generative",
        "model": "Stable-Audio-Open",
        "failed": False,
    }
|
| 1524 |
|
| 1525 |
|
|
|
|
| 1661 |
}
|
| 1662 |
if backend == "generative":
|
| 1663 |
img_info = "Pollinations FLUX / Stable Horde (free)"
|
| 1664 |
+
aud_info = "Stable Audio Open / CLAP retrieval (free)"
|
| 1665 |
else:
|
| 1666 |
img_info = "CLIP retrieval (57 images)"
|
| 1667 |
aud_info = "CLAP retrieval (104 clips)"
|
|
|
|
| 1678 |
f'CLAP HTSAT-unfused (coherence eval)'
|
| 1679 |
f'</div>', unsafe_allow_html=True)
|
| 1680 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1681 |
# Apply CSS based on mode
|
| 1682 |
if kid_mode:
|
| 1683 |
st.markdown(KID_CSS, unsafe_allow_html=True) # kid theme (includes all needed overrides)
|
requirements.txt
CHANGED
|
@@ -13,3 +13,4 @@ pillow>=10.0.0
|
|
| 13 |
pydantic>=2.0.0
|
| 14 |
pydantic-settings>=2.0.0
|
| 15 |
requests>=2.28.0
|
|
|
|
|
|
| 13 |
pydantic>=2.0.0
|
| 14 |
pydantic-settings>=2.0.0
|
| 15 |
requests>=2.28.0
|
| 16 |
+
gradio_client>=1.0.0
|