pratik-250620 committed on
Commit
0b7335c
·
verified ·
1 Parent(s): c4a70dd

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +92 -55
app.py CHANGED
@@ -1281,12 +1281,22 @@ def plan_extended(prompt: str) -> Optional[Any]:
1281
  # Generation / retrieval functions
1282
  # ---------------------------------------------------------------------------
1283
 
1284
- # Pollinations image API (free, no auth)
1285
- POLLINATIONS_IMAGE_URL = "https://image.pollinations.ai/prompt"
 
 
 
1286
 
1287
- # Pollinations audio API (music generation)
1288
- POLLINATIONS_AUDIO_URL = "https://gen.pollinations.ai/v1/audio/speech"
1289
- POLLINATIONS_TTS_URL = "https://gen.pollinations.ai/audio"
 
 
 
 
 
 
 
1290
 
1291
 
1292
  def gen_text(prompt: str, mode: str) -> dict:
@@ -1405,11 +1415,32 @@ def _stable_horde_image(prompt: str, timeout: int = 90) -> Optional[bytes]:
1405
 
1406
 
1407
  def generate_image(prompt: str) -> dict:
1408
- """Generate image: Pollinations → Stable Horde → CLIP retrieval."""
1409
- # --- Attempt 1: Pollinations.ai (free, no auth) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1410
  try:
1411
  encoded = _urlparse.quote(prompt)
1412
- url = f"{POLLINATIONS_IMAGE_URL}/{encoded}?model=flux&width=1024&height=1024&nologo=true"
1413
  resp = _requests.get(url, timeout=30)
1414
  if resp.status_code == 200 and len(resp.content) > 1000:
1415
  ct = resp.headers.get("content-type", "")
@@ -1421,11 +1452,11 @@ def generate_image(prompt: str) -> dict:
1421
  "path": tmp.name, "backend": "generative",
1422
  "model": "Pollinations-FLUX", "failed": False,
1423
  }
1424
- logger.warning("Pollinations image returned %s", resp.status_code)
1425
  except Exception as e:
1426
- logger.warning("Pollinations image failed: %s", e)
1427
 
1428
- # --- Attempt 2: Stable Horde (free, crowdsourced) ---
1429
  try:
1430
  img_bytes = _stable_horde_image(prompt)
1431
  if img_bytes:
@@ -1445,52 +1476,58 @@ def generate_image(prompt: str) -> dict:
1445
 
1446
 
1447
  def generate_audio(prompt: str) -> dict:
1448
- """Generate audio via Pollinations.ai → CLAP retrieval fallback.
1449
 
1450
- Tries Pollinations music endpoint (ElevenMusic) for ambient audio,
1451
- then Pollinations TTS as a narrative fallback, then CLAP retrieval.
1452
  """
1453
- # --- Attempt 1: Pollinations music (ambient/soundscape) ---
1454
- try:
1455
- resp = _requests.post(
1456
- POLLINATIONS_AUDIO_URL,
1457
- json={
1458
- "model": "elevenmusic",
1459
- "input": prompt,
1460
- },
1461
- timeout=120,
1462
- )
1463
- if resp.status_code == 200 and len(resp.content) > 1000:
1464
- tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False, dir="/tmp")
1465
- tmp.write(resp.content)
1466
- tmp.flush()
1467
- return {
1468
- "path": tmp.name, "backend": "generative",
1469
- "model": "Pollinations-Music", "failed": False,
1470
- }
1471
- logger.warning("Pollinations music returned %s", resp.status_code)
1472
- except Exception as e:
1473
- logger.warning("Pollinations music failed: %s", e)
 
 
 
 
 
 
1474
 
1475
- # --- Attempt 2: Pollinations TTS (narrate the scene) ---
1476
- try:
1477
- tts_text = f"The sounds of: {prompt}"
1478
- encoded = _urlparse.quote(tts_text)
1479
- resp = _requests.get(
1480
- f"{POLLINATIONS_TTS_URL}/{encoded}?voice=shimmer",
1481
- timeout=60,
1482
- )
1483
- if resp.status_code == 200 and len(resp.content) > 1000:
1484
- tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False, dir="/tmp")
1485
- tmp.write(resp.content)
1486
- tmp.flush()
1487
- return {
1488
- "path": tmp.name, "backend": "generative",
1489
- "model": "Pollinations-TTS", "failed": False,
1490
- }
1491
- logger.warning("Pollinations TTS returned %s", resp.status_code)
1492
- except Exception as e:
1493
- logger.warning("Pollinations TTS failed: %s", e)
1494
 
1495
  # --- Fallback: CLAP retrieval ---
1496
  logger.info("Audio generation unavailable — using CLAP retrieval")
@@ -1637,7 +1674,7 @@ def main():
1637
  }
1638
  if backend == "generative":
1639
  img_info = "Pollinations FLUX / Stable Horde (free)"
1640
- aud_info = "Pollinations Music / CLAP retrieval (free)"
1641
  else:
1642
  img_info = "CLIP retrieval (57 images)"
1643
  aud_info = "CLAP retrieval (104 clips)"
 
1281
  # Generation / retrieval functions
1282
  # ---------------------------------------------------------------------------
1283
 
1284
+ # Pollinations endpoints
1285
+ POLLINATIONS_IMAGE_FREE_URL = "https://image.pollinations.ai/prompt" # Free, no auth
1286
+ POLLINATIONS_GEN_IMAGE_URL = "https://gen.pollinations.ai/image" # Needs API key
1287
+ POLLINATIONS_AUDIO_URL = "https://gen.pollinations.ai/v1/audio/speech" # Needs API key
1288
+ POLLINATIONS_TTS_URL = "https://gen.pollinations.ai/audio" # Needs API key
1289
 
1290
+ # Stable Horde (free, crowdsourced, no key)
1291
+ STABLE_HORDE_URL = "https://stablehorde.net/api/v2"
1292
+
1293
+
1294
+ def _pollinations_headers() -> dict:
1295
+ """Get auth headers for Pollinations gen.pollinations.ai endpoints."""
1296
+ key = os.environ.get("POLLINATIONS_API_KEY", "")
1297
+ if key:
1298
+ return {"Authorization": f"Bearer {key}"}
1299
+ return {}
1300
 
1301
 
1302
  def gen_text(prompt: str, mode: str) -> dict:
 
1415
 
1416
 
1417
  def generate_image(prompt: str) -> dict:
1418
+ """Generate image: Pollinations (auth) → Pollinations (free) → Stable Horde → CLIP retrieval."""
1419
+ # --- Attempt 1: Pollinations gen.pollinations.ai (with API key) ---
1420
+ headers = _pollinations_headers()
1421
+ if headers:
1422
+ try:
1423
+ encoded = _urlparse.quote(prompt)
1424
+ url = f"{POLLINATIONS_GEN_IMAGE_URL}/{encoded}?model=flux&width=1024&height=1024&nologo=true"
1425
+ resp = _requests.get(url, headers=headers, timeout=60)
1426
+ if resp.status_code == 200 and len(resp.content) > 1000:
1427
+ ct = resp.headers.get("content-type", "")
1428
+ suffix = ".jpg" if "jpeg" in ct else ".png"
1429
+ tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False, dir="/tmp")
1430
+ tmp.write(resp.content)
1431
+ tmp.flush()
1432
+ return {
1433
+ "path": tmp.name, "backend": "generative",
1434
+ "model": "Pollinations-FLUX", "failed": False,
1435
+ }
1436
+ logger.warning("Pollinations auth image returned %s", resp.status_code)
1437
+ except Exception as e:
1438
+ logger.warning("Pollinations auth image failed: %s", e)
1439
+
1440
+ # --- Attempt 2: Pollinations free endpoint (image.pollinations.ai, no auth) ---
1441
  try:
1442
  encoded = _urlparse.quote(prompt)
1443
+ url = f"{POLLINATIONS_IMAGE_FREE_URL}/{encoded}?model=flux&width=1024&height=1024&nologo=true"
1444
  resp = _requests.get(url, timeout=30)
1445
  if resp.status_code == 200 and len(resp.content) > 1000:
1446
  ct = resp.headers.get("content-type", "")
 
1452
  "path": tmp.name, "backend": "generative",
1453
  "model": "Pollinations-FLUX", "failed": False,
1454
  }
1455
+ logger.warning("Pollinations free image returned %s", resp.status_code)
1456
  except Exception as e:
1457
+ logger.warning("Pollinations free image failed: %s", e)
1458
 
1459
+ # --- Attempt 3: Stable Horde (free, crowdsourced, ~30-40s) ---
1460
  try:
1461
  img_bytes = _stable_horde_image(prompt)
1462
  if img_bytes:
 
1476
 
1477
 
1478
  def generate_audio(prompt: str) -> dict:
1479
+ """Generate audio via Pollinations.ai (with API key) → CLAP retrieval fallback.
1480
 
1481
+ Tries Pollinations TTS to narrate the scene ambience (with API key),
1482
+ then falls back to CLAP retrieval.
1483
  """
1484
+ headers = _pollinations_headers()
1485
+ if not headers:
1486
+ logger.info("No POLLINATIONS_API_KEY — skipping audio generation")
1487
+ else:
1488
+ # --- Attempt 1: Pollinations TTS (scene description as speech) ---
1489
+ try:
1490
+ resp = _requests.post(
1491
+ POLLINATIONS_AUDIO_URL,
1492
+ headers=headers,
1493
+ json={
1494
+ "model": "openai-audio",
1495
+ "input": prompt,
1496
+ "voice": "shimmer",
1497
+ },
1498
+ timeout=60,
1499
+ )
1500
+ if resp.status_code == 200 and len(resp.content) > 1000:
1501
+ tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False, dir="/tmp")
1502
+ tmp.write(resp.content)
1503
+ tmp.flush()
1504
+ return {
1505
+ "path": tmp.name, "backend": "generative",
1506
+ "model": "Pollinations-TTS", "failed": False,
1507
+ }
1508
+ logger.warning("Pollinations TTS returned %s: %s", resp.status_code, resp.text[:200])
1509
+ except Exception as e:
1510
+ logger.warning("Pollinations TTS failed: %s", e)
1511
 
1512
+ # --- Attempt 2: Pollinations simple GET TTS ---
1513
+ try:
1514
+ encoded = _urlparse.quote(prompt)
1515
+ resp = _requests.get(
1516
+ f"{POLLINATIONS_TTS_URL}/{encoded}?voice=nova",
1517
+ headers=headers,
1518
+ timeout=60,
1519
+ )
1520
+ if resp.status_code == 200 and len(resp.content) > 1000:
1521
+ tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False, dir="/tmp")
1522
+ tmp.write(resp.content)
1523
+ tmp.flush()
1524
+ return {
1525
+ "path": tmp.name, "backend": "generative",
1526
+ "model": "Pollinations-TTS", "failed": False,
1527
+ }
1528
+ logger.warning("Pollinations GET TTS returned %s", resp.status_code)
1529
+ except Exception as e:
1530
+ logger.warning("Pollinations GET TTS failed: %s", e)
1531
 
1532
  # --- Fallback: CLAP retrieval ---
1533
  logger.info("Audio generation unavailable — using CLAP retrieval")
 
1674
  }
1675
  if backend == "generative":
1676
  img_info = "Pollinations FLUX / Stable Horde (free)"
1677
+ aud_info = "Pollinations TTS / CLAP retrieval (free)"
1678
  else:
1679
  img_info = "CLIP retrieval (57 images)"
1680
  aud_info = "CLAP retrieval (104 clips)"