Spaces:

pratik-250620
/

MultiModal-Coherence-AI

Running

App Files Community

pratik-250620 committed on Feb 20

Commit

0b7335c

verified ·

1 Parent(s): c4a70dd

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +92 -55

app.py CHANGED Viewed

@@ -1281,12 +1281,22 @@ def plan_extended(prompt: str) -> Optional[Any]:
 # Generation / retrieval functions
 # ---------------------------------------------------------------------------
-# Pollinations image API (free, no auth)
-POLLINATIONS_IMAGE_URL = "https://image.pollinations.ai/prompt"
-# Pollinations audio API (music generation)
-POLLINATIONS_AUDIO_URL = "https://gen.pollinations.ai/v1/audio/speech"
-POLLINATIONS_TTS_URL = "https://gen.pollinations.ai/audio"
 def gen_text(prompt: str, mode: str) -> dict:
@@ -1405,11 +1415,32 @@ def _stable_horde_image(prompt: str, timeout: int = 90) -> Optional[bytes]:
 def generate_image(prompt: str) -> dict:
-    """Generate image: Pollinations → Stable Horde → CLIP retrieval."""
-    # --- Attempt 1: Pollinations.ai (free, no auth) ---
     try:
         encoded = _urlparse.quote(prompt)
-        url = f"{POLLINATIONS_IMAGE_URL}/{encoded}?model=flux&width=1024&height=1024&nologo=true"
         resp = _requests.get(url, timeout=30)
         if resp.status_code == 200 and len(resp.content) > 1000:
             ct = resp.headers.get("content-type", "")
@@ -1421,11 +1452,11 @@ def generate_image(prompt: str) -> dict:
                 "path": tmp.name, "backend": "generative",
                 "model": "Pollinations-FLUX", "failed": False,
             }
-        logger.warning("Pollinations image returned %s", resp.status_code)
     except Exception as e:
-        logger.warning("Pollinations image failed: %s", e)
-    # --- Attempt 2: Stable Horde (free, crowdsourced) ---
     try:
         img_bytes = _stable_horde_image(prompt)
         if img_bytes:
@@ -1445,52 +1476,58 @@ def generate_image(prompt: str) -> dict:
 def generate_audio(prompt: str) -> dict:
-    """Generate audio via Pollinations.ai → CLAP retrieval fallback.
-    Tries Pollinations music endpoint (ElevenMusic) for ambient audio,
-    then Pollinations TTS as a narrative fallback, then CLAP retrieval.
     """
-    # --- Attempt 1: Pollinations music (ambient/soundscape) ---
-    try:
-        resp = _requests.post(
-            POLLINATIONS_AUDIO_URL,
-            json={
-                "model": "elevenmusic",
-                "input": prompt,
-            },
-            timeout=120,
-        )
-        if resp.status_code == 200 and len(resp.content) > 1000:
-            tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False, dir="/tmp")
-            tmp.write(resp.content)
-            tmp.flush()
-            return {
-                "path": tmp.name, "backend": "generative",
-                "model": "Pollinations-Music", "failed": False,
-            }
-        logger.warning("Pollinations music returned %s", resp.status_code)
-    except Exception as e:
-        logger.warning("Pollinations music failed: %s", e)
-    # --- Attempt 2: Pollinations TTS (narrate the scene) ---
-    try:
-        tts_text = f"The sounds of: {prompt}"
-        encoded = _urlparse.quote(tts_text)
-        resp = _requests.get(
-            f"{POLLINATIONS_TTS_URL}/{encoded}?voice=shimmer",
-            timeout=60,
-        )
-        if resp.status_code == 200 and len(resp.content) > 1000:
-            tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False, dir="/tmp")
-            tmp.write(resp.content)
-            tmp.flush()
-            return {
-                "path": tmp.name, "backend": "generative",
-                "model": "Pollinations-TTS", "failed": False,
-            }
-        logger.warning("Pollinations TTS returned %s", resp.status_code)
-    except Exception as e:
-        logger.warning("Pollinations TTS failed: %s", e)
     # --- Fallback: CLAP retrieval ---
     logger.info("Audio generation unavailable — using CLAP retrieval")
@@ -1637,7 +1674,7 @@ def main():
         }
         if backend == "generative":
             img_info = "Pollinations FLUX / Stable Horde (free)"
-            aud_info = "Pollinations Music / CLAP retrieval (free)"
         else:
             img_info = "CLIP retrieval (57 images)"
             aud_info = "CLAP retrieval (104 clips)"

 # Generation / retrieval functions
 # ---------------------------------------------------------------------------
+# Pollinations endpoints
+POLLINATIONS_IMAGE_FREE_URL = "https://image.pollinations.ai/prompt"  # Free, no auth
+POLLINATIONS_GEN_IMAGE_URL = "https://gen.pollinations.ai/image"       # Needs API key
+POLLINATIONS_AUDIO_URL = "https://gen.pollinations.ai/v1/audio/speech"  # Needs API key
+POLLINATIONS_TTS_URL = "https://gen.pollinations.ai/audio"              # Needs API key
+# Stable Horde (free, crowdsourced, no key)
+STABLE_HORDE_URL = "https://stablehorde.net/api/v2"
+def _pollinations_headers() -> dict:
+    """Get auth headers for Pollinations gen.pollinations.ai endpoints."""
+    key = os.environ.get("POLLINATIONS_API_KEY", "")
+    if key:
+        return {"Authorization": f"Bearer {key}"}
+    return {}
 def gen_text(prompt: str, mode: str) -> dict:
 def generate_image(prompt: str) -> dict:
+    """Generate image: Pollinations (auth) → Pollinations (free) → Stable Horde → CLIP retrieval."""
+    # --- Attempt 1: Pollinations gen.pollinations.ai (with API key) ---
+    headers = _pollinations_headers()
+    if headers:
+        try:
+            encoded = _urlparse.quote(prompt)
+            url = f"{POLLINATIONS_GEN_IMAGE_URL}/{encoded}?model=flux&width=1024&height=1024&nologo=true"
+            resp = _requests.get(url, headers=headers, timeout=60)
+            if resp.status_code == 200 and len(resp.content) > 1000:
+                ct = resp.headers.get("content-type", "")
+                suffix = ".jpg" if "jpeg" in ct else ".png"
+                tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False, dir="/tmp")
+                tmp.write(resp.content)
+                tmp.flush()
+                return {
+                    "path": tmp.name, "backend": "generative",
+                    "model": "Pollinations-FLUX", "failed": False,
+                }
+            logger.warning("Pollinations auth image returned %s", resp.status_code)
+        except Exception as e:
+            logger.warning("Pollinations auth image failed: %s", e)
+    # --- Attempt 2: Pollinations free endpoint (image.pollinations.ai, no auth) ---
     try:
         encoded = _urlparse.quote(prompt)
+        url = f"{POLLINATIONS_IMAGE_FREE_URL}/{encoded}?model=flux&width=1024&height=1024&nologo=true"
         resp = _requests.get(url, timeout=30)
         if resp.status_code == 200 and len(resp.content) > 1000:
             ct = resp.headers.get("content-type", "")
                 "path": tmp.name, "backend": "generative",
                 "model": "Pollinations-FLUX", "failed": False,
             }
+        logger.warning("Pollinations free image returned %s", resp.status_code)
     except Exception as e:
+        logger.warning("Pollinations free image failed: %s", e)
+    # --- Attempt 3: Stable Horde (free, crowdsourced, ~30-40s) ---
     try:
         img_bytes = _stable_horde_image(prompt)
         if img_bytes:
 def generate_audio(prompt: str) -> dict:
+    """Generate audio via Pollinations.ai (with API key) → CLAP retrieval fallback.
+    Tries Pollinations TTS to narrate the scene ambience (with API key),
+    then falls back to CLAP retrieval.
     """
+    headers = _pollinations_headers()
+    if not headers:
+        logger.info("No POLLINATIONS_API_KEY — skipping audio generation")
+    else:
+        # --- Attempt 1: Pollinations TTS (scene description as speech) ---
+        try:
+            resp = _requests.post(
+                POLLINATIONS_AUDIO_URL,
+                headers=headers,
+                json={
+                    "model": "openai-audio",
+                    "input": prompt,
+                    "voice": "shimmer",
+                },
+                timeout=60,
+            )
+            if resp.status_code == 200 and len(resp.content) > 1000:
+                tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False, dir="/tmp")
+                tmp.write(resp.content)
+                tmp.flush()
+                return {
+                    "path": tmp.name, "backend": "generative",
+                    "model": "Pollinations-TTS", "failed": False,
+                }
+            logger.warning("Pollinations TTS returned %s: %s", resp.status_code, resp.text[:200])
+        except Exception as e:
+            logger.warning("Pollinations TTS failed: %s", e)
+        # --- Attempt 2: Pollinations simple GET TTS ---
+        try:
+            encoded = _urlparse.quote(prompt)
+            resp = _requests.get(
+                f"{POLLINATIONS_TTS_URL}/{encoded}?voice=nova",
+                headers=headers,
+                timeout=60,
+            )
+            if resp.status_code == 200 and len(resp.content) > 1000:
+                tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False, dir="/tmp")
+                tmp.write(resp.content)
+                tmp.flush()
+                return {
+                    "path": tmp.name, "backend": "generative",
+                    "model": "Pollinations-TTS", "failed": False,
+                }
+            logger.warning("Pollinations GET TTS returned %s", resp.status_code)
+        except Exception as e:
+            logger.warning("Pollinations GET TTS failed: %s", e)
     # --- Fallback: CLAP retrieval ---
     logger.info("Audio generation unavailable — using CLAP retrieval")
         }
         if backend == "generative":
             img_info = "Pollinations FLUX / Stable Horde (free)"
+            aud_info = "Pollinations TTS / CLAP retrieval (free)"
         else:
             img_info = "CLIP retrieval (57 images)"
             aud_info = "CLAP retrieval (104 clips)"