Spaces:

bep40
/

comic-ai-generator

Sleeping

App Files Files Community

bep40 committed on Apr 28

Commit

397cdcd

verified ·

1 Parent(s): cc2e5f4

Switch to Gemini + Pollinations AI (free, no HF token needed)

Browse files

Files changed (1) hide show

app.py +161 -143

app.py CHANGED Viewed

@@ -1,19 +1,21 @@
 import os
 import asyncio
 import httpx
 from fastapi import FastAPI, HTTPException, Request, Response
 from fastapi.staticfiles import StaticFiles
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-# Hugging Face access token, read once at import time; an empty string means "not configured".
-HF_TOKEN = os.environ.get("HF_TOKEN", "")
-if not HF_TOKEN:
-    # Only warn on stdout; the application still starts without a token.
-    print("WARNING: HF_TOKEN not set! AI features will fail.")
-# Create FastAPI app
-app = FastAPI(title="Comic AI Generator", version="2.0")
-# Add CORS middleware
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -24,26 +26,22 @@ app.add_middleware(
 # ================= MODELS =================
 class TextGenRequest(BaseModel):
-    """Request body for POST /api/text."""
-    model: str  # Hugging Face model id; interpolated into the inference URL by the route
     prompt: str
     max_new_tokens: int = 512
     temperature: float = 0.7
 class ChatRequest(BaseModel):
-    """Request body for POST /api/chat."""
-    model: str  # model id passed through unchanged in the chat-completions payload
     messages: list
     max_tokens: int = 1024
     temperature: float = 0.3
 class ImageGenRequest(BaseModel):
-    """Request body for POST /api/image."""
-    model: str  # Hugging Face model id; interpolated into the inference URL by the route
     prompt: str
     negative_prompt: str = ""
     width: int = 1024
     height: int = 1024
 class InpaintRequest(BaseModel):
-    model: str
     prompt: str
     image_base64: str
     mask_base64: str
@@ -52,167 +50,187 @@ class InpaintRequest(BaseModel):
     height: int = 1024
 class TTSRequest(BaseModel):
-    """Request body for POST /api/tts."""
-    model: str  # Hugging Face model id; interpolated into the inference URL by the route
     text: str
-# ================= PROXY HELPERS =================
-async def hf_api_request(url: str, payload: dict):
-    """POST ``payload`` as JSON to ``url`` with the HF bearer token and return the decoded JSON.
-
-    Makes at most three attempts. A 503 on either of the first two attempts
-    waits 15 seconds and retries; any other non-200 reply raises
-    ``HTTPException`` carrying the upstream status and the first 500
-    characters of its body. An ``httpx.RequestError`` waits 5 seconds and
-    retries, and on the third attempt raises ``HTTPException`` 500.
-    """
-    auth_headers = {
-        "Authorization": f"Bearer {HF_TOKEN}",
-        "Content-Type": "application/json"
-    }
-    final_attempt = 2
-    for attempt in range(final_attempt + 1):
-        try:
-            async with httpx.AsyncClient(timeout=120.0) as client:
-                reply = await client.post(url, json=payload, headers=auth_headers, timeout=120)
-                code = reply.status_code
-                if code == 200:
-                    return reply.json()
-                if code != 503 or attempt >= final_attempt:
-                    error_text = reply.text[:500]
-                    print(f"HF API Error {code}: {error_text}")
-                    raise HTTPException(status_code=code, detail=f"HF API Error {code}: {error_text}")
-                # Model still loading: wait, then fall through to the next attempt.
-                print("Model loading (503), retrying in 15s...")
-                await asyncio.sleep(15)
-        except httpx.RequestError as e:
-            print(f"Request error attempt {attempt+1}: {e}")
-            if attempt == final_attempt:
-                raise HTTPException(status_code=500, detail=f"Network error: {str(e)}")
-            await asyncio.sleep(5)
-async def hf_binary_request(url: str, payload: dict):
-    """POST ``payload`` to ``url`` and wrap the raw reply bytes in an ``image/png`` ``Response``.
-
-    Three attempts in total. A 503 with retries left waits 15 seconds; any
-    other non-200 status raises ``HTTPException`` with the upstream status.
-    An ``httpx.RequestError`` waits 5 seconds and retries, or raises
-    ``HTTPException`` 500 when no retries are left.
-    """
-    request_headers = {
-        "Authorization": f"Bearer {HF_TOKEN}",
-        "Content-Type": "application/json"
-    }
-    # Count down the retries that remain after the current attempt.
-    for retries_left in (2, 1, 0):
-        try:
-            async with httpx.AsyncClient(timeout=120.0) as client:
-                upstream = await client.post(url, json=payload, headers=request_headers, timeout=120)
-                if upstream.status_code == 200:
-                    return Response(content=upstream.content, media_type="image/png")
-                if not (upstream.status_code == 503 and retries_left > 0):
-                    raise HTTPException(
-                        status_code=upstream.status_code,
-                        detail=f"Image API Error {upstream.status_code}",
-                    )
-                await asyncio.sleep(15)
-        except httpx.RequestError as e:
-            if retries_left == 0:
-                raise HTTPException(status_code=500, detail=str(e))
-            await asyncio.sleep(5)
-# ================= API ROUTES (MUST BE BEFORE STATIC MOUNT) =================
-@app.post("/api/text")
-async def generate_text(req: TextGenRequest):
-    """Send the prompt to the Hugging Face model named in the request and return its JSON reply."""
-    endpoint = "https://api-inference.huggingface.co/models/" + req.model
     payload = {
-        "inputs": req.prompt,
-        "parameters": {
-            "max_new_tokens": req.max_new_tokens,
-            "temperature": req.temperature,
-            "return_full_text": False
         }
     }
-    return await hf_api_request(endpoint, payload)
 @app.post("/api/chat")
 async def chat(req: ChatRequest):
-    """Relay the chat request to the Hugging Face router and return its JSON reply."""
-    body = dict(
-        model=req.model,
-        messages=req.messages,
-        max_tokens=req.max_tokens,
-        temperature=req.temperature,
-    )
-    return await hf_api_request("https://router.huggingface.co/v1/chat/completions", body)
 @app.post("/api/image")
 async def generate_image(req: ImageGenRequest):
-    """Request an image from the Hugging Face model named in the request."""
-    generation_params = {
-        "negative_prompt": req.negative_prompt,
-        "width": req.width,
-        "height": req.height,
-        "num_inference_steps": 30,
-        "guidance_scale": 7.5,
-    }
-    endpoint = "https://api-inference.huggingface.co/models/" + req.model
-    return await hf_binary_request(
-        endpoint,
-        {"inputs": req.prompt, "parameters": generation_params},
-    )
 @app.post("/api/inpaint")
 async def inpaint_image(req: InpaintRequest):
-    """Send the prompt plus image and mask (as PNG data URLs) to the named Hugging Face model."""
-    as_png_data_url = "data:image/png;base64,{}".format
-    inpaint_params = {
-        "image": as_png_data_url(req.image_base64),
-        "mask": as_png_data_url(req.mask_base64),
-        "negative_prompt": req.negative_prompt,
-        "num_inference_steps": 30,
-        "guidance_scale": 7.5,
-        "width": req.width,
-        "height": req.height,
-    }
-    endpoint = "https://api-inference.huggingface.co/models/" + req.model
-    return await hf_binary_request(
-        endpoint,
-        {"inputs": req.prompt, "parameters": inpaint_params},
-    )
 @app.post("/api/tts")
 async def text_to_speech(req: TTSRequest):
-    """Synthesize speech with the Hugging Face model named in the request.
-
-    Returns the upstream bytes as an ``audio/wav`` ``Response``. Up to three
-    attempts are made: a 503 with retries left waits 10 seconds, and an
-    unexpected exception waits 3 seconds before retrying.
-
-    Raises:
-        HTTPException: with the upstream status for any other non-200 reply,
-            or 500 when the third attempt fails with an unexpected exception.
-    """
-    url = f"https://api-inference.huggingface.co/models/{req.model}"
-    payload = {"inputs": req.text}
-    for attempt in range(3):
-        try:
-            async with httpx.AsyncClient(timeout=60.0) as client:
-                response = await client.post(url, json=payload,
-                    headers={"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"},
-                    timeout=60
-                )
-                if response.status_code == 200:
-                    return Response(content=response.content, media_type="audio/wav")
-                if response.status_code == 503 and attempt < 2:
-                    await asyncio.sleep(10)
-                    continue
-                raise HTTPException(status_code=response.status_code, detail=f"TTS error: {response.status_code}")
-        except HTTPException:
-            # An upstream status error is final. Without this clause the broad
-            # handler below caught it, retried, and finally reported it as a 500.
-            raise
-        except Exception as e:
-            if attempt == 2:
-                raise HTTPException(status_code=500, detail=str(e))
-            await asyncio.sleep(3)
 @app.get("/api/health")
 async def health_check():
-    """Probe the Hugging Face inference API with a tiny request and report the outcome."""
-    if not HF_TOKEN:
-        return {"status": "error", "message": "HF_TOKEN not configured in backend"}
     try:
-        async with httpx.AsyncClient(timeout=30.0) as client:
-            probe = await client.post(
-                "https://api-inference.huggingface.co/models/Qwen/Qwen3-0.6B",
-                json={"inputs": "Hi", "parameters": {"max_new_tokens": 3}},
-                headers={"Authorization": f"Bearer {HF_TOKEN}"},
-                timeout=30
-            )
     except Exception as e:
-        return {"status": "error", "message": f"Connection failed: {str(e)}"}
-    # Map the statuses that have a dedicated message; anything else is a generic error.
-    known_outcomes = {
-        200: {"status": "ok", "message": "API connected successfully"},
-        503: {"status": "loading", "message": "Model is warming up, please wait ~1 min"},
-    }
-    fallback = {"status": "error", "message": f"HF API returned {probe.status_code}"}
-    return known_outcomes.get(probe.status_code, fallback)
 @app.get("/api/models")
 async def list_models():
-    """Report the default model ids and whether an HF token is configured."""
-    catalog = {
-        "text_model": "Qwen/Qwen3-0.6B",
-        "vision_model": "Qwen/Qwen3-VL-2B-Instruct",
-        "image_model": "stabilityai/stable-diffusion-xl-base-1.0",
-        "inpaint_model": "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
-        "tts_model": "microsoft/speecht5_tts",
-    }
-    catalog["token_configured"] = bool(HF_TOKEN)
-    return catalog
-# ================= STATIC FILES (MUST BE LAST - catch-all) =================
 app.mount("/", StaticFiles(directory="static", html=True), name="static")

 import os
 import asyncio
 import httpx
+import base64
 from fastapi import FastAPI, HTTPException, Request, Response
 from fastapi.staticfiles import StaticFiles
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
+# ================= CONFIG =================
+# Gemini API key, read once at import time; an empty string means "not configured".
+GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
+# NOTE(review): in the visible code HF_TOKEN is only reported by /api/health, never sent anywhere.
+HF_TOKEN = os.environ.get("HF_TOKEN", "")  # Keep as fallback
+if not GEMINI_API_KEY:
+    # Only warn on stdout; startup continues without the key.
+    print("WARNING: GEMINI_API_KEY not set! AI features will use fallback APIs.")
+app = FastAPI(title="Comic AI Generator", version="3.0")
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
 # ================= MODELS =================
 class TextGenRequest(BaseModel):
+    """Request body for POST /api/text."""
     prompt: str
     max_new_tokens: int = 512
     temperature: float = 0.7
 class ChatRequest(BaseModel):
+    """Request body for POST /api/chat."""
+    # Each entry is read as a dict with "role" and "content"; "content" may be a
+    # string or a list of {"type": "text" | "image_url", ...} parts.
     messages: list
     max_tokens: int = 1024
     temperature: float = 0.3
 class ImageGenRequest(BaseModel):
+    """Request body for POST /api/image."""
     prompt: str
     negative_prompt: str = ""
     width: int = 1024
     height: int = 1024
 class InpaintRequest(BaseModel):
     prompt: str
     image_base64: str
     mask_base64: str
     height: int = 1024
 class TTSRequest(BaseModel):
+    """Request body for POST /api/tts."""
     text: str
+# ================= GEMINI API HELPERS =================
+def _gemini_parts(content):
+    """Convert one OpenAI-style message ``content`` value into a list of Gemini parts."""
+    if content is None:
+        return [{"text": ""}]
+    if not isinstance(content, list):
+        return [{"text": content}]
+    parts = []
+    for part in content:
+        kind = part.get("type")
+        if kind == "text":
+            parts.append({"text": part["text"]})
+        elif kind == "image_url":
+            img_url = part["image_url"]["url"]
+            # Only inline data URLs can be forwarded; other URLs are skipped.
+            if img_url.startswith("data:image"):
+                header, _, b64 = img_url.partition(",")
+                # "data:image/jpeg;base64" -> "image/jpeg", so JPEG/WebP input
+                # is no longer mislabelled as PNG.
+                mime_type = header[len("data:"):].split(";", 1)[0] or "image/png"
+                parts.append({
+                    "inline_data": {
+                        "mime_type": mime_type,
+                        "data": b64
+                    }
+                })
+    return parts
+
+
+async def gemini_chat(messages, max_tokens=1024, temperature=0.7):
+    """Call Google Gemini API for text/chat/vision.
+
+    Args:
+        messages: OpenAI-style message dicts with ``role`` and ``content``.
+            ``content`` is a string or a list of ``text`` / ``image_url`` parts.
+        max_tokens: Sent as ``generationConfig.maxOutputTokens``.
+        temperature: Sent as ``generationConfig.temperature``.
+
+    Returns:
+        ``{"choices": [{"message": {"content": text}}]}`` where ``text`` is
+        the concatenated text of the first candidate ("" if there is none).
+
+    Raises:
+        HTTPException: 500 when ``GEMINI_API_KEY`` is unset, or the upstream
+            status code when Gemini answers with anything other than 200.
+    """
+    if not GEMINI_API_KEY:
+        raise HTTPException(status_code=500, detail="GEMINI_API_KEY not configured")
+    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
+    # The key goes in a header rather than the query string so that it does
+    # not appear in request URLs written to logs.
+    headers = {"x-goog-api-key": GEMINI_API_KEY}
+    # Convert messages to Gemini format
+    contents = []
+    system_parts = []
+    for msg in messages:
+        role = msg.get("role")
+        parts = _gemini_parts(msg.get("content"))
+        if role == "system":
+            # Gemini has no "system" turn; system text goes in systemInstruction
+            # instead of being sent as if the model had said it.
+            system_parts.extend(p for p in parts if "text" in p)
+            continue
+        contents.append({"role": "user" if role == "user" else "model", "parts": parts})
     payload = {
+        "contents": contents,
+        "generationConfig": {
+            "maxOutputTokens": max_tokens,
+            "temperature": temperature
         }
     }
+    if system_parts:
+        payload["systemInstruction"] = {"parts": system_parts}
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        response = await client.post(url, json=payload, headers=headers)
+    if response.status_code != 200:
+        error_text = response.text[:500]
+        print(f"Gemini API Error {response.status_code}: {error_text}")
+        raise HTTPException(status_code=response.status_code, detail=f"Gemini API Error: {error_text}")
+    # "or" guards against a missing key and an empty list alike, so a reply
+    # without candidates yields "" instead of an IndexError.
+    candidates = response.json().get("candidates") or [{}]
+    reply_parts = candidates[0].get("content", {}).get("parts") or []
+    text = "".join(p.get("text", "") for p in reply_parts)
+    return {"choices": [{"message": {"content": text}}]}
+async def gemini_text(prompt, max_tokens=512, temperature=0.7):
+    """Run a single-turn user prompt through ``gemini_chat`` and return the reply text."""
+    reply = await gemini_chat(
+        [{"role": "user", "content": prompt}],
+        max_tokens,
+        temperature,
+    )
+    first_choice = reply["choices"][0]
+    return first_choice["message"]["content"]
+# ================= POLLINATIONS AI (Free Image Gen) =================
+async def pollinations_image(prompt, width=1024, height=1024, seed=None):
+    """Generate image using Pollinations.ai (free, no key needed).
+
+    Args:
+        prompt: Text prompt; percent-encoded into the URL path.
+        width: Requested image width, sent as a query parameter.
+        height: Requested image height, sent as a query parameter.
+        seed: Optional seed; forwarded whenever it is not ``None``.
+
+    Returns:
+        A ``Response`` holding the upstream image bytes, labelled with the
+        upstream content type (``image/png`` if none is given).
+
+    Raises:
+        HTTPException: with the upstream status code on any non-200 reply.
+    """
+    import urllib.parse
+    # safe="" also escapes "/", so a slash in the prompt cannot split the path.
+    encoded_prompt = urllib.parse.quote(prompt, safe="")
+    # Pollinations supports width/height via query params
+    query = {"width": width, "height": height, "nologo": "true"}
+    # Compare against None so that seed=0 is still forwarded.
+    if seed is not None:
+        query["seed"] = seed
+    url = f"https://image.pollinations.ai/prompt/{encoded_prompt}?{urllib.parse.urlencode(query)}"
+    async with httpx.AsyncClient(timeout=120.0) as client:
+        response = await client.get(url)
+    if response.status_code != 200:
+        raise HTTPException(status_code=response.status_code, detail=f"Image gen error: {response.status_code}")
+    # Use the type the upstream declares rather than assuming PNG.
+    media_type = response.headers.get("content-type", "image/png")
+    return Response(content=response.content, media_type=media_type)
+# ================= API ROUTES =================
+@app.post("/api/text")
+async def generate_text(req: TextGenRequest):
+    """Generate text with Gemini and return it as ``[{"generated_text": ...}]``."""
+    try:
+        generated = await gemini_text(req.prompt, req.max_new_tokens, req.temperature)
+    except HTTPException:
+        # Already an HTTP error: pass it through untouched.
+        raise
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
+    return [{"generated_text": generated}]
 @app.post("/api/chat")
 async def chat(req: ChatRequest):
+    """Forward the chat (optionally multimodal) request to Gemini and return its reply."""
+    try:
+        return await gemini_chat(req.messages, req.max_tokens, req.temperature)
+    except HTTPException:
+        # Already an HTTP error: pass it through untouched.
+        raise
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
 @app.post("/api/image")
 async def generate_image(req: ImageGenRequest):
+    """Generate an image through Pollinations.ai from the request prompt."""
+    # A non-empty negative prompt is folded into the text as an "avoid" clause.
+    prompt_pieces = [req.prompt]
+    if req.negative_prompt:
+        prompt_pieces.append(f"avoid: {req.negative_prompt}")
+    return await pollinations_image(" | ".join(prompt_pieces), req.width, req.height)
 @app.post("/api/inpaint")
 async def inpaint_image(req: InpaintRequest):
+    """Stand-in for inpainting: generate a fresh image from the prompt alone.
+
+    The submitted image and mask are not used; the request is served by
+    ``pollinations_image`` with a fixed seed of 42.
+    """
+    avoid_clause = f" | avoid: {req.negative_prompt}" if req.negative_prompt else ""
+    return await pollinations_image(
+        req.prompt + avoid_clause,
+        req.width,
+        req.height,
+        seed=42,
+    )
 @app.post("/api/tts")
 async def text_to_speech(req: TTSRequest):
+    """Always answer 501: speech synthesis is left to the browser."""
+    not_implemented = HTTPException(
+        status_code=501,
+        detail="TTS is handled client-side via Web Speech API. Please use the browser's built-in speech synthesis.",
+    )
+    raise not_implemented
 @app.get("/api/health")
 async def health_check():
+    """Check which APIs are available.
+
+    Returns a dict with the configuration flags plus ``status`` and
+    ``message``: ``"warning"`` when no Gemini key is set, ``"ok"`` when a
+    small Gemini probe request returns 200, and ``"error"`` otherwise.
+    """
+    status = {
+        "gemini_configured": bool(GEMINI_API_KEY),
+        "hf_token_configured": bool(HF_TOKEN),
+        "pollinations": "available (free, no key)"
+    }
+    if not GEMINI_API_KEY:
+        status["status"] = "warning"
+        status["message"] = "GEMINI_API_KEY not set. Add it in Space Settings → Secrets for full functionality."
+        return status
+    # Test Gemini
     try:
+        async with httpx.AsyncClient(timeout=15.0) as client:
+            url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
+            # The key goes in a header rather than the query string so that it
+            # does not appear in request URLs written to logs.
+            response = await client.post(url, headers={"x-goog-api-key": GEMINI_API_KEY}, json={
+                "contents": [{"parts": [{"text": "Hi"}]}],
+                "generationConfig": {"maxOutputTokens": 5}
+            }, timeout=15)
             if response.status_code == 200:
+                status["status"] = "ok"
+                status["message"] = "All APIs ready! Gemini + Pollinations.ai working."
             else:
+                status["status"] = "error"
+                status["message"] = f"Gemini API error: {response.status_code}"
     except Exception as e:
+        status["status"] = "error"
+        status["message"] = f"Connection error: {str(e)}"
+    return status
 @app.get("/api/models")
 async def list_models():
+    """Describe the backend used for each feature and whether a Gemini key is set."""
+    backends = {
+        "text_model": "gemini-2.0-flash (Google)",
+        "vision_model": "gemini-2.0-flash (Google, multimodal)",
+        "image_model": "pollinations-ai (free, no key)",
+        "inpaint_model": "pollinations-ai (regeneration)",
+        "tts_model": "Web Speech API (browser)",
+    }
+    backends["gemini_configured"] = bool(GEMINI_API_KEY)
+    return backends
+# ================= STATIC FILES (MUST BE LAST) =================
 app.mount("/", StaticFiles(directory="static", html=True), name="static")