bep40 committed on
Commit
397cdcd
·
verified ·
1 Parent(s): cc2e5f4

Switch to Gemini + Pollinations AI (free, no HF token needed)

Browse files
Files changed (1) hide show
  1. app.py +161 -143
app.py CHANGED
@@ -1,19 +1,21 @@
1
  import os
2
  import asyncio
3
  import httpx
 
4
  from fastapi import FastAPI, HTTPException, Request, Response
5
  from fastapi.staticfiles import StaticFiles
6
  from fastapi.middleware.cors import CORSMiddleware
7
  from pydantic import BaseModel
8
 
9
# Hugging Face access token, read once at import time; empty string when unset.
HF_TOKEN = os.getenv("HF_TOKEN", "")
if HF_TOKEN == "":
    print("WARNING: HF_TOKEN not set! AI features will fail.")

# Application object that every route in this module is registered on.
app = FastAPI(title="Comic AI Generator", version="2.0")
 
 
15
 
16
- # Add CORS middleware
17
  app.add_middleware(
18
  CORSMiddleware,
19
  allow_origins=["*"],
@@ -24,26 +26,22 @@ app.add_middleware(
24
 
25
  # ================= MODELS =================
26
class TextGenRequest(BaseModel):
    """Request body accepted by ``POST /api/text``."""

    # Model id that the route interpolates into the inference URL.
    model: str
    # Text the model should continue.
    prompt: str
    # Generation limits forwarded to the upstream API.
    max_new_tokens: int = 512
    temperature: float = 0.7
31
 
32
class ChatRequest(BaseModel):
    """Request body accepted by ``POST /api/chat``."""

    # Model id forwarded verbatim in the upstream payload.
    model: str
    # Conversation so far; forwarded unchanged as the ``messages`` field.
    messages: list
    max_tokens: int = 1024
    temperature: float = 0.3
37
 
38
class ImageGenRequest(BaseModel):
    """Request body accepted by ``POST /api/image``."""

    # Model id that the route interpolates into the inference URL.
    model: str
    prompt: str
    # Optional description of what the image should not contain.
    negative_prompt: str = ""
    # Requested output size.
    width: int = 1024
    height: int = 1024
44
 
45
  class InpaintRequest(BaseModel):
46
- model: str
47
  prompt: str
48
  image_base64: str
49
  mask_base64: str
@@ -52,167 +50,187 @@ class InpaintRequest(BaseModel):
52
  height: int = 1024
53
 
54
class TTSRequest(BaseModel):
    """Request body accepted by ``POST /api/tts``."""

    # Model id that the route interpolates into the inference URL.
    model: str
    # Text to be spoken.
    text: str
57
 
58
- # ================= PROXY HELPERS =================
59
async def hf_api_request(url: str, payload: dict):
    """POST ``payload`` as JSON to a Hugging Face endpoint and return the decoded JSON reply.

    Up to three attempts are made. A 503 reply is retried after a 15 second
    pause; a transport-level ``httpx.RequestError`` is retried after 5 seconds.

    Raises:
        HTTPException: with the upstream status code for any non-200 reply
            that is not retried, or with status 500 when the third attempt
            fails with a network error.
    """
    max_attempts = 3
    auth_headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }

    attempt = 0
    while attempt < max_attempts:
        is_last = attempt == max_attempts - 1
        try:
            async with httpx.AsyncClient(timeout=120.0) as client:
                reply = await client.post(url, json=payload, headers=auth_headers, timeout=120)
                status = reply.status_code
                if status == 200:
                    return reply.json()
                if status != 503 or is_last:
                    # Anything but a retryable 503 is reported to the caller as-is.
                    error_text = reply.text[:500]
                    print(f"HF API Error {status}: {error_text}")
                    raise HTTPException(status_code=status, detail=f"HF API Error {status}: {error_text}")
                print(f"Model loading (503), retrying in 15s...")
                await asyncio.sleep(15)
        except httpx.RequestError as exc:
            print(f"Request error attempt {attempt+1}: {exc}")
            if is_last:
                raise HTTPException(status_code=500, detail=f"Network error: {str(exc)}")
            await asyncio.sleep(5)
        attempt += 1
82
-
83
async def hf_binary_request(url: str, payload: dict):
    """POST ``payload`` as JSON to a Hugging Face endpoint and return the raw body as a PNG response.

    Up to three attempts are made. A 503 reply is retried after 15 seconds;
    a transport-level ``httpx.RequestError`` is retried after 5 seconds.

    Raises:
        HTTPException: with the upstream status code for any non-200 reply
            that is not retried, or with status 500 when the third attempt
            fails with a network error.
    """
    max_attempts = 3
    auth_headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }

    attempt = 0
    while attempt < max_attempts:
        is_last = attempt == max_attempts - 1
        try:
            async with httpx.AsyncClient(timeout=120.0) as client:
                reply = await client.post(url, json=payload, headers=auth_headers, timeout=120)
                status = reply.status_code
                if status == 200:
                    return Response(content=reply.content, media_type="image/png")
                if status != 503 or is_last:
                    raise HTTPException(status_code=status, detail=f"Image API Error {status}")
                # Retryable 503: wait before the next attempt.
                await asyncio.sleep(15)
        except httpx.RequestError as exc:
            if is_last:
                raise HTTPException(status_code=500, detail=str(exc))
            await asyncio.sleep(5)
        attempt += 1
102
-
103
- # ================= API ROUTES (MUST BE BEFORE STATIC MOUNT) =================
104
@app.post("/api/text")
async def generate_text(req: TextGenRequest):
    """Forward a text-generation request to the Hugging Face inference API.

    The model named in the request selects the endpoint; the decoded JSON
    reply from ``hf_api_request`` is returned unchanged.
    """
    generation_params = {
        "max_new_tokens": req.max_new_tokens,
        "temperature": req.temperature,
        "return_full_text": False,
    }
    endpoint = f"https://api-inference.huggingface.co/models/{req.model}"
    return await hf_api_request(
        endpoint,
        {"inputs": req.prompt, "parameters": generation_params},
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
@app.post("/api/chat")
async def chat(req: ChatRequest):
    """Forward a chat-completion request to the Hugging Face router.

    The request fields are copied into the payload one-to-one and the decoded
    JSON reply from ``hf_api_request`` is returned unchanged.
    """
    body = {
        "model": req.model,
        "messages": req.messages,
        "max_tokens": req.max_tokens,
        "temperature": req.temperature,
    }
    return await hf_api_request("https://router.huggingface.co/v1/chat/completions", body)
129
 
130
@app.post("/api/image")
async def generate_image(req: ImageGenRequest):
    """Forward an image-generation request to the Hugging Face inference API.

    Inference steps (30) and guidance scale (7.5) are fixed here; the PNG
    response built by ``hf_binary_request`` is returned unchanged.
    """
    diffusion_params = {
        "negative_prompt": req.negative_prompt,
        "width": req.width,
        "height": req.height,
        "num_inference_steps": 30,
        "guidance_scale": 7.5,
    }
    endpoint = f"https://api-inference.huggingface.co/models/{req.model}"
    return await hf_binary_request(
        endpoint,
        {"inputs": req.prompt, "parameters": diffusion_params},
    )
144
 
145
@app.post("/api/inpaint")
async def inpaint_image(req: InpaintRequest):
    """Forward an inpainting request to the Hugging Face inference API.

    The base64 image and mask from the request are wrapped as PNG data URLs.
    Inference steps (30) and guidance scale (7.5) are fixed here; the PNG
    response built by ``hf_binary_request`` is returned unchanged.
    """
    source_image = f"data:image/png;base64,{req.image_base64}"
    edit_mask = f"data:image/png;base64,{req.mask_base64}"
    inpaint_params = {
        "image": source_image,
        "mask": edit_mask,
        "negative_prompt": req.negative_prompt,
        "num_inference_steps": 30,
        "guidance_scale": 7.5,
        "width": req.width,
        "height": req.height,
    }
    endpoint = f"https://api-inference.huggingface.co/models/{req.model}"
    return await hf_binary_request(
        endpoint,
        {"inputs": req.prompt, "parameters": inpaint_params},
    )
161
 
162
@app.post("/api/tts")
async def text_to_speech(req: TTSRequest):
    """Forward a text-to-speech request to the Hugging Face inference API.

    Up to three attempts are made. A 503 reply is retried after 10 seconds;
    a transport-level ``httpx.RequestError`` is retried after 3 seconds.

    Returns:
        A ``Response`` carrying the upstream audio bytes as ``audio/wav``.

    Raises:
        HTTPException: with the upstream status code for any non-200 reply
            that is not retried, or with status 500 when the third attempt
            fails with a network error.
    """
    url = f"https://api-inference.huggingface.co/models/{req.model}"
    payload = {"inputs": req.text}
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }

    for attempt in range(3):
        try:
            async with httpx.AsyncClient(timeout=60.0) as client:
                response = await client.post(url, json=payload, headers=headers, timeout=60)
            if response.status_code == 200:
                return Response(content=response.content, media_type="audio/wav")
            if response.status_code == 503 and attempt < 2:
                await asyncio.sleep(10)
                continue
            raise HTTPException(status_code=response.status_code, detail=f"TTS error: {response.status_code}")
        except httpx.RequestError as e:
            # Only transport failures are retried. Catching Exception here would
            # also swallow the HTTPException raised above, retrying it and
            # finally replacing the upstream status code with a generic 500.
            if attempt == 2:
                raise HTTPException(status_code=500, detail=str(e))
            await asyncio.sleep(3)
184
 
185
@app.get("/api/health")
async def health_check():
    """Probe the Hugging Face inference API and summarise the result.

    Returns a dict with ``status`` (``"ok"``, ``"loading"`` or ``"error"``)
    and a human-readable ``message``. No exception escapes this handler.
    """
    if not HF_TOKEN:
        return {"status": "error", "message": "HF_TOKEN not configured in backend"}

    probe_url = "https://api-inference.huggingface.co/models/Qwen/Qwen3-0.6B"
    probe_body = {"inputs": "Hi", "parameters": {"max_new_tokens": 3}}
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                probe_url,
                json=probe_body,
                headers={"Authorization": f"Bearer {HF_TOKEN}"},
                timeout=30,
            )
    except Exception as e:
        return {"status": "error", "message": f"Connection failed: {str(e)}"}

    code = response.status_code
    if code == 200:
        return {"status": "ok", "message": "API connected successfully"}
    if code == 503:
        return {"status": "loading", "message": "Model is warming up, please wait ~1 min"}
    return {"status": "error", "message": f"HF API returned {code}"}
 
 
 
205
 
206
@app.get("/api/models")
async def list_models():
    """Return the model ids this backend advertises and whether a token is set."""
    advertised = {
        "text_model": "Qwen/Qwen3-0.6B",
        "vision_model": "Qwen/Qwen3-VL-2B-Instruct",
        "image_model": "stabilityai/stable-diffusion-xl-base-1.0",
        "inpaint_model": "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
        "tts_model": "microsoft/speecht5_tts",
    }
    advertised["token_configured"] = bool(HF_TOKEN)
    return advertised
216
 
217
- # ================= STATIC FILES (MUST BE LAST - catch-all) =================
218
  # Serve the "static" directory at the site root (html=True is passed to
  # StaticFiles). Kept as the final registration so the /api routes declared
  # earlier are matched before this root mount.
  app.mount("/", StaticFiles(directory="static", html=True), name="static")
 
1
  import os
2
  import asyncio
3
  import httpx
4
+ import base64
5
  from fastapi import FastAPI, HTTPException, Request, Response
6
  from fastapi.staticfiles import StaticFiles
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from pydantic import BaseModel
9
 
10
+ # ================= CONFIG =================
11
# Credentials are read once at import time; both default to an empty string.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
HF_TOKEN = os.getenv("HF_TOKEN", "")  # Keep as fallback

if GEMINI_API_KEY == "":
    print("WARNING: GEMINI_API_KEY not set! AI features will use fallback APIs.")

# Application object that every route in this module is registered on.
app = FastAPI(title="Comic AI Generator", version="3.0")
18
 
 
19
  app.add_middleware(
20
  CORSMiddleware,
21
  allow_origins=["*"],
 
26
 
27
  # ================= MODELS =================
28
class TextGenRequest(BaseModel):
    """Request body accepted by ``POST /api/text``."""

    # Text the model should respond to.
    prompt: str
    # Generation limits handed to the text helper.
    max_new_tokens: int = 512
    temperature: float = 0.7
32
 
33
class ChatRequest(BaseModel):
    """Request body accepted by ``POST /api/chat``."""

    # Conversation so far; handed to the chat helper as-is.
    messages: list
    max_tokens: int = 1024
    temperature: float = 0.3
37
 
38
class ImageGenRequest(BaseModel):
    """Request body accepted by ``POST /api/image``."""

    prompt: str
    # Optional description of what the image should not contain.
    negative_prompt: str = ""
    # Requested output size.
    width: int = 1024
    height: int = 1024
43
 
44
  class InpaintRequest(BaseModel):
 
45
  prompt: str
46
  image_base64: str
47
  mask_base64: str
 
50
  height: int = 1024
51
 
52
class TTSRequest(BaseModel):
    """Request body accepted by ``POST /api/tts``."""

    # Text to be spoken.
    text: str
54
 
55
+ # ================= GEMINI API HELPERS =================
56
def _gemini_parts(content):
    """Translate one OpenAI-style ``content`` value into a list of Gemini parts."""
    if not isinstance(content, list):
        return [{"text": content}]

    parts = []
    for part in content:
        kind = part.get("type")
        if kind == "text":
            parts.append({"text": part["text"]})
        elif kind == "image_url":
            img_url = part["image_url"]["url"]
            if not img_url.startswith("data:image"):
                continue  # only inline data URLs are forwarded
            # A data URL looks like "data:image/jpeg;base64,<payload>".
            header, _, b64 = img_url.partition(",")
            if not b64:
                continue  # malformed data URL without a payload
            mime_type = header[len("data:"):].split(";", 1)[0]
            if "/" not in mime_type:
                mime_type = "image/png"
            parts.append({"inline_data": {"mime_type": mime_type, "data": b64}})
    return parts


async def gemini_chat(messages, max_tokens=1024, temperature=0.7):
    """Call the Google Gemini ``generateContent`` endpoint for text/chat/vision.

    Args:
        messages: OpenAI-style chat messages, i.e. dicts with ``role`` and
            ``content``. ``content`` is either a string or a list of parts of
            type ``"text"`` or ``"image_url"``; only ``data:image`` URLs are
            forwarded. Messages with role ``"system"`` are sent as the
            system instruction, ``"user"`` stays ``"user"`` and every other
            role is sent as ``"model"``.
        max_tokens: Forwarded as ``generationConfig.maxOutputTokens``.
        temperature: Forwarded as ``generationConfig.temperature``.

    Returns:
        ``{"choices": [{"message": {"content": text}}]}`` where ``text`` is
        the concatenated text of the first candidate (empty string when the
        reply carries no candidate or no text).

    Raises:
        HTTPException: 500 when ``GEMINI_API_KEY`` is unset, otherwise the
            upstream status code for any non-200 reply.
    """
    if not GEMINI_API_KEY:
        raise HTTPException(status_code=500, detail="GEMINI_API_KEY not configured")

    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
    # The key goes in a header rather than the query string so it cannot end
    # up in logged URLs or in exception text echoed back to clients.
    headers = {"x-goog-api-key": GEMINI_API_KEY}

    # Convert messages to Gemini format.
    contents = []
    system_parts = []
    for msg in messages:
        role = msg["role"]
        parts = _gemini_parts(msg.get("content"))
        if role == "system":
            # Gemini has no "system" turn; system text is a separate field.
            system_parts.extend(p for p in parts if "text" in p)
            continue
        contents.append({"role": "user" if role == "user" else "model", "parts": parts})

    payload = {
        "contents": contents,
        "generationConfig": {
            "maxOutputTokens": max_tokens,
            "temperature": temperature,
        },
    }
    if system_parts:
        payload["system_instruction"] = {"parts": system_parts}

    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(url, json=payload, headers=headers, timeout=60)

    if response.status_code != 200:
        error_text = response.text[:500]
        print(f"Gemini API Error {response.status_code}: {error_text}")
        raise HTTPException(status_code=response.status_code, detail=f"Gemini API Error: {error_text}")

    result = response.json()
    # "candidates" may be missing or empty (e.g. a blocked prompt); treat both
    # as "no text" instead of raising IndexError.
    candidates = result.get("candidates") or [{}]
    reply_parts = (candidates[0].get("content") or {}).get("parts") or []
    text = "".join(p.get("text", "") for p in reply_parts)
    return {"choices": [{"message": {"content": text}}]}
108
+
109
async def gemini_text(prompt, max_tokens=512, temperature=0.7):
    """Run a single-turn prompt through ``gemini_chat`` and return the reply text."""
    single_turn = [{"role": "user", "content": prompt}]
    reply = await gemini_chat(single_turn, max_tokens, temperature)
    first_choice = reply["choices"][0]
    return first_choice["message"]["content"]
114
+
115
+ # ================= POLLINATIONS AI (Free Image Gen) =================
116
async def pollinations_image(prompt, width=1024, height=1024, seed=None):
    """Generate an image with Pollinations.ai (free, no key needed).

    Args:
        prompt: Text prompt; URL-encoded into the request path.
        width: Requested image width, sent as a query parameter.
        height: Requested image height, sent as a query parameter.
        seed: Optional seed. Any value other than ``None`` is forwarded,
            including ``0``.

    Returns:
        A ``Response`` carrying the upstream image bytes, labelled with the
        upstream ``Content-Type`` (``image/png`` when none is reported).

    Raises:
        HTTPException: the upstream status code for a non-200 reply, or 502
            when the service cannot be reached.
    """
    import urllib.parse

    encoded_prompt = urllib.parse.quote(prompt)

    # Pollinations supports width/height via query params.
    params = f"width={width}&height={height}&nologo=true"
    if seed is not None:  # a plain truthiness test would drop seed=0
        params += f"&seed={seed}"

    url = f"https://image.pollinations.ai/prompt/{encoded_prompt}?{params}"

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.get(url, timeout=120)
    except httpx.RequestError as e:
        # Surface transport failures as a structured gateway error instead of
        # an unhandled exception.
        raise HTTPException(status_code=502, detail=f"Image gen error: {e}")

    if response.status_code != 200:
        raise HTTPException(status_code=response.status_code, detail=f"Image gen error: {response.status_code}")

    # Pass the real content type through rather than always claiming PNG.
    media_type = response.headers.get("content-type", "image/png")
    return Response(content=response.content, media_type=media_type)
135
+
136
+ # ================= API ROUTES =================
137
@app.post("/api/text")
async def generate_text(req: TextGenRequest):
    """Generate text with Gemini and return it as ``[{"generated_text": ...}]``.

    ``HTTPException`` raised by the helper passes through untouched; any other
    failure is reported as a 500 whose detail is the exception text.
    """
    try:
        generated = await gemini_text(req.prompt, req.max_new_tokens, req.temperature)
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return [{"generated_text": generated}]
147
 
148
@app.post("/api/chat")
async def chat(req: ChatRequest):
    """Run a chat or vision (multimodal) request through Gemini.

    ``HTTPException`` raised by the helper passes through untouched; any other
    failure is reported as a 500 whose detail is the exception text.
    """
    try:
        return await gemini_chat(req.messages, req.max_tokens, req.temperature)
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
 
158
 
159
@app.post("/api/image")
async def generate_image(req: ImageGenRequest):
    """Generate an image with Pollinations.ai (free).

    A non-empty negative prompt is folded into the prompt text after an
    ``" | avoid: "`` separator before the prompt is sent.
    """
    avoid_suffix = f" | avoid: {req.negative_prompt}" if req.negative_prompt else ""
    return await pollinations_image(req.prompt + avoid_suffix, req.width, req.height)
 
 
 
 
 
168
 
169
@app.post("/api/inpaint")
async def inpaint_image(req: InpaintRequest):
    """Stand-in for inpainting: regenerate a whole image from the prompt.

    The submitted image and mask are not used. The prompt (plus any negative
    prompt after an ``" | avoid: "`` separator) is sent to Pollinations with
    a fixed seed of 42.
    """
    avoid_suffix = f" | avoid: {req.negative_prompt}" if req.negative_prompt else ""
    # A proper inpainting service could replace this regeneration later.
    return await pollinations_image(
        req.prompt + avoid_suffix,
        req.width,
        req.height,
        seed=42,
    )
 
 
 
 
 
180
 
181
@app.post("/api/tts")
async def text_to_speech(req: TTSRequest):
    """Always answer 501: speech synthesis is left to the browser."""
    not_implemented = HTTPException(
        status_code=501,
        detail="TTS is handled client-side via Web Speech API. Please use the browser's built-in speech synthesis.",
    )
    raise not_implemented
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
 
189
@app.get("/api/health")
async def health_check():
    """Report which APIs are configured and probe Gemini once.

    Returns:
        A dict with ``gemini_configured``, ``hf_token_configured`` and
        ``pollinations``, plus ``status`` (``"ok"``, ``"warning"`` or
        ``"error"``) and a human-readable ``message``. No exception escapes
        this handler.
    """
    status = {
        "gemini_configured": bool(GEMINI_API_KEY),
        "hf_token_configured": bool(HF_TOKEN),
        "pollinations": "available (free, no key)",
    }

    if not GEMINI_API_KEY:
        status["status"] = "warning"
        status["message"] = "GEMINI_API_KEY not set. Add it in Space Settings → Secrets for full functionality."
        return status

    # Test Gemini with a minimal generation request.
    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
    probe = {
        "contents": [{"parts": [{"text": "Hi"}]}],
        "generationConfig": {"maxOutputTokens": 5},
    }
    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            # The key is sent as a header, not in the URL, so it cannot leak
            # through the exception text echoed in the message below.
            response = await client.post(
                url,
                json=probe,
                headers={"x-goog-api-key": GEMINI_API_KEY},
                timeout=15,
            )
        if response.status_code == 200:
            status["status"] = "ok"
            status["message"] = "All APIs ready! Gemini + Pollinations.ai working."
        else:
            status["status"] = "error"
            status["message"] = f"Gemini API error: {response.status_code}"
    except Exception as e:
        # Deliberately broad: a health endpoint should report, not raise.
        status["status"] = "error"
        status["message"] = f"Connection error: {str(e)}"

    return status
222
 
223
@app.get("/api/models")
async def list_models():
    """Describe the service behind each feature and whether a Gemini key is set."""
    advertised = {
        "text_model": "gemini-2.0-flash (Google)",
        "vision_model": "gemini-2.0-flash (Google, multimodal)",
        "image_model": "pollinations-ai (free, no key)",
        "inpaint_model": "pollinations-ai (regeneration)",
        "tts_model": "Web Speech API (browser)",
    }
    advertised["gemini_configured"] = bool(GEMINI_API_KEY)
    return advertised
234
 
235
+ # ================= STATIC FILES (MUST BE LAST) =================
236
  # Serve the "static" directory at the site root (html=True is passed to
  # StaticFiles). Kept as the final registration so the /api routes declared
  # earlier are matched before this root mount.
  app.mount("/", StaticFiles(directory="static", html=True), name="static")