bigbossmonster committed on
Commit
f2bd091
·
verified ·
1 Parent(s): a79f889

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -177
app.py CHANGED
@@ -9,14 +9,15 @@ import base64
9
  from concurrent.futures import ThreadPoolExecutor
10
  from PIL import Image, ImageOps
11
 
12
-
13
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException
14
  from fastapi.staticfiles import StaticFiles
15
  from fastapi.middleware.cors import CORSMiddleware
16
- from PIL import Image
17
  import rarfile
18
  import zipfile
19
- import google.generativeai as genai
 
 
 
20
 
21
  # Configure logging
22
  logging.basicConfig(level=logging.INFO)
@@ -154,12 +155,9 @@ def parse_srt(content: str):
154
  return parsed
155
 
156
 
157
- logger = logging.getLogger(__name__)
158
-
159
  def compress_image(image_bytes, max_width=800, quality=80):
160
  """
161
  Compresses an image to WebP (best) or optimized JPEG.
162
- Renamed back to 'compress_image' to fix your error.
163
  """
164
  try:
165
  img = Image.open(io.BytesIO(image_bytes))
@@ -170,7 +168,6 @@ def compress_image(image_bytes, max_width=800, quality=80):
170
  buffer = io.BytesIO()
171
 
172
  # 2. Try WebP first (Best quality/size ratio)
173
- # If you strictly need JPEG, change use_webp to False
174
  use_webp = True
175
 
176
  if use_webp:
@@ -202,14 +199,14 @@ def compress_image(image_bytes, max_width=800, quality=80):
202
 
203
  except Exception as e:
204
  logger.error(f"Image compression failed: {e}")
205
- # If logging isn't setup, print the error so you can see it
206
- print(f"Error: {e}")
207
  return None
208
 
 
209
  def process_batch_gemini(api_key, items, model_name):
210
  try:
211
- genai.configure(api_key=api_key)
212
- model = genai.GenerativeModel(model_name)
 
213
 
214
  prompt_parts = [
215
  "You are a Subtitle Quality Control (QC) bot.",
@@ -224,172 +221,14 @@ def process_batch_gemini(api_key, items, model_name):
224
  prompt_parts.append(f"Index: {item['index']}")
225
  prompt_parts.append(f"Expected Text: \"{item['expected_text']}\"")
226
  prompt_parts.append(f"Image:")
 
 
227
  img = Image.open(io.BytesIO(item['image_data']))
228
  prompt_parts.append(img)
229
 
230
- # Enforce JSON mode
231
- response = model.generate_content(
232
- prompt_parts,
233
- generation_config={"response_mime_type": "application/json"}
234
- )
235
-
236
- text = response.text.replace("```json", "").replace("```", "").strip()
237
-
238
- try:
239
- return json.loads(text)
240
- except json.JSONDecodeError as e:
241
- # Handle Truncated JSON (Output Token Limit Exceeded)
242
- # This happens if the batch size is too large for the model's output window
243
- logger.warning(f"JSON Parse Error (likely truncated response): {e}. Attempting repair...")
244
-
245
- # Repair Strategy: Find the last closing brace '}', discard everything after, and close the array ']'
246
- last_object_idx = text.rfind("}")
247
- if last_object_idx != -1:
248
- repaired_text = text[:last_object_idx+1] + "]"
249
- try:
250
- repaired_data = json.loads(repaired_text)
251
- logger.info(f"Successfully repaired JSON. Recovered {len(repaired_data)}/{len(items)} items.")
252
- return repaired_data
253
- except json.JSONDecodeError:
254
- logger.error("JSON repair failed.")
255
-
256
- return None # Fail gracefully if repair is impossible
257
-
258
- except Exception as e:
259
- logger.error(f"Gemini API Error with key ...{api_key[-4:]}: {e}")
260
- return None
261
-
262
- # --- Main Endpoint ---
263
-
264
- @app.post("/api/analyze")
265
- async def analyze_subtitles(
266
- srt_file: UploadFile = File(...),
267
- media_files: list[UploadFile] = File(...),
268
- api_keys: str = Form(...),
269
- batch_size: int = Form(20),
270
- model_name: str = Form("gemini-3-flash-preview"),
271
- compression_quality: float = Form(0.7)
272
- ):
273
- temp_dir = tempfile.mkdtemp()
274
- try:
275
- # Convert float quality (0.1-1.0) to integer (10-100) for PIL
276
- pil_quality = max(10, min(100, int(compression_quality * 100)))
277
-
278
- # 1. Read SRT
279
- srt_content = (await srt_file.read()).decode('utf-8', errors='ignore')
280
- srt_data = parse_srt(srt_content)
281
- srt_data.sort(key=lambda x: x['startTimeMs'])
282
-
283
- # 2. Process Media
284
- images = []
285
- for file in media_files:
286
- file_path = os.path.join(temp_dir, file.filename)
287
- with open(file_path, "wb") as f:
288
- shutil.copyfileobj(file.file, f)
289
-
290
- if file.filename.lower().endswith('.rar'):
291
- try:
292
- with rarfile.RarFile(file_path) as rf:
293
- rf.extractall(temp_dir)
294
- except rarfile.RarCannotExec:
295
- raise HTTPException(status_code=500, detail="Unrar executable not found in container.")
296
- elif file.filename.lower().endswith('.zip'):
297
- with zipfile.ZipFile(file_path, 'r') as zf:
298
- zf.extractall(temp_dir)
299
-
300
- for root, _, files in os.walk(temp_dir):
301
- for filename in files:
302
- if filename.lower().endswith(('.jpg', '.jpeg', '.png', '.webp', '.bmp')):
303
- full_path = os.path.join(root, filename)
304
- ms = parse_filename_to_ms(filename)
305
- if ms is not None:
306
- with open(full_path, "rb") as f:
307
- raw_bytes = f.read()
308
- compressed = compress_image(raw_bytes, quality=pil_quality)
309
- if compressed:
310
- images.append({
311
- "filename": filename,
312
- "timeMs": ms,
313
- "data": compressed
314
- })
315
-
316
- images.sort(key=lambda x: x['timeMs'])
317
-
318
- # 3. Pair
319
- pairs = []
320
- for i in range(len(images)):
321
- img = images[i]
322
- srt = srt_data[i] if i < len(srt_data) else None
323
-
324
- if srt:
325
- # Create Thumbnail (lower quality for UI speed)
326
- thumb_bytes = compress_image(img['data'], quality=50, max_width=300)
327
- thumb_b64 = base64.b64encode(thumb_bytes).decode('utf-8')
328
-
329
- pairs.append({
330
- "index": i,
331
- "image_data": img['data'],
332
- "expected_text": srt['text'],
333
- "srt_id": srt['id'],
334
- "srt_time": srt['time'],
335
- "filename": img['filename'],
336
- "thumb": f"data:image/jpeg;base64,{thumb_b64}",
337
- "status": "pending"
338
- })
339
-
340
- if not pairs:
341
- return {"status": "error", "message": "No valid image/subtitle pairs found."}
342
-
343
- # 4. Process Gemini
344
- keys = [k.strip() for k in api_keys.split('\n') if k.strip()]
345
- if not keys:
346
- raise HTTPException(status_code=400, detail="No API Keys provided")
347
-
348
- results_map = {}
349
- batches = [pairs[i:i + batch_size] for i in range(0, len(pairs), batch_size)]
350
-
351
- def worker(batch_idx, batch):
352
- key = keys[batch_idx % len(keys)]
353
- return process_batch_gemini(key, batch, model_name)
354
-
355
- with ThreadPoolExecutor(max_workers=len(keys)) as executor:
356
- futures = [executor.submit(worker, i, b) for i, b in enumerate(batches)]
357
- for future in futures:
358
- res = future.result()
359
- if res:
360
- for item in res:
361
- results_map[item['index']] = item
362
-
363
- # 5. Build Output
364
- final_output = []
365
- for p in pairs:
366
- analysis = results_map.get(p['index'])
367
- status = "pending"
368
- reason = ""
369
- detected = ""
370
- if analysis:
371
- status = "match" if analysis['match'] else "mismatch"
372
- reason = analysis.get('reason', '')
373
- detected = analysis.get('detected_text', '')
374
-
375
- final_output.append({
376
- "id": p['index'],
377
- "filename": p['filename'],
378
- "thumb": p['thumb'],
379
- "expected": p['expected_text'],
380
- "detected": detected,
381
- "status": status,
382
- "reason": reason,
383
- "srt_id": p['srt_id'],
384
- "srt_time": p['srt_time']
385
- })
386
-
387
- return {"status": "success", "results": final_output}
388
-
389
- except Exception as e:
390
- logger.error(f"Server Error: {e}")
391
- raise HTTPException(status_code=500, detail=str(e))
392
- finally:
393
- shutil.rmtree(temp_dir)
394
-
395
- app.mount("/", StaticFiles(directory="static", html=True), name="static")
 
9
  from concurrent.futures import ThreadPoolExecutor
10
  from PIL import Image, ImageOps
11
 
 
12
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException
13
  from fastapi.staticfiles import StaticFiles
14
  from fastapi.middleware.cors import CORSMiddleware
 
15
  import rarfile
16
  import zipfile
17
+
18
+ # --- MIGRATION: New SDK Imports ---
19
+ from google import genai
20
+ from google.genai import types
21
 
22
  # Configure logging
23
  logging.basicConfig(level=logging.INFO)
 
155
  return parsed
156
 
157
 
 
 
158
  def compress_image(image_bytes, max_width=800, quality=80):
159
  """
160
  Compresses an image to WebP (best) or optimized JPEG.
 
161
  """
162
  try:
163
  img = Image.open(io.BytesIO(image_bytes))
 
168
  buffer = io.BytesIO()
169
 
170
  # 2. Try WebP first (Best quality/size ratio)
 
171
  use_webp = True
172
 
173
  if use_webp:
 
199
 
200
  except Exception as e:
201
  logger.error(f"Image compression failed: {e}")
 
 
202
  return None
203
 
204
+ # --- MIGRATION: Updated Gemini Processing Function ---
205
  def process_batch_gemini(api_key, items, model_name):
206
  try:
207
+ # 1. Instantiate the Client (New SDK pattern)
208
+ # This replaces genai.configure()
209
+ client = genai.Client(api_key=api_key)
210
 
211
  prompt_parts = [
212
  "You are a Subtitle Quality Control (QC) bot.",
 
221
  prompt_parts.append(f"Index: {item['index']}")
222
  prompt_parts.append(f"Expected Text: \"{item['expected_text']}\"")
223
  prompt_parts.append(f"Image:")
224
+
225
+ # The new SDK handles PIL images directly in the contents list just like the old one
226
  img = Image.open(io.BytesIO(item['image_data']))
227
  prompt_parts.append(img)
228
 
229
+ # 2. Call generate_content via the client
230
+ response = client.models.generate_content(
231
+ model=model_name,
232
+ contents=prompt_parts,
233
+ config=types.GenerateContentConfig(
234
+ response_mime_type="application/json"