bigbossmonster committed on
Commit
e74e277
·
verified ·
1 Parent(s): c24bff4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -199
app.py CHANGED
@@ -2,12 +2,12 @@ import os
2
  import io
3
  import re
4
  import json
5
- import tempfile
6
  import shutil
7
  import logging
8
  import base64
9
  from concurrent.futures import ThreadPoolExecutor
10
- from PIL import Image, ImageOps
11
 
12
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException
13
  from fastapi.staticfiles import StaticFiles
@@ -15,7 +15,6 @@ from fastapi.middleware.cors import CORSMiddleware
15
  import rarfile
16
  import zipfile
17
 
18
- # --- MIGRATION: New SDK Imports ---
19
  from google import genai
20
  from google.genai import types
21
 
@@ -33,6 +32,10 @@ app.add_middleware(
33
  allow_headers=["*"],
34
  )
35
 
 
 
 
 
36
  # --- Utility Functions ---
37
 
38
  def parse_srt_time_to_ms(time_str):
@@ -52,28 +55,33 @@ def parse_filename_to_ms(filename):
52
  return (h * 3600000) + (m * 60000) + (s * 1000) + ms
53
 
54
  def parse_srt(content: str):
 
 
 
 
 
55
  # Normalize line endings
56
  content = content.replace('\r\n', '\n').replace('\r', '\n')
57
- # Split by double newline (standard SRT block separator)
58
- blocks = re.split(r'\n\n+', content.strip())
 
 
 
 
 
 
 
 
 
59
 
60
  parsed = []
61
- for block in blocks:
62
- lines = [l.strip() for l in block.split('\n') if l.strip()]
63
- if len(lines) < 2:
64
- continue
65
-
66
- srt_id = lines[0]
67
- time_range = lines[1]
68
 
69
- # Check if there is actually text after the timestamp
70
- # If there are lines after index 1, join them; otherwise, it's a blank sub
71
- if len(lines) > 2:
72
- text = "\n".join(lines[2:])
73
- else:
74
- text = "" # Explicitly blank
75
-
76
  try:
 
77
  start_time_str = time_range.split('-->')[0].strip()
78
  start_ms = parse_srt_time_to_ms(start_time_str)
79
 
@@ -84,117 +92,52 @@ def parse_srt(content: str):
84
  "text": text
85
  })
86
  except Exception as e:
87
- logger.warning(f"Skipping malformed SRT block: {block[:50]}... Error: {e}")
88
 
89
  return parsed
90
 
91
-
92
  def compress_image(image_bytes, max_width=800, quality=80):
93
- """
94
- Compresses an image to WebP (best) or optimized JPEG.
95
- """
96
  try:
97
  img = Image.open(io.BytesIO(image_bytes))
98
-
99
- # 1. Efficient Resize (Using thumbnail prevents upscaling artifacts)
100
  img.thumbnail((max_width, max_width), Image.Resampling.LANCZOS)
101
-
102
  buffer = io.BytesIO()
103
-
104
- # 2. Try WebP first (Best quality/size ratio)
105
- use_webp = True
106
-
107
- if use_webp:
108
- # method=6 is the strongest compression algo for WebP
109
- img.save(buffer, format="WEBP", quality=quality, method=6)
110
- else:
111
- # Fallback: Optimized JPEG
112
- # Handle transparency (paste on white)
113
- if img.mode in ('RGBA', 'LA') or (img.mode == 'P' and 'transparency' in img.info):
114
- background = Image.new('RGB', img.size, (255, 255, 255))
115
- # Handle paletted images with transparency
116
- if img.mode == 'P':
117
- img = img.convert('RGBA')
118
- background.paste(img, mask=img.split()[3])
119
- img = background
120
- elif img.mode != 'RGB':
121
- img = img.convert('RGB')
122
-
123
- img.save(
124
- buffer,
125
- format="JPEG",
126
- quality=quality,
127
- optimize=True,
128
- progressive=True,
129
- subsampling=0
130
- )
131
-
132
  return buffer.getvalue()
133
-
134
  except Exception as e:
135
- logger.error(f"Image compression failed: {e}")
136
  return None
137
 
138
- # --- MIGRATION: Updated Gemini Processing Function ---
139
  def process_batch_gemini(api_key, items, model_name):
140
  try:
141
- # 1. Instantiate the Client (New SDK pattern)
142
- # This replaces genai.configure()
143
  client = genai.Client(api_key=api_key)
144
-
145
  prompt_parts = [
146
  "You are a Subtitle Quality Control (QC) bot.",
147
  f"I will provide {len(items)} images and the EXPECTED subtitle text for each.",
148
- "Return a JSON array strictly following this schema:",
149
  '[{"index": <int>, "detected_text": "<string>", "match": <bool>, "reason": "<string>"}, ...]',
150
  "Return ONLY the JSON. No markdown."
151
  ]
152
 
153
  for item in items:
 
 
154
  prompt_parts.append(f"\n--- Item {item['index']} ---")
155
- prompt_parts.append(f"Index: {item['index']}")
156
- prompt_parts.append(f"Expected Text: \"{item['expected_text']}\"")
157
- prompt_parts.append(f"Image:")
158
-
159
- # The new SDK handles PIL images directly in the contents list just like the old one
160
- img = Image.open(io.BytesIO(item['image_data']))
161
- prompt_parts.append(img)
162
 
163
- # 2. Call generate_content via the client
164
  response = client.models.generate_content(
165
  model=model_name,
166
  contents=prompt_parts,
167
- config=types.GenerateContentConfig(
168
- response_mime_type="application/json"
169
- )
170
  )
171
 
172
  text = response.text.replace("```json", "").replace("```", "").strip()
173
-
174
- try:
175
- return json.loads(text)
176
- except json.JSONDecodeError as e:
177
- # Handle Truncated JSON (Output Token Limit Exceeded)
178
- logger.warning(f"JSON Parse Error (likely truncated response): {e}. Attempting repair...")
179
-
180
- # Repair Strategy: Find the last closing brace '}', discard everything after, and close the array ']'
181
- last_object_idx = text.rfind("}")
182
- if last_object_idx != -1:
183
- repaired_text = text[:last_object_idx+1] + "]"
184
- try:
185
- repaired_data = json.loads(repaired_text)
186
- logger.info(f"Successfully repaired JSON. Recovered {len(repaired_data)}/{len(items)} items.")
187
- return repaired_data
188
- except json.JSONDecodeError:
189
- logger.error("JSON repair failed.")
190
-
191
- return None # Fail gracefully if repair is impossible
192
-
193
  except Exception as e:
194
- logger.error(f"Gemini API Error with key ...{api_key[-4:]}: {e}")
195
  return None
196
 
197
- # --- Main Endpoint ---
198
 
199
  @app.post("/api/analyze")
200
  async def analyze_subtitles(
@@ -202,129 +145,119 @@ async def analyze_subtitles(
202
  media_files: list[UploadFile] = File(...),
203
  api_keys: str = Form(...),
204
  batch_size: int = Form(20),
205
- model_name: str = Form("gemini-2.0-flash"), # Updated default model hint
206
  compression_quality: float = Form(0.7)
207
  ):
208
- temp_dir = tempfile.mkdtemp()
 
 
 
 
 
209
  try:
210
- # Convert float quality (0.1-1.0) to integer (10-100) for PIL
211
  pil_quality = max(10, min(100, int(compression_quality * 100)))
212
 
213
- # 1. Read SRT
214
- srt_content = (await srt_file.read()).decode('utf-8', errors='ignore')
215
- srt_data = parse_srt(srt_content)
 
 
 
 
216
  srt_data.sort(key=lambda x: x['startTimeMs'])
217
 
218
- # 2. Process Media
219
- images = []
220
  for file in media_files:
221
- file_path = os.path.join(temp_dir, file.filename)
222
  with open(file_path, "wb") as f:
223
  shutil.copyfileobj(file.file, f)
224
 
225
  if file.filename.lower().endswith('.rar'):
226
- try:
227
- with rarfile.RarFile(file_path) as rf:
228
- rf.extractall(temp_dir)
229
- except rarfile.RarCannotExec:
230
- raise HTTPException(status_code=500, detail="Unrar executable not found in container.")
231
  elif file.filename.lower().endswith('.zip'):
232
  with zipfile.ZipFile(file_path, 'r') as zf:
233
- zf.extractall(temp_dir)
234
-
235
- for root, _, files in os.walk(temp_dir):
236
- for filename in files:
237
- if filename.lower().endswith(('.jpg', '.jpeg', '.png', '.webp', '.bmp')):
238
- full_path = os.path.join(root, filename)
239
- ms = parse_filename_to_ms(filename)
240
- if ms is not None:
241
- with open(full_path, "rb") as f:
242
- raw_bytes = f.read()
243
- compressed = compress_image(raw_bytes, quality=pil_quality)
244
- if compressed:
245
- images.append({
246
- "filename": filename,
247
- "timeMs": ms,
248
- "data": compressed
249
- })
250
-
251
- images.sort(key=lambda x: x['timeMs'])
252
-
253
- # 3. Pair
254
- pairs = []
255
- for i in range(len(images)):
256
- img = images[i]
257
- srt = srt_data[i] if i < len(srt_data) else None
258
-
259
- if srt:
260
- # Create Thumbnail (lower quality for UI speed)
261
- thumb_bytes = compress_image(img['data'], quality=50, max_width=300)
262
- thumb_b64 = base64.b64encode(thumb_bytes).decode('utf-8')
263
-
264
- pairs.append({
265
- "index": i,
266
- "image_data": img['data'],
267
- "expected_text": srt['text'],
268
- "srt_id": srt['id'],
269
- "srt_time": srt['time'],
270
- "filename": img['filename'],
271
- "thumb": f"data:image/jpeg;base64,{thumb_b64}",
272
- "status": "pending"
273
- })
274
-
275
- if not pairs:
276
- return {"status": "error", "message": "No valid image/subtitle pairs found."}
277
-
278
- # 4. Process Gemini
279
- keys = [k.strip() for k in api_keys.split('\n') if k.strip()]
280
- if not keys:
281
- raise HTTPException(status_code=400, detail="No API Keys provided")
282
-
283
- results_map = {}
284
- batches = [pairs[i:i + batch_size] for i in range(0, len(pairs), batch_size)]
285
 
286
- def worker(batch_idx, batch):
287
- key = keys[batch_idx % len(keys)]
288
- return process_batch_gemini(key, batch, model_name)
289
 
290
- with ThreadPoolExecutor(max_workers=len(keys)) as executor:
291
- futures = [executor.submit(worker, i, b) for i, b in enumerate(batches)]
292
- for future in futures:
293
- res = future.result()
294
- if res:
295
- for item in res:
296
- results_map[item['index']] = item
297
 
298
- # 5. Build Output
299
- final_output = []
300
- for p in pairs:
301
- analysis = results_map.get(p['index'])
302
- status = "pending"
303
- reason = ""
304
- detected = ""
305
- if analysis:
306
- status = "match" if analysis['match'] else "mismatch"
307
- reason = analysis.get('reason', '')
308
- detected = analysis.get('detected_text', '')
309
 
310
- final_output.append({
311
- "id": p['index'],
312
- "filename": p['filename'],
313
- "thumb": p['thumb'],
314
- "expected": p['expected_text'],
315
- "detected": detected,
316
- "status": status,
317
- "reason": reason,
318
- "srt_id": p['srt_id'],
319
- "srt_time": p['srt_time']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  })
321
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  return {"status": "success", "results": final_output}
323
-
324
- except Exception as e:
325
- logger.error(f"Server Error: {e}")
326
- raise HTTPException(status_code=500, detail=str(e))
327
- finally:
328
- shutil.rmtree(temp_dir)
329
 
330
  app.mount("/", StaticFiles(directory="static", html=True), name="static")
 
2
  import io
3
  import re
4
  import json
5
+ import uuid
6
  import shutil
7
  import logging
8
  import base64
9
  from concurrent.futures import ThreadPoolExecutor
10
+ from PIL import Image
11
 
12
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException
13
  from fastapi.staticfiles import StaticFiles
 
15
  import rarfile
16
  import zipfile
17
 
 
18
  from google import genai
19
  from google.genai import types
20
 
 
32
  allow_headers=["*"],
33
  )
34
 
35
+ # Persistent storage directory
36
+ TASKS_DIR = "data_tasks"
37
+ os.makedirs(TASKS_DIR, exist_ok=True)
38
+
39
  # --- Utility Functions ---
40
 
41
  def parse_srt_time_to_ms(time_str):
 
55
  return (h * 3600000) + (m * 60000) + (s * 1000) + ms
56
 
57
def parse_srt(content: str):
    """
    Robust regex-based SRT parser.

    Searches for `ID / timestamp / text` patterns instead of splitting on
    blank lines, so it tolerates blank subtitles and inconsistent newlines.

    Returns a list of dicts: {"id", "time", "startTimeMs", "text"},
    where "text" is "" for blank subtitles.
    """
    # Normalize line endings
    content = content.replace('\r\n', '\n').replace('\r', '\n')

    # Pattern explanation:
    #   (\d+)              -> Group 1: block ID
    #   \n                 -> Newline
    #   (...-->...)        -> Group 2: timestamp line. Hours accepted as 1-2
    #                         digits (some files omit the leading zero) and
    #                         the millisecond separator may be ',' or '.'
    #   (.*?)              -> Group 3: subtitle text (non-greedy, may be empty)
    #   (?=\n\d+\n|\Z)     -> Lookahead: stop at the next ID or end of file
    pattern = re.compile(
        r'(\d+)\n'
        r'(\d{1,2}:\d{2}:\d{2}[,.]\d{3}\s*-->\s*\d{1,2}:\d{2}:\d{2}[,.]\d{3})\n'
        r'(.*?)(?=\n\d+\n|\Z)',
        re.DOTALL
    )

    parsed = []
    for raw_id, raw_time, raw_text in pattern.findall(content):
        srt_id = raw_id.strip()
        time_range = raw_time.strip()
        text = raw_text.strip()  # correctly "" if the subtitle is blank

        try:
            # Extract the start time so callers can sort chronologically
            start_time_str = time_range.split('-->')[0].strip()
            start_ms = parse_srt_time_to_ms(start_time_str)

            parsed.append({
                "id": srt_id,
                "time": time_range,
                "startTimeMs": start_ms,
                "text": text
            })
        except Exception as e:
            logger.warning(f"Error parsing block {srt_id}: {e}")

    return parsed
98
 
 
99
def compress_image(image_bytes, max_width=800, quality=80):
    """
    Downscale an image to fit within `max_width` and re-encode it as WebP.

    Returns the compressed bytes, or None if decoding/encoding fails.
    """
    try:
        picture = Image.open(io.BytesIO(image_bytes))
        # thumbnail() resizes in place and never upscales
        picture.thumbnail((max_width, max_width), Image.Resampling.LANCZOS)
        out = io.BytesIO()
        # method=6 is the slowest/strongest WebP encoder setting
        picture.save(out, format="WEBP", quality=quality, method=6)
    except Exception as e:
        logger.error(f"Compression error: {e}")
        return None
    else:
        return out.getvalue()
109
 
 
110
def process_batch_gemini(api_key, items, model_name):
    """
    Send one batch of (image, expected-subtitle) pairs to Gemini for OCR/QC.

    Each item dict must provide 'index', 'expected_text', and 'image_data'
    (compressed image bytes).  Returns the parsed JSON list of verdicts,
    or None on any failure (API error, unrepairable response).
    """
    try:
        client = genai.Client(api_key=api_key)
        prompt_parts = [
            "You are a Subtitle Quality Control (QC) bot.",
            f"I will provide {len(items)} images and the EXPECTED subtitle text for each.",
            "Return a JSON array strictly: "
            '[{"index": <int>, "detected_text": "<string>", "match": <bool>, "reason": "<string>"}, ...]',
            "Return ONLY the JSON. No markdown."
        ]

        for item in items:
            # Handle empty expected text explicitly for the AI
            exp_text = item['expected_text'] if item['expected_text'].strip() else "[BLANK/EMPTY]"
            prompt_parts.append(f"\n--- Item {item['index']} ---")
            prompt_parts.append(f"Expected Text: \"{exp_text}\"")
            prompt_parts.append(Image.open(io.BytesIO(item['image_data'])))

        response = client.models.generate_content(
            model=model_name,
            contents=prompt_parts,
            config=types.GenerateContentConfig(response_mime_type="application/json")
        )

        text = response.text.replace("```json", "").replace("```", "").strip()
        try:
            return json.loads(text)
        except json.JSONDecodeError as e:
            # The model can exceed its output-token limit and truncate the JSON.
            # Repair strategy: keep everything up to the last complete object
            # ('}'), discard the partial tail, and close the array.
            logger.warning(f"JSON Parse Error (likely truncated response): {e}. Attempting repair...")
            last_object_idx = text.rfind("}")
            if last_object_idx != -1:
                try:
                    repaired = json.loads(text[:last_object_idx + 1] + "]")
                    logger.info(f"Successfully repaired JSON. Recovered {len(repaired)}/{len(items)} items.")
                    return repaired
                except json.JSONDecodeError:
                    logger.error("JSON repair failed.")
            return None  # fail gracefully if repair is impossible
    except Exception as e:
        # Log only the key suffix so full keys never land in the logs
        logger.error(f"Gemini API Error with key ...{api_key[-4:]}: {e}")
        return None
139
 
140
+ # --- Endpoints ---
141
 
142
  @app.post("/api/analyze")
143
  async def analyze_subtitles(
 
145
  media_files: list[UploadFile] = File(...),
146
  api_keys: str = Form(...),
147
  batch_size: int = Form(20),
148
+ model_name: str = Form("gemini-2.0-flash"),
149
  compression_quality: float = Form(0.7)
150
  ):
151
+ task_id = str(uuid.uuid4())
152
+ task_dir = os.path.join(TASKS_DIR, task_id)
153
+ os.makedirs(task_dir, exist_ok=True)
154
+
155
+ should_cleanup = False
156
+
157
  try:
 
158
  pil_quality = max(10, min(100, int(compression_quality * 100)))
159
 
160
+ # 1. Save and Parse SRT
161
+ srt_path = os.path.join(task_dir, "input.srt")
162
+ srt_bytes = await srt_file.read()
163
+ with open(srt_path, "wb") as f:
164
+ f.write(srt_bytes)
165
+
166
+ srt_data = parse_srt(srt_bytes.decode('utf-8', errors='ignore'))
167
  srt_data.sort(key=lambda x: x['startTimeMs'])
168
 
169
+ # 2. Extract Media
 
170
  for file in media_files:
171
+ file_path = os.path.join(task_dir, file.filename)
172
  with open(file_path, "wb") as f:
173
  shutil.copyfileobj(file.file, f)
174
 
175
  if file.filename.lower().endswith('.rar'):
176
+ with rarfile.RarFile(file_path) as rf:
177
+ rf.extractall(task_dir)
 
 
 
178
  elif file.filename.lower().endswith('.zip'):
179
  with zipfile.ZipFile(file_path, 'r') as zf:
180
+ zf.extractall(task_dir)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
+ # 3. Pair and Process (shared logic)
183
+ return await run_core_analysis(task_dir, srt_data, api_keys, batch_size, model_name, pil_quality, task_id)
 
184
 
185
+ except Exception as e:
186
+ logger.error(f"Server Error: {e}")
187
+ raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
188
 
189
+ @app.post("/api/retry")
190
+ async def retry_analysis(
191
+ task_id: str = Form(...),
192
+ api_keys: str = Form(...),
193
+ batch_size: int = Form(20),
194
+ model_name: str = Form("gemini-2.0-flash"),
195
+ compression_quality: float = Form(0.7)
196
+ ):
197
+ task_dir = os.path.join(TASKS_DIR, task_id)
198
+ if not os.path.exists(task_dir):
199
+ raise HTTPException(status_code=404, detail="Task files not found.")
200
 
201
+ srt_path = os.path.join(task_dir, "input.srt")
202
+ with open(srt_path, "r", encoding="utf-8", errors="ignore") as f:
203
+ srt_data = parse_srt(f.read())
204
+
205
+ pil_quality = max(10, min(100, int(compression_quality * 100)))
206
+ return await run_core_analysis(task_dir, srt_data, api_keys, batch_size, model_name, pil_quality, task_id)
207
+
208
async def run_core_analysis(task_dir, srt_data, api_keys, batch_size, model_name, pil_quality, task_id):
    """
    Shared pipeline for /api/analyze and /api/retry.

    Walks task_dir for timestamp-named screenshots, pairs them positionally
    with the (chronologically sorted) SRT entries, fans batches out to Gemini
    across the provided API keys, and assembles per-subtitle verdicts.

    Deletes task_dir once no item is left pending; otherwise keeps it and
    returns "partial" with the task_id so the client can /api/retry.
    """
    # Collect timestamped screenshots from the (possibly nested) extraction dir
    images = []
    for root, _, files in os.walk(task_dir):
        for filename in files:
            if filename.lower().endswith(('.jpg', '.jpeg', '.png', '.webp', '.bmp')):
                ms = parse_filename_to_ms(filename)
                if ms is not None:
                    with open(os.path.join(root, filename), "rb") as f:
                        comp = compress_image(f.read(), quality=pil_quality)
                    if comp:
                        images.append({"filename": filename, "timeMs": ms, "data": comp})

    images.sort(key=lambda x: x['timeMs'])

    # Pair the i-th image with the i-th subtitle (both sorted chronologically)
    pairs = []
    for i, img in enumerate(images):
        srt = srt_data[i] if i < len(srt_data) else None
        if srt:
            thumb = compress_image(img['data'], quality=40, max_width=200)
            if thumb is None:
                # Skip rather than crash: b64encode(None) would raise TypeError
                continue
            pairs.append({
                "index": i, "image_data": img['data'], "expected_text": srt['text'],
                "srt_id": srt['id'], "srt_time": srt['time'], "filename": img['filename'],
                "thumb": f"data:image/webp;base64,{base64.b64encode(thumb).decode()}",
                "status": "pending"
            })

    if not pairs:
        return {"status": "error", "message": "No valid image/subtitle pairs found."}

    keys = [k.strip() for k in api_keys.split('\n') if k.strip()]
    if not keys:
        # ThreadPoolExecutor(max_workers=0) raises ValueError and
        # keys[i % 0] raises ZeroDivisionError: fail with a clear message
        raise HTTPException(status_code=400, detail="No API Keys provided")

    results_map = {}
    batches = [pairs[i:i + batch_size] for i in range(0, len(pairs), batch_size)]

    # One worker per key; batches are round-robined across the keys
    with ThreadPoolExecutor(max_workers=len(keys)) as executor:
        futures = [
            executor.submit(process_batch_gemini, keys[i % len(keys)], b, model_name)
            for i, b in enumerate(batches)
        ]
        for fut in futures:
            res = fut.result()
            if res:
                for item in res:
                    results_map[item['index']] = item

    final_output = []
    any_pending = False
    for p in pairs:
        res = results_map.get(p['index'])
        status = ("match" if res['match'] else "mismatch") if res else "pending"
        if status == "pending":
            any_pending = True
        final_output.append({
            "id": p['index'], "status": status, "expected": p['expected_text'],
            "detected": res.get('detected_text', '') if res else "",
            "reason": res.get('reason', '') if res else "",
            "thumb": p['thumb'], "filename": p['filename'],
            "srt_id": p['srt_id'], "srt_time": p['srt_time']
        })

    if not any_pending:
        # Everything analyzed: no retry needed, reclaim disk space
        shutil.rmtree(task_dir)
        return {"status": "success", "results": final_output}

    return {"status": "partial", "task_id": task_id, "results": final_output}
 
 
 
 
262
 
263
  app.mount("/", StaticFiles(directory="static", html=True), name="static")