# Spaces: jerecom / Still_frame (Paused)
# File size: 6,868 Bytes
# fce7147

import os
import httpx
from fastapi import FastAPI, Form, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, List
import asyncio
import uvicorn
import pytesseract
from PIL import Image
import io
import re

# Path to the Tesseract binary on Linux.
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

app = FastAPI(title="TMDB + OCR Pro API | Badal Special")

# CORS: every origin, method and header is accepted, and credentials are allowed.
# NOTE(review): a wildcard origin combined with allow_credentials=True is very
# permissive — confirm this is intended for this deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"], 
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Endpoint that optimize_image() posts each image URL to.
OPTIPIX_API = "https://jerecom-image-optimizer-api-2.hf.space/upload-poster"
# TMDb API key read from the environment; get_media() answers HTTP 500 when it is unset.
TMDB_API_KEY = os.getenv("TMDB_API_KEY")

# One image in the API response: the source URL plus, when available, the
# URL of its optimized copy.
class ImageMedia(BaseModel):
    # URL of the untouched source image.
    original_url: str
    # URL of the optimized copy, or None when no optimized copy is available.
    # The explicit default keeps the field optional under both pydantic v1
    # (where Optional implied a None default) and v2 (where it does not).
    processed_url: Optional[str] = None

# Response body of the /get-media endpoint.
class ProcessResponse(BaseModel):
    # IMDb title ID the request was made for.
    title_id: str
    # TMDb movie ID that the IMDb ID resolved to.
    tmdb_id: int
    # Number of screenshots the caller asked for.
    requested_shots: int
    # Number of language-neutral backdrops that were candidates for scanning.
    total_screenshots_scanned: int
    # Poster image, or None when the title has no poster. The explicit
    # default keeps the field optional under both pydantic v1 and v2.
    poster: Optional[ImageMedia] = None
    # Backdrops that passed the text check, each with its optimization result.
    screenshots: List[ImageMedia]

# --- 1. OCR Scanner Function ---
def check_text_in_image(image_bytes: bytes) -> bool:
    """Report whether the image appears to contain text.

    The image is shrunk to fit within 500x500 and converted to grayscale
    before being passed to Tesseract with the English, Hindi and Telugu
    language packs. Only ASCII letters/digits and characters from the
    Devanagari and Telugu Unicode blocks are counted.

    Args:
        image_bytes: Raw encoded image data.

    Returns:
        True when more than 4 counted characters were recognized, or when
        anything goes wrong while decoding/scanning (an unreadable image is
        rejected rather than risk letting it through); False otherwise.
    """
    try:
        picture = Image.open(io.BytesIO(image_bytes))
        # Shrink in place so the OCR pass stays fast.
        picture.thumbnail((500, 500))
        grayscale = picture.convert('L')

        recognized = pytesseract.image_to_string(grayscale, lang='eng+hin+tel')

        # Drop everything except letters and digits of the scanned scripts.
        significant = re.sub(r'[^a-zA-Z0-9\u0900-\u097F\u0C00-\u0C7F]', '', recognized)
    except Exception as e:
        print(f"OCR Parsing Error: {e}")
        # Take no chances: treat a failed scan as "has text" so the image is rejected.
        return True

    # More than 4 characters means this is a poster-like image, not a clean screenshot.
    return len(significant) > 4

# --- 2. Parallel OptiPix Function ---
async def optimize_image(client: httpx.AsyncClient, raw_url: str, level: str):
    """Ask the OptiPix service to optimize one image, best-effort.

    Args:
        client: HTTP client used to send the request.
        raw_url: URL of the image to optimize.
        level: Compression level forwarded to the service.

    Returns:
        A dict with "original_url" (always raw_url) and "processed_url",
        which is the URL reported by the service when its reply has a truthy
        "success" flag and None otherwise. Any failure during the request or
        while reading the reply is logged and leaves "processed_url" as None.
    """
    outcome = {"original_url": raw_url, "processed_url": None}
    try:
        response = await client.post(
            OPTIPIX_API,
            data={"level": level, "url": raw_url},
            timeout=30.0,
        )
        payload = response.json()
        if payload.get("success"):
            outcome["processed_url"] = payload.get("url")
    except Exception as e:
        print(f"OptiPix failed for {raw_url} - Error: {e}")
    return outcome

@app.post("/get-media", response_model=ProcessResponse)
async def get_media(
    title_id: str = Form(..., description="IMDb Title ID (e.g., tt3801314)"),
    top_shots: int = Form(3, description="Number of screenshots required"),
    level: str = Form("extreme", description="Compression level")
):
    """Return an optimized poster and text-free screenshots for an IMDb title.

    The IMDb ID is resolved to a TMDb movie, the movie's images are fetched,
    language-neutral backdrops are OCR-scanned (widest first) until
    ``top_shots`` text-free ones are found, and the poster plus the accepted
    backdrops are sent to the optimizer concurrently.

    Args:
        title_id: IMDb title ID ("tt" followed by digits); surrounding
            whitespace is ignored.
        top_shots: Number of text-free screenshots wanted. Values <= 0 yield
            no screenshots.
        level: Compression level forwarded to the optimizer.

    Returns:
        A ProcessResponse. ``total_screenshots_scanned`` is the number of
        language-neutral backdrops that were candidates; scanning stops early
        once enough screenshots have passed, so fewer may have been downloaded.

    Raises:
        HTTPException: 500 when TMDB_API_KEY is not configured, 400 when
            ``title_id`` is not an IMDb title ID, 404 when TMDb has no movie
            for the ID, 502 when a TMDb request fails or returns an
            unexpected body.
    """
    if not TMDB_API_KEY:
        raise HTTPException(status_code=500, detail="TMDB_API_KEY is missing!")

    # The ID becomes part of the TMDb request path, so accept only the IMDb
    # title-ID shape instead of forwarding arbitrary text.
    title_id = title_id.strip()
    if not re.fullmatch(r"tt[0-9]+", title_id):
        raise HTTPException(
            status_code=400,
            detail="title_id must be an IMDb title ID such as tt3801314",
        )

    async with httpx.AsyncClient(timeout=120.0) as client:

        async def tmdb_get(path: str, **query) -> dict:
            """GET a TMDb v3 resource and return its JSON object, or raise 502."""
            try:
                res = await client.get(
                    f"https://api.themoviedb.org/3/{path}",
                    params={**query, "api_key": TMDB_API_KEY},
                )
                res.raise_for_status()
                body = res.json()
                if not isinstance(body, dict):
                    raise ValueError("unexpected TMDb response shape")
                return body
            except (httpx.HTTPError, ValueError) as e:
                # Only the exception type is logged: httpx error messages
                # embed the request URL, which carries the API key.
                print(f"TMDb request failed for /{path}: {type(e).__name__}")
                raise HTTPException(status_code=502, detail="TMDb request failed") from e

        def width_of(image: dict) -> int:
            """Sort key that tolerates a missing or null width."""
            return image.get("width") or 0

        # --- STEP 1: resolve the IMDb ID to a TMDb movie ID ---
        find_data = await tmdb_get(f"find/{title_id}", external_source="imdb_id")
        movie_results = find_data.get("movie_results") or []
        if not movie_results:
            # Raised (not returned) so the reply is a proper 404 rather than a
            # body that cannot satisfy the ProcessResponse response model.
            raise HTTPException(
                status_code=404,
                detail="TMDb पर इस IMDb ID की कोई मूवी नहीं मिली!",
            )

        tmdb_id = movie_results[0]["id"]

        # --- STEP 2: fetch the movie's images from TMDb ---
        img_data = await tmdb_get(f"movie/{tmdb_id}/images")
        raw_backdrops = img_data.get("backdrops") or []
        raw_posters = img_data.get("posters") or []

        # Keep only backdrops whose language is null, widest first.
        clean_backdrops = sorted(
            (shot for shot in raw_backdrops if shot.get("iso_639_1") is None),
            key=width_of,
            reverse=True,
        )

        # --- STEP 3: pick the widest poster ---
        best_poster_url = None
        if raw_posters:
            best_poster = max(raw_posters, key=width_of)
            best_poster_url = f"https://image.tmdb.org/t/p/original{best_poster['file_path']}"

        # --- STEP 4: OCR-scan backdrops until enough text-free ones are found ---
        verified_screenshots_urls = []
        for shot in clean_backdrops:
            if len(verified_screenshots_urls) >= top_shots:
                break  # requirement met, stop scanning

            shot_url = f"https://image.tmdb.org/t/p/original{shot['file_path']}"

            try:
                # Download the image and hand it to the OCR check.
                img_res_dl = await client.get(shot_url, timeout=10.0)
                if img_res_dl.status_code != 200:
                    print(f"Image download error for OCR: HTTP {img_res_dl.status_code} for {shot_url}")
                    continue

                # Run OCR in a worker thread so the event loop is not blocked.
                has_text = await asyncio.to_thread(check_text_in_image, img_res_dl.content)
            except Exception as e:
                # Best-effort: a failed download just skips this backdrop.
                print(f"Image download error for OCR: {e}")
                continue

            if has_text:
                print(f"Rejected by OCR (Text Found): {shot_url}")
            else:
                verified_screenshots_urls.append(shot_url)
                print(f"Clean Screenshot Passed OCR: {shot_url}")

        # --- STEP 5: optimize poster and screenshots in parallel (OptiPix) ---
        targets = ([best_poster_url] if best_poster_url else []) + verified_screenshots_urls
        results = await asyncio.gather(
            *(optimize_image(client, url, level) for url in targets)
        )

        # gather() preserves order, so the poster (when present) is first.
        if best_poster_url:
            final_poster, *final_screenshots = results
        else:
            final_poster, final_screenshots = None, list(results)

    return ProcessResponse(
        title_id=title_id,
        tmdb_id=tmdb_id,
        requested_shots=top_shots,
        total_screenshots_scanned=len(clean_backdrops),
        poster=final_poster,
        screenshots=final_screenshots
    )

# Script entry point: serve the application with uvicorn on all interfaces, port 7860.
# NOTE(review): the "app:app" import string presumes this module is named app.py — confirm.
if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860)