# File size: 6,868 Bytes
# fce7147
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import os
import httpx
from fastapi import FastAPI, Form, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, List
import asyncio
import uvicorn
import pytesseract
from PIL import Image
import io
import re

# Absolute path of the Tesseract binary on Linux; pytesseract invokes it for OCR.
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

app = FastAPI(title="TMDB + OCR Pro API | Badal Special")

# CORS is fully open: every origin, method and header is accepted.
# NOTE(review): wildcard origins combined with allow_credentials=True is a very
# permissive setup — confirm this is intended outside of development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"], 
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Endpoint of the external image-optimization service that optimize_image posts to.
OPTIPIX_API = "https://jerecom-image-optimizer-api-2.hf.space/upload-poster"
# TMDb API key read from the environment; None when the variable is unset.
TMDB_API_KEY = os.getenv("TMDB_API_KEY")

class ImageMedia(BaseModel):
    """One image: its source URL plus the URL returned by the optimizer."""
    # URL of the original (unoptimized) image.
    original_url: str
    # URL of the optimized copy; None when optimization did not succeed.
    processed_url: Optional[str]

class ProcessResponse(BaseModel):
    """Response body of the /get-media endpoint."""
    # IMDb title ID exactly as sent by the caller.
    title_id: str
    # TMDb movie ID resolved from the IMDb ID.
    tmdb_id: int
    # Number of screenshots the caller asked for (the top_shots form field).
    requested_shots: int
    # Filled with the number of language-neutral backdrops that were OCR
    # candidates; the scan stops early, so not all of them are necessarily scanned.
    total_screenshots_scanned: int
    # Widest poster listed by TMDb, or None when there are no posters.
    poster: Optional[ImageMedia]
    # Backdrops that passed the OCR text check, widest first.
    screenshots: List[ImageMedia]

# --- 1. OCR Scanner Function ---
def check_text_in_image(image_bytes: bytes) -> bool:
    """Report whether OCR detects text in an image.

    The image is shrunk to fit within 500x500 pixels and converted to
    grayscale to keep the scan fast, then read by Tesseract with the English,
    Hindi and Telugu language packs.

    Args:
        image_bytes: Raw encoded image data.

    Returns:
        True when more than 4 Latin-alphanumeric, Devanagari or Telugu
        characters are recognised (the image is treated as a poster rather
        than a clean screenshot), or when the image cannot be processed at
        all. False otherwise.
    """
    try:
        picture = Image.open(io.BytesIO(image_bytes))
        picture.thumbnail((500, 500))  # Downscale in place for a faster scan.
        grayscale = picture.convert('L')

        ocr_output = pytesseract.image_to_string(grayscale, lang='eng+hin+tel')

        # Drop everything except letters and digits of the scanned scripts.
        significant = re.sub(r'[^a-zA-Z0-9\u0900-\u097F\u0C00-\u0C7F]', '', ocr_output)
    except Exception as e:
        print(f"OCR Parsing Error: {e}")
        # Fail closed: an image that cannot be scanned is rejected.
        return True

    return len(significant) > 4

# --- 2. Parallel OptiPix Function ---
async def optimize_image(client: httpx.AsyncClient, raw_url: str, level: str):
    """Ask the OptiPix service to optimize one image.

    Args:
        client: Shared async HTTP client used for the POST request.
        raw_url: URL of the image to optimize.
        level: Compression level forwarded to the service.

    Returns:
        A dict with "original_url" and "processed_url". "processed_url" stays
        None when the request fails or the service does not report success;
        failures are printed, never raised.
    """
    outcome = {"original_url": raw_url, "processed_url": None}
    try:
        response = await client.post(
            OPTIPIX_API,
            data={"level": level, "url": raw_url},
            timeout=30.0,
        )
        payload = response.json()
        if payload.get("success"):
            outcome["processed_url"] = payload.get("url")
    except Exception as e:
        print(f"OptiPix failed for {raw_url} - Error: {e}")
    return outcome

async def _tmdb_get_json(client: httpx.AsyncClient, url: str, params: dict) -> dict:
    """Fetch a TMDb endpoint and return its decoded JSON object.

    Raises:
        HTTPException: 502 when TMDb is unreachable, answers with an error
            status, or returns a body that is not a JSON object.
    """
    try:
        response = await client.get(url, params=params)
        response.raise_for_status()
        payload = response.json()
    except (httpx.HTTPError, ValueError) as exc:
        # Only the exception type is logged: httpx error messages contain the
        # full request URL, which carries the API key as a query parameter.
        print(f"TMDb request failed: {type(exc).__name__}")
        raise HTTPException(status_code=502, detail="TMDb request failed") from exc
    if not isinstance(payload, dict):
        raise HTTPException(status_code=502, detail="TMDb request failed")
    return payload


async def _collect_text_free_backdrops(
    client: httpx.AsyncClient, backdrops: list, wanted: int
) -> list:
    """Return up to *wanted* backdrop URLs in which OCR finds no text."""
    verified_urls = []
    for shot in backdrops:
        if len(verified_urls) >= wanted:
            break  # Enough clean screenshots collected; stop scanning.

        shot_url = f"https://image.tmdb.org/t/p/original{shot['file_path']}"
        try:
            download = await client.get(shot_url, timeout=10.0)
            if download.status_code != 200:
                print(f"Skipped (HTTP {download.status_code}): {shot_url}")
                continue
            # check_text_in_image is synchronous; run it in a worker thread so
            # the event loop is not blocked while OCR runs.
            has_text = await asyncio.to_thread(check_text_in_image, download.content)
        except Exception as e:
            # Best effort: one failing image must not abort the whole request.
            print(f"Image download error for OCR: {e}")
            continue

        if has_text:
            print(f"Rejected by OCR (Text Found): {shot_url}")
        else:
            verified_urls.append(shot_url)
            print(f"Clean Screenshot Passed OCR: {shot_url}")
    return verified_urls


@app.post("/get-media", response_model=ProcessResponse)
async def get_media(
    title_id: str = Form(..., description="IMDb Title ID (e.g., tt3801314)"),
    top_shots: int = Form(3, description="Number of screenshots required"),
    level: str = Form("extreme", description="Compression level")
):
    """Resolve an IMDb title to TMDb artwork and return optimized image URLs.

    Steps: look up the TMDb movie for the IMDb ID, fetch its image list, pick
    the widest poster, OCR-scan language-neutral backdrops (widest first)
    until ``top_shots`` text-free ones are found, then send the poster and the
    selected backdrops to OptiPix concurrently.

    Args:
        title_id: IMDb title ID, e.g. ``tt3801314``.
        top_shots: Maximum number of text-free screenshots to return.
        level: Compression level forwarded to OptiPix.

    Returns:
        A ProcessResponse with the poster (or None) and the screenshots.

    Raises:
        HTTPException: 500 when TMDB_API_KEY is not configured, 404 when TMDb
            has no movie for the IMDb ID, 502 when a TMDb request fails.
    """
    if not TMDB_API_KEY:
        raise HTTPException(status_code=500, detail="TMDB_API_KEY is missing!")

    # Local import keeps this block self-contained.
    from urllib.parse import quote

    auth = {"api_key": TMDB_API_KEY}

    async with httpx.AsyncClient(timeout=120.0) as client:
        # --- STEP 1: resolve the TMDb ID ---
        # title_id is untrusted form input: percent-encode it so it cannot
        # alter the request path or inject query parameters.
        find_data = await _tmdb_get_json(
            client,
            f"https://api.themoviedb.org/3/find/{quote(title_id, safe='')}",
            {"external_source": "imdb_id", **auth},
        )

        movie_results = find_data.get("movie_results") or []
        if not movie_results:
            # Raising (instead of returning an ad-hoc dict) keeps the reply
            # compatible with response_model and gives clients a real 404.
            raise HTTPException(
                status_code=404,
                detail="TMDb पर इस IMDb ID की कोई मूवी नहीं मिली!",
            )

        tmdb_id = movie_results[0]["id"]

        # --- STEP 2: fetch the image list from TMDb ---
        img_data = await _tmdb_get_json(
            client,
            f"https://api.themoviedb.org/3/movie/{tmdb_id}/images",
            auth,
        )

        raw_backdrops = img_data.get("backdrops") or []
        raw_posters = [p for p in (img_data.get("posters") or []) if p.get("file_path")]

        # Keep only backdrops whose language is null, then try the widest first.
        clean_backdrops = [
            shot for shot in raw_backdrops
            if shot.get("iso_639_1") is None and shot.get("file_path")
        ]
        clean_backdrops.sort(key=lambda shot: shot.get("width") or 0, reverse=True)

        # --- STEP 3: pick the widest poster ---
        best_poster_url = None
        if raw_posters:
            best_poster = max(raw_posters, key=lambda p: p.get("width") or 0)
            best_poster_url = f"https://image.tmdb.org/t/p/original{best_poster['file_path']}"

        # --- STEP 4: OCR scan of the backdrops ---
        verified_screenshots_urls = await _collect_text_free_backdrops(
            client, clean_backdrops, top_shots
        )

        # --- STEP 5: optimize poster and screenshots concurrently (OptiPix) ---
        targets = list(verified_screenshots_urls)
        if best_poster_url:
            targets.insert(0, best_poster_url)

        results = await asyncio.gather(
            *(optimize_image(client, url, level) for url in targets)
        )

        # gather() preserves order, so the poster (when present) is first.
        if best_poster_url:
            final_poster, final_screenshots = results[0], results[1:]
        else:
            final_poster, final_screenshots = None, results

    return ProcessResponse(
        title_id=title_id,
        tmdb_id=tmdb_id,
        requested_shots=top_shots,
        # NOTE(review): this is the number of OCR candidates, not the number
        # actually scanned (the scan stops early) — kept for compatibility.
        total_screenshots_scanned=len(clean_backdrops),
        poster=final_poster,
        screenshots=final_screenshots
    )

# Start the server on all interfaces, port 7860, when the file is run as a script.
# NOTE(review): the "app:app" import string presumably requires this file to be
# importable as module `app` (e.g. saved as app.py) — confirm.
if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860)