# Spaces: Running
import asyncio
import io
import os
import re
from typing import Optional, List
from urllib.parse import quote

import httpx
import pytesseract
import uvicorn
from fastapi import FastAPI, Form, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image
from pydantic import BaseModel
# Linux path of the Tesseract executable that pytesseract should invoke.
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"

# Endpoint of the external image optimizer that optimize_image posts to.
OPTIPIX_API = "https://bk939448-image-optimizer-api.hf.space/upload-poster"

# TMDb API key read from the environment; None when the variable is unset.
TMDB_API_KEY = os.environ.get("TMDB_API_KEY")

app = FastAPI(title="TMDB + OCR Pro API | Badal Special")

# NOTE(review): wildcard origins/methods/headers are combined with
# allow_credentials=True. The FastAPI CORS documentation says wildcards must
# not be used together with credentials -- confirm whether credentialed
# cross-origin requests are actually needed here.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
)
class ImageMedia(BaseModel):
    """An image URL paired with the URL of its optimized version."""

    # URL of the image as it was found at the source.
    original_url: str
    # URL of the optimized image, or None when no optimized version exists.
    # The explicit default keeps the field optional under Pydantic v2, where
    # a bare ``Optional[...]`` annotation is still a required field.
    processed_url: Optional[str] = None
class ProcessResponse(BaseModel):
    """Response payload describing the poster and screenshots for a title."""

    # IMDb title ID exactly as supplied by the caller.
    title_id: str
    # TMDb movie ID resolved from the IMDb ID.
    tmdb_id: int
    # Number of screenshots the caller asked for.
    requested_shots: int
    # Number of candidate backdrops that were available for OCR screening.
    total_screenshots_scanned: int
    # Poster entry, or None when the title has no poster. The explicit
    # default keeps the field optional under Pydantic v2, where a bare
    # ``Optional[...]`` annotation is still a required field.
    poster: Optional[ImageMedia] = None
    # Screenshots that passed the OCR screening.
    screenshots: List[ImageMedia]
# --- 1. OCR Scanner Function ---
# Matches every character that is NOT an ASCII letter/digit, a Devanagari
# character (U+0900-U+097F) or a Telugu character (U+0C00-U+0C7F).
# Compiled once at import time instead of on every call.
_NON_TEXT_CHARS = re.compile(r'[^a-zA-Z0-9\u0900-\u097F\u0C00-\u0C7F]')


def check_text_in_image(
    image_bytes: bytes,
    *,
    lang: str = 'eng+hin+tel',
    max_clean_chars: int = 4,
) -> bool:
    """Report whether OCR finds a meaningful amount of text in an image.

    The image is shrunk to fit within 500x500 pixels and converted to
    grayscale before being passed to Tesseract, which keeps scanning fast.

    Args:
        image_bytes: Raw encoded image data.
        lang: Tesseract language specification. The default scans English,
            Hindi and Telugu.
        max_clean_chars: Largest number of recognized letters/digits that is
            still treated as "no text".

    Returns:
        True when more than ``max_clean_chars`` letters/digits are recognized
        (the image is treated as a poster rather than a clean screenshot).
        Also True when the image cannot be decoded or OCR fails, so that a
        doubtful image is rejected rather than accepted.
    """
    try:
        # The context manager releases the decoder's resources even when
        # thumbnailing, conversion or OCR raises.
        with Image.open(io.BytesIO(image_bytes)) as img:
            img.thumbnail((500, 500))  # shrink in place for faster scanning
            grayscale = img.convert('L')
            text = pytesseract.image_to_string(grayscale, lang=lang)
        # Keep only letters and digits of the scanned scripts.
        clean_text = _NON_TEXT_CHARS.sub('', text)
        return len(clean_text) > max_clean_chars
    except Exception as e:
        print(f"OCR Parsing Error: {e}")
        return True  # fail closed: take no risk, reject the image
# --- 2. Parallel OptiPix Function ---
async def optimize_image(
    client: "httpx.AsyncClient",
    raw_url: str,
    level: str,
    timeout: float = 30.0,
) -> dict:
    """Ask the OptiPix service to optimize one image; never raises.

    Args:
        client: Async HTTP client used to send the request.
        raw_url: URL of the image to optimize.
        level: Compression level forwarded to the service.
        timeout: Per-request timeout in seconds.

    Returns:
        A dict with ``original_url`` (always ``raw_url``) and
        ``processed_url`` (the URL reported by the service, or None when the
        request failed or the service did not report success).
    """
    result = {"original_url": raw_url, "processed_url": None}
    try:
        res = await client.post(
            OPTIPIX_API,
            data={"level": level, "url": raw_url},
            timeout=timeout,
        )
        # Surface HTTP errors as such, instead of as a confusing JSON decode
        # failure on an error page.
        res.raise_for_status()
        data = res.json()
        # Only a JSON object can carry the success flag and the URL.
        if isinstance(data, dict) and data.get("success"):
            result["processed_url"] = data.get("url")
    except Exception as e:
        # Best effort: one failed optimization must not fail the whole batch.
        print(f"OptiPix failed for {raw_url} - Error: {e}")
    return result
def _image_width(image: dict) -> int:
    """Return the ``width`` of a TMDb image entry, or 0 when it is absent."""
    return image.get("width") or 0


async def _tmdb_get_json(client: "httpx.AsyncClient", path: str, **params) -> dict:
    """GET a TMDb v3 endpoint and return its decoded JSON object.

    The API key is sent through ``params`` so that the HTTP client performs
    the query-string encoding.

    Raises:
        HTTPException: 502 when the request fails, TMDb answers with an
            error status, or the body is not a JSON object.
    """
    try:
        response = await client.get(
            f"https://api.themoviedb.org/3/{path}",
            params={**params, "api_key": TMDB_API_KEY},
        )
        response.raise_for_status()
        payload = response.json()
    except (httpx.HTTPError, ValueError) as exc:
        # Report only the exception type: httpx error messages embed the
        # request URL, whose query string carries the API key.
        raise HTTPException(
            status_code=502,
            detail=f"TMDb request failed ({type(exc).__name__})",
        ) from exc
    if not isinstance(payload, dict):
        raise HTTPException(status_code=502, detail="TMDb returned an unexpected payload")
    return payload


async def _collect_text_free_backdrops(
    client: "httpx.AsyncClient", backdrops: list, limit: int
) -> list:
    """Return up to ``limit`` backdrop URLs in which OCR finds no text.

    Backdrops are downloaded and scanned one at a time, in the given order,
    and scanning stops as soon as ``limit`` clean images have been found.
    """
    verified_urls = []
    for shot in backdrops:
        if len(verified_urls) >= limit:
            break  # requirement met, stop scanning
        file_path = shot.get("file_path")
        if not file_path:
            continue  # nothing to download for this entry
        shot_url = f"https://image.tmdb.org/t/p/original{file_path}"
        try:
            # Download the image and hand it to OCR.
            download = await client.get(shot_url, timeout=10.0)
            if download.status_code != 200:
                continue
            # Run OCR in a worker thread so the event loop is not blocked.
            has_text = await asyncio.to_thread(check_text_in_image, download.content)
        except Exception as e:
            print(f"Image download error for OCR: {e}")
            continue
        if has_text:
            print(f"Rejected by OCR (Text Found): {shot_url}")
        else:
            verified_urls.append(shot_url)
            print(f"Clean Screenshot Passed OCR: {shot_url}")
    return verified_urls


# NOTE(review): no route decorator (e.g. ``@app.post(...)``) is visible on this
# handler in the source as given -- confirm it is registered with ``app``.
async def get_media(
    title_id: str = Form(..., description="IMDb Title ID (e.g., tt3801314)"),
    top_shots: int = Form(3, description="Number of screenshots required"),
    level: str = Form("extreme", description="Compression level"),
):
    """Resolve an IMDb title on TMDb and return its optimized poster and screenshots.

    Steps: look up the TMDb movie ID for ``title_id``; fetch the movie's
    images; pick the widest poster; OCR-screen language-less backdrops
    (widest first) until ``top_shots`` text-free ones are found; optimize
    the poster and the selected screenshots concurrently.

    Args:
        title_id: IMDb title ID.
        top_shots: Number of text-free screenshots wanted.
        level: Compression level forwarded to the optimizer.

    Returns:
        A ``ProcessResponse``, or a dict with an ``error`` key when TMDb has
        no movie for the given IMDb ID.

    Raises:
        HTTPException: 500 when ``TMDB_API_KEY`` is not configured; 502 when
            a TMDb request fails.
    """
    if not TMDB_API_KEY:
        raise HTTPException(status_code=500, detail="TMDB_API_KEY is missing!")

    async with httpx.AsyncClient(timeout=120.0) as client:
        # --- STEP 1: find the TMDb ID ---
        # title_id is untrusted form input: percent-encode it so it cannot
        # alter the request path.
        find_data = await _tmdb_get_json(
            client,
            f"find/{quote(title_id, safe='')}",
            external_source="imdb_id",
        )
        movie_results = find_data.get("movie_results") or []
        if not movie_results:
            return {"error": "TMDb पर इस IMDb ID की कोई मूवी नहीं मिली!"}
        tmdb_id = movie_results[0]["id"]

        # --- STEP 2: fetch the images from TMDb ---
        img_data = await _tmdb_get_json(client, f"movie/{tmdb_id}/images")
        raw_backdrops = img_data.get("backdrops") or []
        raw_posters = img_data.get("posters") or []

        # Keep only backdrops whose language is null, widest first.
        clean_backdrops = sorted(
            (shot for shot in raw_backdrops if shot.get("iso_639_1") is None),
            key=_image_width,
            reverse=True,
        )

        # --- STEP 3: pick the poster (widest; first one wins on ties) ---
        best_poster_url = None
        usable_posters = [poster for poster in raw_posters if poster.get("file_path")]
        if usable_posters:
            widest = max(usable_posters, key=_image_width)
            best_poster_url = f"https://image.tmdb.org/t/p/original{widest['file_path']}"

        # --- STEP 4: OCR screening of the backdrops ---
        verified_screenshots_urls = await _collect_text_free_backdrops(
            client, clean_backdrops, top_shots
        )

        # --- STEP 5: parallel optimization (OptiPix) ---
        urls_to_optimize = [best_poster_url] if best_poster_url else []
        urls_to_optimize.extend(verified_screenshots_urls)
        results = await asyncio.gather(
            *(optimize_image(client, url, level) for url in urls_to_optimize)
        )

        # The poster, when present, was queued first.
        if best_poster_url:
            final_poster, *final_screenshots = results
        else:
            final_poster, final_screenshots = None, list(results)

        return ProcessResponse(
            title_id=title_id,
            tmdb_id=tmdb_id,
            requested_shots=top_shots,
            # Counts the candidate backdrops, not only those actually OCR'd
            # before the requested number was reached.
            total_screenshots_scanned=len(clean_backdrops),
            poster=final_poster,
            screenshots=final_screenshots,
        )
if __name__ == "__main__":
    # When run as a script, serve the ``app`` object of module ``app`` on
    # all network interfaces, port 7860.
    uvicorn.run(
        "app:app",
        port=7860,
        host="0.0.0.0",
    )