# File size: 13,064 Bytes (file-viewer metadata; revisions 25ae7fe, 87d3243)
import logging
import time
from typing import List, Optional
from fastapi import FastAPI, Request, HTTPException, Query
from fastapi.responses import JSONResponse, FileResponse, StreamingResponse, RedirectResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
import httpx
from scraper.engine import scraper
from downloader import downloader
import os
import re
from urllib.parse import unquote, quote
from fastapi.staticfiles import StaticFiles
from database import init_db
from keep_alive import keep_alive
import asyncio
import io
# Configure logging
# Root logger: INFO level, lines rendered as "<timestamp> [<LEVEL>] <logger>: <message>".
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
# Logger used by the handlers and startup code in this module.
logger = logging.getLogger("backend")
# The FastAPI application; routes and middleware below are registered on it.
app = FastAPI(title="MEIH Movies API", version="2.0.0")
# --- Simple Caching Layer ---
class MemoryCache:
    """Minimal in-process key/value cache with per-entry expiry.

    Each entry is stored in ``_cache`` as ``key -> (expire_time, data)`` where
    ``expire_time`` is a ``time.time()`` timestamp. Expired entries are dropped
    both when they are read and whenever a new value is stored, so keys that
    are written once and never read again do not accumulate forever.
    """

    def __init__(self):
        self._cache = {}

    def get(self, key: str):
        """Return the cached value for *key*, or ``None`` if absent or expired."""
        entry = self._cache.get(key)
        if entry is None:
            return None
        expire_time, data = entry
        if time.time() < expire_time:
            return data
        # Expired: evict so the stale payload does not linger.
        self._cache.pop(key, None)
        return None

    def set(self, key: str, data, ttl_seconds: int = 600):  # Default 10 mins
        """Store *data* under *key* for *ttl_seconds* seconds."""
        now = time.time()
        # Writes happen only after a cache miss, so sweeping here is cheap
        # relative to the work that produced ``data``.
        self._purge_expired(now)
        self._cache[key] = (now + ttl_seconds, data)

    def _purge_expired(self, now: float) -> None:
        """Remove every entry whose expiry time is at or before *now*."""
        stale_keys = [
            key for key, (expire_time, _) in self._cache.items()
            if expire_time <= now
        ]
        for key in stale_keys:
            del self._cache[key]


cache = MemoryCache()
async def warm_scraper():
    """Prime the scraper with a single home-page fetch after a short delay.

    Any failure is logged as a warning and swallowed; nothing is raised to the
    caller. Per the log messages, the fetch is expected to sync the scraper's
    cookies.
    """
    logger.info("🔥 Warming up scraper in background...")
    try:
        # Grace period so dependent services have time to come up first.
        await asyncio.sleep(5)
        await scraper.fetch_home(page=1)
    except Exception as e:
        logger.warning(f"⚠️ Scraper warmup failed (will retry on first request): {e}")
    else:
        logger.info("✅ Scraper warmed up and cookies synced")
# Strong references to fire-and-forget tasks. The event loop only keeps weak
# references to tasks, so a task nobody else references may be garbage
# collected before it finishes.
_background_tasks = set()


def _spawn_background(coro):
    """Schedule *coro* as a task and keep it referenced until it completes."""
    task = asyncio.create_task(coro)
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
    return task


@app.on_event("startup")
async def startup_event():
    """Initialise the database and launch background services.

    A Hugging Face deployment is detected through the ``SPACE_ID`` or
    ``HF_SPACE`` environment variables. There, only the scraper warm-up is
    started. Elsewhere the keep-alive service, the warm-up, and the scraper's
    ``_turbo_prefetch`` (when it exists) are all started.
    """
    await init_db()
    logger.info("🚀 Database initialized and ready")

    # Detect if running on Hugging Face
    is_hf = os.environ.get("SPACE_ID") is not None or os.environ.get("HF_SPACE") is not None

    if not is_hf:
        # Start Keep-Alive service (only for non-HF environments)
        _spawn_background(keep_alive.start())
        # Start Warm-up service
        _spawn_background(warm_scraper())
        # Start Nitro Pre-fetch (Populates cache in background)
        if hasattr(scraper, '_turbo_prefetch'):
            _spawn_background(scraper._turbo_prefetch())
        logger.info("🔄 Background services activated")
    else:
        logger.info("🤗 Running on Hugging Face - Lightweight mode enabled")
        # Just warm up the scraper without heavy pre-fetching
        _spawn_background(warm_scraper())
# Enable CORS for frontend
# NOTE(review): wildcard origins/methods/headers are combined with
# allow_credentials=True, which permits credentialed cross-origin requests
# from any site — confirm this is intended, or list the frontend origin(s).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# GZip responses; minimum_size=1000 is the body-size threshold passed to the middleware.
app.add_middleware(GZipMiddleware, minimum_size=1000)
@app.get("/")
async def root():
    """Return a fixed status payload confirming the API is up."""
    payload = {"status": "ok", "message": "MEIH Movies API is running"}
    return payload
@app.get("/health")
async def health():
    """Return a liveness payload stamped with the current Unix time."""
    now = time.time()
    return {"status": "online", "timestamp": now}
@app.get("/latest")
async def get_latest(page: int = 1):
    """Return the scraper's home listing for *page*, served from cache when possible.

    Non-empty results are cached under ``latest_<page>`` with the cache's
    default TTL. Any scraper error is logged and surfaced as HTTP 500.
    """
    cache_key = f"latest_{page}"
    hit = cache.get(cache_key)
    if hit:
        return hit

    try:
        items = await scraper.fetch_home(page=page)
        if not items:
            # Empty results are returned but deliberately not cached.
            return items
        cache.set(cache_key, items)
        return items
    except Exception as e:
        logger.error(f"Error fetching latest: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/category/{cat_id}")
async def get_category(cat_id: str, page: int = 1):
    """Return the scraper's listing for category *cat_id* at *page*.

    Non-empty results are cached under ``cat_<cat_id>_<page>`` with the
    cache's default TTL. Any scraper error is logged and surfaced as HTTP 500.
    """
    cache_key = f"cat_{cat_id}_{page}"
    hit = cache.get(cache_key)
    if hit:
        return hit

    try:
        items = await scraper.fetch_category(cat_id, page=page)
        if not items:
            # Empty results are returned but deliberately not cached.
            return items
        cache.set(cache_key, items)
        return items
    except Exception as e:
        logger.error(f"Error fetching category {cat_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/search")
async def search(q: str):
    """Return the scraper's search results for query *q*.

    Non-empty results are cached under ``search_<q>`` for one hour. Any
    scraper error is logged and surfaced as HTTP 500.
    """
    cache_key = f"search_{q}"
    hit = cache.get(cache_key)
    if hit:
        return hit

    try:
        items = await scraper.search(q)
        if not items:
            # Empty results are returned but deliberately not cached.
            return items
        # Search results cache longer
        cache.set(cache_key, items, ttl_seconds=3600)
        return items
    except Exception as e:
        logger.error(f"Error searching for {q}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/details/{safe_id}")
async def get_details(safe_id: str):
    """Return the scraper's details for *safe_id*.

    A falsy scraper result yields a 404 JSON body. Successful results are
    cached under ``details_<safe_id>`` for 24 hours. Any scraper error is
    logged and surfaced as HTTP 500.
    """
    cache_key = f"details_{safe_id}"
    hit = cache.get(cache_key)
    if hit:
        return hit

    try:
        details = await scraper.fetch_details(safe_id)
        if details:
            # Details cache for 24h
            cache.set(cache_key, details, ttl_seconds=86400)
            return details
        return JSONResponse(status_code=404, content={"error": "Content not found"})
    except Exception as e:
        logger.error(f"Error fetching details for {safe_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
def _store_image_atomically(cache_dir, cache_path, content):
    """Write *content* to *cache_path* via a temp file in *cache_dir* plus rename.

    The rename makes the file appear fully written or not at all, so a
    concurrent request never serves a partially written image.
    """
    import tempfile

    fd, tmp_path = tempfile.mkstemp(dir=cache_dir, suffix=".tmp")
    try:
        with os.fdopen(fd, "wb") as tmp_file:
            tmp_file.write(content)
        os.replace(tmp_path, cache_path)
    except BaseException:
        # Best-effort cleanup of the temp file; the original error is re-raised.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise


@app.get("/proxy/image")
async def proxy_image(url: str):
    """Fetch a remote image for the client, with a one-week disk cache.

    Args:
        url: Image URL (URL-decoded once more here). Must be an absolute
            ``http``/``https`` URL.

    Returns:
        The cached file when a copy younger than one week exists; otherwise
        the freshly downloaded bytes, which are also written to the cache.
        Upstream non-200 statuses are mirrored as a JSON error, timeouts
        become 504, and other fetch errors become 500.

    Raises:
        HTTPException: 400 when ``url`` is empty or is not an http(s) URL.

    NOTE(review): only the scheme is validated. Requests to internal or
    private addresses are not blocked — add an allow-list if this endpoint is
    exposed publicly.
    """
    import hashlib
    from urllib.parse import urlsplit

    if not url:
        raise HTTPException(status_code=400, detail="URL is required")
    url = unquote(url)

    # Reject anything that is not a plain web URL before it reaches the HTTP
    # client or the cache directory.
    try:
        parts = urlsplit(url)
        is_web_url = parts.scheme in ("http", "https") and bool(parts.hostname)
    except ValueError:
        is_web_url = False
    if not is_web_url:
        raise HTTPException(status_code=400, detail="Only http(s) URLs are supported")

    # --- Image Disk Cache ---
    # ``base_dir`` is the module-level directory assigned near the end of this
    # file; it is bound by the time any request is served.
    cache_dir = os.path.join(base_dir, "cache", "images")
    os.makedirs(cache_dir, exist_ok=True)

    # MD5 is used only to derive a filename, not for security.
    url_hash = hashlib.md5(url.encode()).hexdigest()
    cache_path = os.path.join(cache_dir, f"{url_hash}.img")
    cache_headers = {"Cache-Control": "public, max-age=31536000"}

    # 1. Serve from cache when the copy is younger than one week (604800 s).
    try:
        cache_age = time.time() - os.path.getmtime(cache_path)
    except OSError:
        cache_age = None  # Not cached (or unreadable): fall through to fetch.
    if cache_age is not None and cache_age < 604800:
        return FileResponse(
            cache_path,
            media_type="image/jpeg",  # Approximate, browser will handle
            headers=cache_headers,
        )

    # 2. Fetch from the origin.
    try:
        # Using follow_redirects and a longer timeout for images
        async with httpx.AsyncClient(timeout=20.0, follow_redirects=True) as client:
            resp = await client.get(url, headers={"User-Agent": scraper.headers["User-Agent"]})

            if resp.status_code != 200:
                logger.warning(f"Failed to proxy image {url} (Status: {resp.status_code})")
                return JSONResponse(
                    status_code=resp.status_code,
                    content={"error": f"Failed (Status {resp.status_code})"},
                )

            content = resp.content
            # A cache-write failure must not fail the request: the image is
            # already in memory and can still be served.
            try:
                _store_image_atomically(cache_dir, cache_path, content)
            except OSError as cache_err:
                logger.warning(f"Could not cache image {url}: {cache_err}")

            # Return the image stream directly
            return StreamingResponse(
                io.BytesIO(content),
                media_type=resp.headers.get("Content-Type", "image/jpeg"),
                headers=cache_headers,
            )
    except httpx.TimeoutException:
        logger.warning(f"Timeout proxying image: {url}")
        return JSONResponse(status_code=504, content={"error": "Image timeout"})
    except Exception as e:
        logger.error(f"Proxy image error for {url}: {type(e).__name__} - {str(e)}")
        return JSONResponse(status_code=500, content={"error": str(e)})
@app.get("/download/info")
async def get_download_info(url: str):
    """Return whatever ``downloader.get_info`` reports for *url*.

    Any error is logged and surfaced as HTTP 500 with the error text.
    """
    try:
        return await downloader.get_info(url)
    except Exception as e:
        logger.error(f"Download info error for {url}: {e}")
        raise HTTPException(status_code=500, detail=str(e))
def _host_matches(hostname, domains):
    """Return True when *hostname* equals, or is a subdomain of, one of *domains*."""
    host = (hostname or "").lower().rstrip(".")
    return any(host == domain or host.endswith("." + domain) for domain in domains)


def _content_disposition(filename):
    """Build an ``attachment`` Content-Disposition value for *filename*.

    Control characters, double quotes, and backslashes are replaced so the
    name cannot break out of the quoted string or inject extra headers. The
    plain ``filename`` is an ASCII-only fallback; ``filename*`` carries the
    percent-encoded UTF-8 name.
    """
    cleaned = re.sub(r'[\x00-\x1f\x7f"\\]', '_', filename)
    ascii_filename = re.sub(r'[^\x00-\x7F]+', '_', cleaned)
    encoded_filename = quote(cleaned)
    return f"attachment; filename=\"{ascii_filename}\"; filename*=UTF-8''{encoded_filename}"


@app.get("/download/file")
async def download_file(url: str, filename: str = "video.mp4"):
    """Handles file downloads, proxying if necessary to bypass IP blocks or hotlink protection.

    Args:
        url: Source URL (URL-decoded once more here). Must be an absolute
            ``http``/``https`` URL.
        filename: Name offered to the browser for the downloaded file.

    Returns:
        A streamed proxy of the upstream body when the URL's host belongs to
        one of the proxy domains; otherwise a redirect to ``url``.

    Raises:
        HTTPException: 400 for a missing or non-http(s) URL; 502 when the
            upstream cannot be reached or does not answer with status 200.
    """
    from urllib.parse import urlsplit

    if not url:
        raise HTTPException(status_code=400, detail="URL is required")
    url = unquote(url)

    try:
        parts = urlsplit(url)
        hostname = parts.hostname
        is_web_url = parts.scheme in ("http", "https") and bool(hostname)
    except ValueError:
        hostname = None
        is_web_url = False
    if not is_web_url:
        raise HTTPException(status_code=400, detail="Only http(s) URLs are supported")

    # Domains that REQUIRE proxying (IP-bound or strict hotlink protection)
    proxy_domains = (
        "googlevideo.com",
        "manifest.googlevideo.com",
        "larozavideo.net",
        "larooza.site",
        "larooza.mom",
        "laroza-tv.net",
        "youtube.com",
        "youtu.be",
    )

    # Match on the parsed host, not on the raw URL text, so a domain name
    # appearing in a path or query string of another site does not trigger
    # proxying.
    if not _host_matches(hostname, proxy_domains):
        # For other sources, a simple redirect is much faster and saves server bandwidth
        return RedirectResponse(url=url)

    logger.info(f"🛡️ Proxying download: {filename[:50]}...")
    user_agent = scraper.headers["User-Agent"]

    # Open the upstream stream BEFORE starting our own response, so an
    # upstream failure becomes a real error status instead of an empty
    # "200 OK" download. The same response also supplies length and type.
    client = httpx.AsyncClient(timeout=None, follow_redirects=True)
    try:
        request = client.build_request("GET", url, headers={"User-Agent": user_agent})
        upstream = await client.send(request, stream=True)
    except Exception as e:
        await client.aclose()
        logger.error(f"Proxy source request failed: {e}")
        raise HTTPException(status_code=502, detail="Upstream request failed")

    if upstream.status_code != 200:
        upstream_status = upstream.status_code
        await upstream.aclose()
        await client.aclose()
        logger.error(f"Proxy source returned {upstream_status}")
        raise HTTPException(status_code=502, detail=f"Upstream returned status {upstream_status}")

    headers = {
        "Content-Disposition": _content_disposition(filename),
        "Access-Control-Expose-Headers": "Content-Disposition",
    }
    # Forward the length only when the body is not content-encoded: the
    # iterator below yields decoded bytes, whose size would otherwise differ
    # from the advertised length.
    content_length = upstream.headers.get("Content-Length")
    if content_length and "Content-Encoding" not in upstream.headers:
        headers["Content-Length"] = content_length
    content_type = upstream.headers.get("Content-Type", "video/mp4")

    async def stream_generator():
        try:
            async for chunk in upstream.aiter_bytes(chunk_size=1024 * 1024):
                yield chunk
        except Exception as e:
            logger.error(f"Streaming error: {e}")
            # Re-raise so the connection is aborted rather than ending
            # cleanly with a truncated file.
            raise
        finally:
            await upstream.aclose()
            await client.aclose()

    return StreamingResponse(stream_generator(), media_type=content_type, headers=headers)
# Registered under its own path and function name: an earlier handler in this
# file already owns "/health", and the first registered route wins, so a
# second "/health" registration here would never be called.
@app.get("/health/detailed")
async def health_detailed():
    """Report backend status plus FlareSolverr reachability and scraper sync state.

    FlareSolverr is probed at ``http://localhost:8191/health`` with a 5 second
    timeout; any failure of the probe is reported as ``"OFFLINE"`` rather than
    raised.
    """
    # Check FlareSolverr
    fs_status = "OFFLINE"
    try:
        # Increase timeout as solver might be busy
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get("http://localhost:8191/health")
        if resp.status_code == 200:
            fs_status = "ONLINE"
    except Exception as e:
        # Best-effort probe: keep the OFFLINE default, but leave a trace.
        logger.debug(f"FlareSolverr health probe failed: {e}")

    return {
        "backend": "ONLINE",
        "flaresolverr": fs_status,
        # Private scraper attribute; default to False if it is not present.
        "scraper_sync": getattr(scraper, "_cookies_synced", False),
        "timestamp": time.time(),
    }
# --- Frontend Mounting ---
# This ensures that our React app is served directly by FastAPI in production
# Check both relative and same-level structures for Docker/Local compatibility
base_dir = os.path.dirname(__file__)
frontend_path = os.path.join(base_dir, "meih-netflix-clone", "dist")
if not os.path.exists(frontend_path):
    # Try one level up (local dev structure)
    frontend_path = os.path.join(base_dir, "..", "meih-netflix-clone", "dist")

if os.path.exists(frontend_path):
    # Assets are usually in dist/assets and referenced as /assets/ in Vite
    assets_path = os.path.join(frontend_path, "assets")
    if os.path.exists(assets_path):
        app.mount("/assets", StaticFiles(directory=assets_path), name="assets")

    # Normalised dist root, used to confine file lookups to the frontend build.
    _frontend_root = os.path.abspath(frontend_path)

    @app.get("/{full_path:path}")
    async def serve_frontend(full_path: str):
        """Serve a file from the frontend build, falling back to ``index.html``.

        Paths that look like backend routes get a JSON 404. Any path that
        would resolve outside the dist directory (``..`` segments or an
        absolute path) is never served as a file; ``index.html`` is returned
        instead so client-side routing can handle it.
        """
        # Prevent infinite recursion for API routes if someone hits a wrong URL
        if full_path.startswith(("api/", "latest", "category/", "search", "details", "proxy", "download", "health")):
            return JSONResponse(status_code=404, content={"error": "Not Found"})

        # Normalise first, then require the result to stay under the dist
        # root. A plain join would let "../" climb out of the directory, and
        # an absolute ``full_path`` would replace the root entirely.
        candidate = os.path.abspath(os.path.join(_frontend_root, full_path))
        inside_root = candidate.startswith(_frontend_root + os.sep)
        if inside_root and os.path.isfile(candidate):
            return FileResponse(candidate)

        # Otherwise, serve the main index.html for React Router to handle
        return FileResponse(os.path.join(frontend_path, "index.html"))
else:
    logger.warning(f"Frontend dist folder not found at {frontend_path}. Frontend serving disabled.")
if __name__ == "__main__":
    import uvicorn

    # Use port 7860 for Hugging Face Spaces compatibility
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)
|