Spaces:

minaewrw
/

mina-api

Paused

Mina

Fix: Move DB to /tmp and optimize health checks for HF

87d3243 5 months ago

13.1 kB

	import logging
	import time
	from typing import List, Optional
	from fastapi import FastAPI, Request, HTTPException, Query
	from fastapi.responses import JSONResponse, FileResponse, StreamingResponse, RedirectResponse
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.middleware.gzip import GZipMiddleware
	import httpx
	from scraper.engine import scraper
	from downloader import downloader
	import os
	import re
	from urllib.parse import unquote, quote
	from fastapi.staticfiles import StaticFiles
	from database import init_db
	from keep_alive import keep_alive
	import asyncio
	import io

	# Process-wide logging: INFO threshold, timestamped "[LEVEL] name: message" records.
	logging.basicConfig(
	    datefmt="%Y-%m-%d %H:%M:%S",
	    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
	    level=logging.INFO,
	)
	# Logger used by the handlers and background helpers in this module.
	logger = logging.getLogger("backend")

	# The ASGI application object that the route decorators in this file attach to.
	app = FastAPI(version="2.0.0", title="MEIH Movies API")

	# --- Simple Caching Layer ---
	class MemoryCache:
	    """In-process TTL cache with a bounded number of entries.

	    Values are stored as ``(expire_time, data)`` tuples keyed by string.
	    Cache keys in this module include user-supplied text (search queries,
	    ids), so the cache is capped at ``max_entries`` to keep memory bounded.
	    When the cap is reached, expired entries are purged first and then the
	    entries closest to expiry are evicted.
	    """

	    def __init__(self, max_entries: int = 1024):
	        """Create an empty cache holding at most *max_entries* items (minimum 1)."""
	        self._cache = {}
	        self._max_entries = max(1, max_entries)

	    def get(self, key: str):
	        """Return the cached value for *key*, or ``None`` if absent or expired."""
	        item = self._cache.get(key)
	        if item is None:
	            return None
	        expire_time, data = item
	        if time.time() < expire_time:
	            return data
	        # Expired: drop it so stale entries do not linger after being read.
	        self._cache.pop(key, None)
	        return None

	    def set(self, key: str, data, ttl_seconds: int = 600):  # Default 10 mins
	        """Store *data* under *key* for *ttl_seconds* seconds."""
	        now = time.time()
	        # Only make room when inserting a new key; overwriting does not grow the dict.
	        if key not in self._cache and len(self._cache) >= self._max_entries:
	            self._evict(now)
	        self._cache[key] = (now + ttl_seconds, data)

	    def _evict(self, now: float) -> None:
	        """Free at least one slot: purge expired entries, then the soonest-to-expire."""
	        expired = [k for k, (expire_time, _) in self._cache.items() if expire_time <= now]
	        for k in expired:
	            del self._cache[k]
	        while self._cache and len(self._cache) >= self._max_entries:
	            soonest = min(self._cache, key=lambda k: self._cache[k][0])
	            del self._cache[soonest]


	cache = MemoryCache()

	async def warm_scraper():
	    """Prime the scraper with one home-page fetch so its cookies are synced.

	    Intended to run as a background task: any failure is logged as a warning
	    and swallowed rather than propagated.
	    """
	    logger.info("🔥 Warming up scraper in background...")
	    try:
	        # Short grace period so dependent services have time to come up.
	        await asyncio.sleep(5)
	        await scraper.fetch_home(page=1)
	        logger.info("✅ Scraper warmed up and cookies synced")
	    except Exception as exc:
	        logger.warning(f"⚠️ Scraper warmup failed (will retry on first request): {exc}")

	# Strong references to fire-and-forget tasks. The event loop keeps only weak
	# references to tasks, so a task nobody holds may be garbage-collected
	# before it finishes.
	_background_tasks = set()


	def _spawn_background(coro):
	    """Schedule *coro* as a task, keep it referenced until done, and log its failure."""

	    def _on_done(task):
	        _background_tasks.discard(task)
	        if task.cancelled():
	            return
	        exc = task.exception()
	        if exc is not None:
	            logger.error(f"Background task failed: {type(exc).__name__} - {exc}")

	    task = asyncio.create_task(coro)
	    _background_tasks.add(task)
	    task.add_done_callback(_on_done)
	    return task


	@app.on_event("startup")
	async def startup_event():
	    """Initialise the database and launch background services on app startup.

	    On Hugging Face (detected via the ``SPACE_ID`` or ``HF_SPACE`` environment
	    variables) only the scraper warm-up is started. Elsewhere the keep-alive
	    service, the warm-up, and the optional scraper pre-fetch are all started.
	    """
	    await init_db()
	    logger.info("🚀 Database initialized and ready")

	    # Detect if running on Hugging Face
	    is_hf = os.environ.get("SPACE_ID") is not None or os.environ.get("HF_SPACE") is not None

	    if is_hf:
	        logger.info("🤗 Running on Hugging Face - Lightweight mode enabled")
	        # Just warm up the scraper without heavy pre-fetching
	        _spawn_background(warm_scraper())
	        return

	    # Start Keep-Alive service (only for non-HF environments)
	    _spawn_background(keep_alive.start())
	    # Start Warm-up service
	    _spawn_background(warm_scraper())
	    # Start the pre-fetch (populates cache in background) when the scraper provides it
	    if hasattr(scraper, '_turbo_prefetch'):
	        _spawn_background(scraper._turbo_prefetch())
	    logger.info("🔄 Background services activated")


	# Enable CORS so a browser frontend on another origin can call this API.
	# NOTE(review): wildcard origins are combined with allow_credentials=True;
	# FastAPI's CORS documentation advises against that pairing - confirm
	# whether credentialed requests are actually needed.
	app.add_middleware(
	    CORSMiddleware,
	    allow_headers=["*"],
	    allow_methods=["*"],
	    allow_credentials=True,
	    allow_origins=["*"],
	)
	# GZip responses; minimum_size=1000 is the size threshold passed to the middleware.
	app.add_middleware(GZipMiddleware, minimum_size=1000)

	@app.get("/")
	async def root():
	    """Return a static payload confirming the API process is running."""
	    payload = {"status": "ok", "message": "MEIH Movies API is running"}
	    return payload

	@app.get("/health")
	async def health():
	    """Lightweight liveness probe: no external calls, just the current Unix time."""
	    now = time.time()
	    return {"status": "online", "timestamp": now}

	@app.get("/latest")
	async def get_latest(page: int = 1):
	    """Return the scraper's home listing for *page*, using the in-memory cache.

	    Raises:
	        HTTPException: 500 with the error text when the scraper call fails.
	    """
	    key = f"latest_{page}"
	    hit = cache.get(key)
	    if hit:
	        return hit

	    try:
	        items = await scraper.fetch_home(page=page)
	        # Only non-empty results are stored, so an empty scrape is retried next call.
	        if items:
	            cache.set(key, items)
	    except Exception as exc:
	        logger.error(f"Error fetching latest: {exc}")
	        raise HTTPException(status_code=500, detail=str(exc))
	    return items

	@app.get("/category/{cat_id}")
	async def get_category(cat_id: str, page: int = 1):
	    """Return the scraper's listing for category *cat_id* and *page*, using the cache.

	    Raises:
	        HTTPException: 500 with the error text when the scraper call fails.
	    """
	    key = f"cat_{cat_id}_{page}"
	    hit = cache.get(key)
	    if hit:
	        return hit

	    try:
	        items = await scraper.fetch_category(cat_id, page=page)
	        # Empty results are returned but never cached.
	        if items:
	            cache.set(key, items)
	    except Exception as exc:
	        logger.error(f"Error fetching category {cat_id}: {exc}")
	        raise HTTPException(status_code=500, detail=str(exc))
	    return items

	@app.get("/search")
	async def search(q: str):
	    """Return scraper search results for query *q*, cached for one hour when non-empty.

	    Raises:
	        HTTPException: 500 with the error text when the scraper call fails.
	    """
	    key = f"search_{q}"
	    hit = cache.get(key)
	    if hit:
	        return hit

	    try:
	        items = await scraper.search(q)
	        if items:
	            # Search results are kept longer than the cache's default TTL.
	            cache.set(key, items, ttl_seconds=3600)
	    except Exception as exc:
	        logger.error(f"Error searching for {q}: {exc}")
	        raise HTTPException(status_code=500, detail=str(exc))
	    return items

	@app.get("/details/{safe_id}")
	async def get_details(safe_id: str):
	    """Return the scraper's details for *safe_id*, cached for 24 hours.

	    Returns a 404 JSON response (``{"error": "Content not found"}``) when the
	    scraper yields nothing.

	    Raises:
	        HTTPException: 500 with the error text when the scraper call fails.
	    """
	    key = f"details_{safe_id}"
	    hit = cache.get(key)
	    if hit:
	        return hit

	    try:
	        details = await scraper.fetch_details(safe_id)
	        if details:
	            cache.set(key, details, ttl_seconds=86400)  # 24h
	            return details
	        return JSONResponse(status_code=404, content={"error": "Content not found"})
	    except Exception as exc:
	        logger.error(f"Error fetching details for {safe_id}: {exc}")
	        raise HTTPException(status_code=500, detail=str(exc))

	def _image_cache_path(url: str):
	    """Return the disk-cache file path for *url*, or None if the cache dir is unusable."""
	    import hashlib

	    cache_dir = os.path.join(base_dir, "cache", "images")
	    try:
	        os.makedirs(cache_dir, exist_ok=True)
	    except OSError as e:
	        # e.g. a read-only application directory: keep serving, just without the disk cache.
	        logger.warning(f"Image cache directory unavailable at {cache_dir}: {e}")
	        return None
	    # MD5 is used only to derive a filename, not for security.
	    url_hash = hashlib.md5(url.encode()).hexdigest()
	    return os.path.join(cache_dir, f"{url_hash}.img")


	def _store_image(cache_path: str, content: bytes) -> None:
	    """Write *content* to *cache_path* atomically; failures are logged, never raised."""
	    tmp_path = f"{cache_path}.{os.getpid()}.tmp"
	    try:
	        with open(tmp_path, "wb") as f:
	            f.write(content)
	        # Rename into place so a concurrent reader never sees a half-written file.
	        os.replace(tmp_path, cache_path)
	    except OSError as e:
	        logger.warning(f"Could not cache image at {cache_path}: {e}")
	        try:
	            os.remove(tmp_path)
	        except OSError:
	            pass


	@app.get("/proxy/image")
	async def proxy_image(url: str):
	    """Fetch a remote image on behalf of the client, with a one-week disk cache.

	    Args:
	        url: Absolute http(s) URL of the image (URL-decoded once more here).

	    Returns:
	        The cached file, the freshly fetched bytes, or a JSON error payload
	        carrying the upstream status (504 on timeout, 500 on other errors).

	    Raises:
	        HTTPException: 400 when *url* is empty or not an http(s) URL.
	    """
	    if not url:
	        raise HTTPException(status_code=400, detail="URL is required")

	    url = unquote(url)
	    if not url.lower().startswith(("http://", "https://")):
	        raise HTTPException(status_code=400, detail="Only http(s) URLs are supported")
	    # NOTE(review): the target host is caller-controlled (SSRF surface, e.g.
	    # internal services); consider an allow-list of image hosts.

	    # --- Image Disk Cache (best effort) ---
	    cache_path = _image_cache_path(url)

	    # 1. Serve from cache when the file exists and is younger than one week
	    if cache_path is not None:
	        try:
	            age = time.time() - os.path.getmtime(cache_path)
	        except OSError:
	            age = None  # not cached (or removed between checks)
	        if age is not None and age < 604800:
	            return FileResponse(
	                cache_path,
	                media_type="image/jpeg",  # Approximate, browser will handle
	                headers={"Cache-Control": "public, max-age=31536000"},
	            )

	    # 2. Fetch from the origin
	    try:
	        # Using follow_redirects and a longer timeout for images
	        async with httpx.AsyncClient(timeout=20.0, follow_redirects=True) as client:
	            resp = await client.get(url, headers={"User-Agent": scraper.headers["User-Agent"]})
	            if resp.status_code == 200:
	                content = resp.content
	                if cache_path is not None:
	                    _store_image(cache_path, content)

	                # Return the image bytes directly
	                return StreamingResponse(
	                    io.BytesIO(content),
	                    media_type=resp.headers.get("Content-Type", "image/jpeg"),
	                    headers={"Cache-Control": "public, max-age=31536000"},
	                )
	            logger.warning(f"Failed to proxy image {url} (Status: {resp.status_code})")
	            return JSONResponse(
	                status_code=resp.status_code,
	                content={"error": f"Failed (Status {resp.status_code})"},
	            )
	    except httpx.TimeoutException:
	        logger.warning(f"Timeout proxying image: {url}")
	        return JSONResponse(status_code=504, content={"error": "Image timeout"})
	    except Exception as e:
	        logger.error(f"Proxy image error for {url}: {type(e).__name__} - {str(e)}")
	        return JSONResponse(status_code=500, content={"error": str(e)})

	@app.get("/download/info")
	async def get_download_info(url: str):
	    """Return whatever ``downloader.get_info`` reports for *url*.

	    Raises:
	        HTTPException: 500 with the error text when the downloader call fails.
	    """
	    try:
	        return await downloader.get_info(url)
	    except Exception as exc:
	        logger.error(f"Download info error for {url}: {exc}")
	        raise HTTPException(status_code=500, detail=str(exc))



	def _is_proxy_host(url: str, proxy_domains) -> bool:
	    """Return True when the hostname of *url* is one of *proxy_domains* or a subdomain."""
	    from urllib.parse import urlsplit

	    try:
	        host = (urlsplit(url).hostname or "").lower()
	    except ValueError:
	        return False
	    return any(host == domain or host.endswith("." + domain) for domain in proxy_domains)


	def _content_disposition(filename: str) -> str:
	    """Build an attachment Content-Disposition value with ASCII and UTF-8 filenames."""
	    # ASCII fallback: replace non-printable / non-ASCII runs, then characters
	    # that would break out of the quoted string.
	    ascii_filename = re.sub(r'[^\x20-\x7E]+', '_', filename)
	    ascii_filename = re.sub(r'["\\]', '_', ascii_filename)
	    encoded_filename = quote(filename, safe="")
	    return f"attachment; filename=\"{ascii_filename}\"; filename*=UTF-8''{encoded_filename}"


	@app.get("/download/file")
	async def download_file(url: str, filename: str = "video.mp4"):
	    """Handles file downloads, proxying if necessary to bypass IP blocks or hotlink protection.

	    Args:
	        url: Source URL of the file (URL-decoded once more here).
	        filename: Name offered to the client in Content-Disposition.

	    Returns:
	        A streaming proxy response for hosts in the proxy list, otherwise a
	        redirect to *url*.

	    Raises:
	        HTTPException: 400 when *url* is empty; 502 when the proxied source
	            cannot be reached or does not answer 200.
	    """
	    if not url:
	        raise HTTPException(status_code=400, detail="URL is required")

	    url = unquote(url)

	    # Domains that REQUIRE proxying (IP-bound or strict hotlink protection)
	    proxy_domains = [
	        "googlevideo.com",
	        "manifest.googlevideo.com",
	        "larozavideo.net",
	        "larooza.site",
	        "larooza.mom",
	        "laroza-tv.net",
	        "youtube.com",
	        "youtu.be",
	    ]

	    # Match on the hostname, not the raw URL text, so a domain name placed in
	    # the path or query cannot force an arbitrary URL through the proxy.
	    if not _is_proxy_host(url, proxy_domains):
	        # For other sources, a simple redirect is much faster and saves server bandwidth
	        return RedirectResponse(url=url)

	    logger.info(f"🛡️ Proxying download: {filename[:50]}...")

	    # Open the upstream response BEFORE replying, so its status and headers are
	    # known up front; a failing source becomes a 502 instead of an empty 200.
	    client = httpx.AsyncClient(timeout=httpx.Timeout(None, connect=10.0), follow_redirects=True)
	    try:
	        request = client.build_request("GET", url, headers={"User-Agent": scraper.headers["User-Agent"]})
	        upstream = await client.send(request, stream=True)
	    except Exception as e:
	        await client.aclose()
	        logger.error(f"Proxy source unreachable: {type(e).__name__} - {e}")
	        raise HTTPException(status_code=502, detail="Download source unreachable")

	    if upstream.status_code != 200:
	        status = upstream.status_code
	        await upstream.aclose()
	        await client.aclose()
	        logger.error(f"Proxy source returned {status}")
	        raise HTTPException(status_code=502, detail=f"Download source returned {status}")

	    async def stream_generator():
	        try:
	            async for chunk in upstream.aiter_bytes(chunk_size=1024 * 1024):
	                yield chunk
	        except Exception as e:
	            logger.error(f"Streaming error: {e}")
	        finally:
	            # Always release the upstream connection, including on client disconnect.
	            await upstream.aclose()
	            await client.aclose()

	    headers = {
	        "Content-Disposition": _content_disposition(filename),
	        "Access-Control-Expose-Headers": "Content-Disposition",
	    }
	    # aiter_bytes() yields decoded bytes, so the upstream length is only valid
	    # when the body is not content-encoded.
	    content_length = upstream.headers.get("Content-Length")
	    if content_length and "Content-Encoding" not in upstream.headers:
	        headers["Content-Length"] = content_length
	    content_type = upstream.headers.get("Content-Type", "video/mp4")

	    return StreamingResponse(stream_generator(), media_type=content_type, headers=headers)

	# An earlier "/health" route in this file is registered first and therefore
	# answers that path; the "/health/detailed" alias keeps this deeper check reachable.
	@app.get("/health/detailed")
	@app.get("/health")
	async def health():
	    """Detailed health report: backend, FlareSolverr reachability, scraper sync flag.

	    FlareSolverr is probed at ``http://localhost:8191/health`` with a 5 second
	    timeout; any failure of that probe is reported as ``"OFFLINE"`` rather
	    than raised.
	    """
	    # Check FlareSolverr
	    fs_status = "OFFLINE"
	    try:
	        # Generous timeout as the solver might be busy
	        async with httpx.AsyncClient(timeout=5.0) as client:
	            resp = await client.get("http://localhost:8191/health")
	        if resp.status_code == 200:
	            fs_status = "ONLINE"
	    except Exception as e:
	        # Best effort: an unreachable solver is a status, not an error. Catching
	        # Exception (not a bare except) lets task cancellation propagate.
	        logger.debug(f"FlareSolverr health probe failed: {type(e).__name__} - {e}")

	    return {
	        "backend": "ONLINE",
	        "flaresolverr": fs_status,
	        # None when the scraper object does not expose the flag.
	        "scraper_sync": getattr(scraper, "_cookies_synced", None),
	        "timestamp": time.time(),
	    }

	# --- Frontend Mounting ---
	# This ensures that our React app is served directly by FastAPI in production
	# Check both relative and same-level structures for Docker/Local compatibility
	base_dir = os.path.dirname(__file__)
	frontend_path = os.path.join(base_dir, "meih-netflix-clone", "dist")

	if not os.path.exists(frontend_path):
	    # Try one level up (local dev structure)
	    frontend_path = os.path.join(base_dir, "..", "meih-netflix-clone", "dist")

	if os.path.exists(frontend_path):
	    # Assets are usually in dist/assets and referenced as /assets/ in Vite
	    assets_path = os.path.join(frontend_path, "assets")
	    if os.path.exists(assets_path):
	        app.mount("/assets", StaticFiles(directory=assets_path), name="assets")

	    # Canonical dist root, used to confine requested files to the frontend folder.
	    _frontend_root = os.path.realpath(frontend_path)

	    @app.get("/{full_path:path}")
	    async def serve_frontend(full_path: str):
	        """Serve a built frontend file, falling back to index.html for client-side routes.

	        Paths that look like backend routes get a JSON 404 instead of the app
	        shell. Requested files are resolved and must stay inside the dist
	        directory, so ``..`` segments or absolute paths cannot read other files.
	        """
	        # Unmatched API-looking paths should fail as API errors, not return the SPA
	        if full_path.startswith(("api/", "latest", "category/", "search", "details", "proxy", "download", "health")):
	            return JSONResponse(status_code=404, content={"error": "Not Found"})

	        candidate = os.path.realpath(os.path.join(_frontend_root, full_path))
	        inside_root = candidate.startswith(_frontend_root + os.sep)
	        if inside_root and os.path.isfile(candidate):
	            return FileResponse(candidate)
	        # Otherwise, serve the main index.html for React Router to handle
	        return FileResponse(os.path.join(frontend_path, "index.html"))
	else:
	    logger.warning(f"Frontend dist folder not found at {frontend_path}. Frontend serving disabled.")

	if __name__ == "__main__":
	    # Imported here so uvicorn is only required when this file is run as a script.
	    import uvicorn

	    # Port 7860 is used for Hugging Face Spaces compatibility.
	    uvicorn.run(app, port=7860, host="0.0.0.0")