"""Flask proxy in front of a pool of upstream servers.

Forwards ``/v1/chat/completions`` and ``/models`` requests to a randomly
chosen entry of ``HF_SERVERS`` and, when run as a script, starts a
background thread that periodically sends a GET to every configured URL.
"""

from flask import Flask, request, Response, jsonify
import requests
import threading
import time
import random
import logging
from typing import Optional

app = Flask(__name__)

# =========================
# CONFIG
# =========================

# Extra URL(s) that are only pinged by the keep-alive worker; requests are
# never proxied to them.
Server = [
    "https://APINOW-service-auto_run.hf.space"
]

# Pool of upstream servers that proxied requests are distributed over.
HF_SERVERS = [
    "https://6lqmgwrrrn-api1.hf.space",
    "https://6lqmgwrrrn-api2.hf.space",
    "https://6lqmgwrrrn-api3.hf.space",
    "https://6lqmgwrrrn-api4.hf.space",
    "https://6lqmgwrrrn-api5.hf.space",
    "https://6lqmgwrrrn-api6.hf.space",
    "https://6lqmgwrrrn-api7.hf.space",
    "https://6lqmgwrrrn-api8.hf.space",
    "https://dokek64685-api1.hf.space",
    "https://dokek64685-api2.hf.space",
    "https://dokek64685-api3.hf.space",
    "https://dokek64685-api4.hf.space",
    "https://dokek64685-api5.hf.space",
    "https://dokek64685-api6.hf.space",
    "https://dokek64685-api7.hf.space",
    "https://dokek64685-api8.hf.space",
    "https://sayonob407-api1.hf.space",
    "https://sayonob407-api2.hf.space",
    "https://sayonob407-api3.hf.space",
    "https://sayonob407-api4.hf.space",
    "https://sayonob407-api5.hf.space",
    "https://sayonob407-api6.hf.space",
    "https://sayonob407-api7.hf.space",
    "https://sayonob407-api8.hf.space",
    "https://Shadowty491-none1.hf.space",
    "https://Shadowty491-none2.hf.space",
    "https://Shadowty491-none3.hf.space",
    "https://Shadowty491-none4.hf.space",
    "https://Shadowty491-none5.hf.space",
    "https://Shadowty491-none6.hf.space",
    "https://Shadowty491-none7.hf.space",
    "https://Shadowty491-none8.hf.space",
]

PING_INTERVAL = 120     # seconds slept between two keep-alive rounds
REQUEST_TIMEOUT = 300   # seconds, timeout passed to upstream chat requests

# Request headers that must not be copied onto the upstream request:
# - host / content-length: describe the client->proxy message; the body is
#   re-serialised by ``requests`` so they would be wrong upstream.
# - accept-encoding: the proxy decodes the upstream body itself, so it must
#   only advertise encodings ``requests`` can decode (its own default).
# - the rest are hop-by-hop headers that apply to a single connection only.
_SKIPPED_REQUEST_HEADERS = frozenset({
    "host",
    "content-length",
    "accept-encoding",
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
})

# =========================
# LOGGING
# =========================

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s — %(levelname)s — %(message)s"
)
logger = logging.getLogger("proxy")


def pick_server() -> str:
    """Return one upstream base URL chosen uniformly at random."""
    return random.choice(HF_SERVERS)


def error_response(message: str, code: int = 500):
    """Log *message* as an error and build a JSON error reply.

    Returns:
        A ``(response, status_code)`` tuple suitable for returning from a
        Flask view; the body is ``{"error": {"message", "type"}}``.
    """
    logger.error(message)
    return jsonify({
        "error": {
            "message": message,
            "type": "proxy_error"
        }
    }), code


def _forward_headers() -> dict:
    """Copy the current request's headers, minus those unsafe to forward."""
    return {
        name: value
        for name, value in request.headers.items()
        if name.lower() not in _SKIPPED_REQUEST_HEADERS
    }


def _stream_upstream(upstream):
    """Yield every non-empty upstream line followed by a blank line.

    The upstream response is always closed when iteration ends, whether it
    finished, failed, or the client went away.
    """
    try:
        for line in upstream.iter_lines():
            if line:
                yield line + b"\n\n"
    except requests.exceptions.RequestException:
        # Headers are already sent, so the status cannot change any more;
        # record the failure and let the server abort the connection.
        logger.exception("Upstream stream aborted")
        raise
    finally:
        upstream.close()


# =========================
# CHAT COMPLETIONS PROXY
# =========================

@app.route("/v1/chat/completions", methods=["POST"])
def chat_completions():
    """Forward a chat-completions request to a random upstream server.

    The JSON body is passed through unchanged. When its ``stream`` field is
    truthy the upstream reply is relayed line by line as
    ``text/event-stream``; otherwise the upstream JSON is returned as-is.

    Returns:
        The relayed upstream response, or a JSON error with status 400
        (body is not a JSON object), 504 (upstream timeout), 502 (upstream
        unreachable) or 500 (any other failure, including upstream HTTP
        errors).
    """
    # silent=True turns unparsable bodies into None; anything that is not a
    # JSON object (null, list, number, ...) cannot carry request fields.
    payload = request.get_json(force=True, silent=True)
    if not isinstance(payload, dict):
        return error_response("Invalid JSON payload", 400)

    headers = _forward_headers()
    target = pick_server() + "/v1/chat/completions"
    stream = payload.get("stream", False)

    logger.info("Forwarding request to %s | Stream=%s", target, stream)

    try:
        if stream:
            # Connect and check the status *before* building the Response so
            # that failures are still reported through the handlers below
            # instead of breaking an already-started 200 reply.
            upstream = requests.post(
                target,
                json=payload,
                headers=headers,
                stream=True,
                timeout=REQUEST_TIMEOUT
            )
            try:
                upstream.raise_for_status()
            except Exception:
                upstream.close()
                raise

            response = Response(
                _stream_upstream(upstream),
                mimetype="text/event-stream",
                headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}
            )
            # Covers the case where the generator is never started, in which
            # its ``finally`` clause would not run.
            response.call_on_close(upstream.close)
            return response

        res = requests.post(
            target,
            json=payload,
            headers=headers,
            timeout=REQUEST_TIMEOUT
        )
        res.raise_for_status()
        return jsonify(res.json())

    except requests.exceptions.Timeout:
        return error_response("Upstream server timeout", 504)
    except requests.exceptions.ConnectionError:
        return error_response("Failed to connect to upstream server", 502)
    except Exception as e:
        return error_response(str(e), 500)


# =========================
# MODELS ROUTE
# =========================

@app.route("/models", methods=["GET"])
def models():
    """Return the JSON served at ``/models`` by a random upstream server.

    Any failure (network error, non-2xx status, non-JSON body) is reported
    as a JSON error with status 500.
    """
    url = pick_server()
    target = f"{url}/models"
    logger.info("Fetching models from %s", target)

    try:
        res = requests.get(target, timeout=20)
        res.raise_for_status()
        return jsonify(res.json())
    except Exception as e:
        return error_response(f"Failed to fetch models: {e}", 500)


# =========================
# HEALTH CHECK
# =========================

@app.route("/", methods=["GET"])
def home():
    """Report that the proxy process is up."""
    return jsonify({"status": "running", "service": "HF Proxy"})


# =========================
# KEEP ALIVE PINGER
# =========================

HEADERS = {"User-Agent": "HF-Proxy-KeepAliveBot"}


def keep_alive_worker():
    """Ping every configured URL forever, sleeping PING_INTERVAL between rounds.

    Each URL in ``HF_SERVERS`` and ``Server`` receives one GET per round;
    the status code (or the error) is logged. Never returns.
    """
    while True:
        logger.info("Pinging HF servers...")
        for url in HF_SERVERS + Server:
            try:
                r = requests.get(url, headers=HEADERS, timeout=8)
                logger.info("%s → %s", url, r.status_code)
            except Exception as e:
                # Deliberately broad: one failing URL must not stop the loop
                # or kill the background thread.
                logger.warning("%s → ERROR: %s", url, e)
        time.sleep(PING_INTERVAL)


def start_background_thread():
    """Start ``keep_alive_worker`` in a daemon thread."""
    thread = threading.Thread(target=keep_alive_worker, daemon=True)
    thread.start()
    logger.info("Background keep-alive thread started")


# =========================
# MAIN
# =========================

if __name__ == "__main__":
    start_background_thread()
    app.run(
        host="0.0.0.0",
        port=7860,
        threaded=True,
        debug=False
    )