# plasma1 / app.py
# Source: Hugging Face Space snapshot (author Alvin3y1, commit dc0f259, "Update app.py")
import asyncio
import json
import os
import shutil
import subprocess
import time
import logging
import concurrent.futures
import threading
import numpy as np
import psutil
import ctypes
from ctypes import c_int, c_void_p, c_char_p, POINTER, c_ubyte, c_bool
from aiohttp import web
from aiortc import RTCPeerConnection, RTCSessionDescription, VideoStreamTrack, RTCIceServer, RTCConfiguration
from av import VideoFrame
# ==========================================
# C++ X11 CAPTURE + FAULT TOLERANCE + DUAL CHANNEL
# ==========================================
# Compiled at startup by compile_cpp() and loaded via ctypes.  Exports:
#   init_grabber / capture_frame / convert_to_yuv -- video path (XShm + swscale)
#   move_mouse / mouse_button / key_send          -- input path (XTest)
#   cleanup                                        -- frees SHM/X resources
# Video and input each open their OWN X11 connection so input injection is
# never queued behind a blocking XShmGetImage call on the capture socket.
CPP_SOURCE = r"""
#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include <X11/extensions/XShm.h>
#include <X11/extensions/XTest.h>
#include <X11/keysym.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern "C" {
#include <libswscale/swscale.h>
#include <libavutil/imgutils.h>
}
// Custom Error Handler
static int last_x_error_code = 0;
int XErrorHandlerImpl(Display *display, XErrorEvent *event) {
last_x_error_code = event->error_code;
return 0;
}
struct ScreenCap {
Display* display; // For Video Capture
Display* input_display; // For Input Injection (Separate Channel)
Window root;
XImage* image;
XShmSegmentInfo shminfo;
int width;
int height;
int is_init;
struct SwsContext* sws_ctx;
};
static ScreenCap cap = {0};
extern "C" {
void cleanup() {
if (cap.sws_ctx) {
sws_freeContext(cap.sws_ctx);
cap.sws_ctx = NULL;
}
if (cap.image) {
XShmDetach(cap.display, &cap.shminfo);
XDestroyImage(cap.image);
shmdt(cap.shminfo.shmaddr);
shmctl(cap.shminfo.shmid, IPC_RMID, 0);
cap.image = NULL;
}
if (cap.input_display) {
XCloseDisplay(cap.input_display);
cap.input_display = NULL;
}
if (cap.display) {
XCloseDisplay(cap.display);
cap.display = NULL;
}
cap.is_init = 0;
}
int init_grabber(int w, int h, const char* display_name) {
if (cap.is_init && cap.width == w && cap.height == h && cap.display) return 1;
if (cap.is_init) cleanup();
XSetErrorHandler(XErrorHandlerImpl);
if (!XInitThreads()) return 0;
// 1. Video Connection
cap.display = XOpenDisplay(display_name);
if (!cap.display) return 0;
// 2. Input Connection (Separate socket for lower latency)
cap.input_display = XOpenDisplay(display_name);
if (!cap.input_display) {
XCloseDisplay(cap.display);
return 0;
}
cap.root = DefaultRootWindow(cap.display);
cap.width = w;
cap.height = h;
XWindowAttributes window_attributes;
XGetWindowAttributes(cap.display, cap.root, &window_attributes);
Screen* screen = window_attributes.screen;
cap.shminfo.shmid = shmget(IPC_PRIVATE, w * h * 4, IPC_CREAT | 0777);
cap.shminfo.shmaddr = (char*)shmat(cap.shminfo.shmid, 0, 0);
cap.shminfo.readOnly = False;
cap.image = XShmCreateImage(cap.display, DefaultVisualOfScreen(screen),
window_attributes.depth, ZPixmap, NULL,
&cap.shminfo, w, h);
if (!cap.image) {
cleanup();
return 0;
}
cap.image->data = cap.shminfo.shmaddr;
XShmAttach(cap.display, &cap.shminfo);
XSync(cap.display, False);
// SWS_FAST_BILINEAR is good, SWS_POINT is faster but blocky.
// Using BILINEAR for balance.
cap.sws_ctx = sws_getContext(w, h, AV_PIX_FMT_BGRA,
w, h, AV_PIX_FMT_YUV420P,
SWS_FAST_BILINEAR, NULL, NULL, NULL);
if (!cap.sws_ctx) {
cleanup();
return 0;
}
cap.is_init = 1;
return 1;
}
int capture_frame() {
if (cap.is_init && cap.display && cap.image) {
last_x_error_code = 0;
// This blocks only the video thread
XShmGetImage(cap.display, cap.root, cap.image, 0, 0, AllPlanes);
return (last_x_error_code == 0);
}
return 0;
}
// Optimized pointer math happens inside sws_scale
void convert_to_yuv(void* y, int y_stride, void* u, int u_stride, void* v, int v_stride) {
if (!cap.is_init || !cap.sws_ctx || !cap.image) return;
const uint8_t* srcSlice[] = { (uint8_t*)cap.image->data };
const int srcStride[] = { cap.width * 4 };
uint8_t* dst[] = { (uint8_t*)y, (uint8_t*)u, (uint8_t*)v };
const int dstStride[] = { y_stride, u_stride, v_stride };
sws_scale(cap.sws_ctx, srcSlice, srcStride, 0, cap.height, dst, dstStride);
}
// INPUT FUNCTIONS USE SEPARATE DISPLAY CONNECTION
void move_mouse(int x, int y) {
if (!cap.is_init || !cap.input_display) return;
XTestFakeMotionEvent(cap.input_display, -1, x, y, CurrentTime);
XFlush(cap.input_display); // Flush only input stream
}
void mouse_button(int button, int is_down) {
if (!cap.is_init || !cap.input_display) return;
XTestFakeButtonEvent(cap.input_display, button, is_down ? True : False, CurrentTime);
XFlush(cap.input_display);
}
void key_send(const char* key_name, int is_down) {
if (!cap.is_init || !cap.input_display) return;
KeySym ks = XStringToKeysym(key_name);
if (ks != NoSymbol) {
KeyCode kc = XKeysymToKeycode(cap.input_display, ks);
if (kc != 0) {
XTestFakeKeyEvent(cap.input_display, kc, is_down ? True : False, CurrentTime);
XFlush(cap.input_display);
}
}
}
}
"""
LIB_PATH = "./libxcapture_full.so"
def compile_cpp():
    """Write CPP_SOURCE to xcapture.cpp and compile it into LIB_PATH with g++.

    Best-effort: compile failures are printed, not raised — the module-level
    loader then sets USE_CSHM = False and the server streams fallback frames.
    """
    # Remove a stale build so a failed compile can't silently reuse old code.
    if os.path.exists(LIB_PATH):
        try:
            os.remove(LIB_PATH)
        except OSError:
            # Narrowed from bare except: only filesystem errors are expected.
            pass
    with open("xcapture.cpp", "w") as f:
        f.write(CPP_SOURCE)
    # -march=native/-ffast-math/-flto: aggressive CPU tuning for the
    # capture/convert hot path; -shared -fPIC builds a ctypes-loadable .so.
    cmd = [
        "g++", "-O3", "-march=native", "-ffast-math", "-flto", "-shared", "-fPIC",
        "-o", LIB_PATH, "xcapture.cpp",
        "-lX11", "-lXext", "-lswscale", "-lavutil", "-lXtst"
    ]
    try:
        subprocess.check_call(cmd)
        print("C++ Optimized Library Compiled.")
    except (OSError, subprocess.CalledProcessError) as e:
        # OSError: g++ missing; CalledProcessError: compile/link failed.
        print(f"Compilation failed: {e}")
compile_cpp()
# Load the compiled C++ library and declare ctypes signatures for every
# export.  On any failure (no compiler, missing X/FFmpeg dev libs) USE_CSHM
# flips to False and recv() serves black fallback frames instead of crashing.
try:
    xlib = ctypes.CDLL(LIB_PATH)
    xlib.init_grabber.argtypes = [c_int, c_int, c_char_p]
    xlib.init_grabber.restype = c_int
    xlib.capture_frame.argtypes = []
    xlib.capture_frame.restype = c_int
    xlib.cleanup.argtypes = []
    xlib.cleanup.restype = None
    xlib.convert_to_yuv.argtypes = [c_void_p, c_int, c_void_p, c_int, c_void_p, c_int]
    xlib.convert_to_yuv.restype = None
    xlib.move_mouse.argtypes = [c_int, c_int]
    xlib.move_mouse.restype = None
    xlib.mouse_button.argtypes = [c_int, c_int]
    xlib.mouse_button.restype = None
    xlib.key_send.argtypes = [c_char_p, c_int]
    xlib.key_send.restype = None
    USE_CSHM = True
except Exception as e:
    print(f"Library load failed: {e}")
    USE_CSHM = False
HOST = "0.0.0.0"
PORT = 7860
DISPLAY_NUM = ":99"      # Xvfb display used for both capture and input.
MAX_WIDTH = 4096         # Xvfb framebuffer size; set_resolution clamps here.
MAX_HEIGHT = 4096
DEFAULT_WIDTH = 1280
DEFAULT_HEIGHT = 720
logging.basicConfig(level=logging.WARNING)
# Dedicated single worker: serializes all XShm capture work on one thread.
video_executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
# LOCKS
# video_lock: Protects XShm, resizing, and video display connection
video_lock = threading.Lock()
# Input does NOT use a lock anymore because it uses a separate X11 connection in C++
# Current streaming resolution; mutated only by set_resolution (under video_lock).
config = {
    "width": DEFAULT_WIDTH,
    "height": DEFAULT_HEIGHT
}
class InputManager:
    """Relay browser input events to the C++ XTest layer.

    No Python-side locking is required: the C++ side injects input over a
    dedicated X11 connection, separate from the video-capture connection.
    """

    def __init__(self):
        # Accumulated wheel delta; flushed as one click per 40px of travel.
        self.scroll_accum = 0

    def mouse_move(self, x, y):
        """Warp the pointer to absolute pixel coordinates (x, y)."""
        if USE_CSHM:
            xlib.move_mouse(x, y)

    def mouse_down(self, btn):
        """Press X11 button number `btn`."""
        if USE_CSHM:
            xlib.mouse_button(btn, 1)

    def mouse_up(self, btn):
        """Release X11 button number `btn`."""
        if USE_CSHM:
            xlib.mouse_button(btn, 0)

    def scroll(self, dy):
        """Accumulate a wheel delta; emit a click once |total| reaches 40."""
        self.scroll_accum += dy
        if abs(self.scroll_accum) < 40:
            return
        btn = 4 if self.scroll_accum < 0 else 5  # X11: 4 = wheel up, 5 = down
        if USE_CSHM:
            xlib.mouse_button(btn, 1)
            xlib.mouse_button(btn, 0)
        self.scroll_accum = 0

    def key_down(self, key):
        """Press the key whose X keysym name is `key` (ignored if falsy)."""
        if USE_CSHM and key:
            xlib.key_send(key.encode('utf-8'), 1)

    def key_up(self, key):
        """Release the key whose X keysym name is `key` (ignored if falsy)."""
        if USE_CSHM and key:
            xlib.key_send(key.encode('utf-8'), 0)


input_manager = InputManager()
def start_system():
    """Boot the headless desktop: Xvfb, window manager, and app watchdog.

    Raises:
        FileNotFoundError: if the Xvfb binary is not installed.
    """
    os.environ["DISPLAY"] = DISPLAY_NUM
    # Remove a stale X lock file left by a crashed server; absence is fine.
    try:
        os.remove(f"/tmp/.X{DISPLAY_NUM.replace(':', '')}-lock")
    except OSError:
        # Narrowed from bare except so Ctrl-C/SystemExit are not swallowed.
        pass
    if not shutil.which("Xvfb"):
        raise FileNotFoundError("Xvfb missing")
    # Allocate the framebuffer at MAX size once; xrandr switches modes later.
    subprocess.Popen([
        "Xvfb", DISPLAY_NUM,
        "-screen", "0", f"{MAX_WIDTH}x{MAX_HEIGHT}x24",
        "-ac", "-noreset", "-nolisten", "tcp"
    ], stderr=subprocess.DEVNULL)
    time.sleep(1)  # Give Xvfb a moment to come up before xrandr runs.
    set_resolution(DEFAULT_WIDTH, DEFAULT_HEIGHT)
    if shutil.which("matchbox-window-manager"):
        subprocess.Popen("matchbox-window-manager -use_titlebar no",
                         shell=True, stderr=subprocess.DEVNULL)
    # Daemon thread: dies with the process, keeps "antigravity" running.
    threading.Thread(target=maintain_antigravity, daemon=True).start()
def maintain_antigravity():
    """Watchdog loop: respawn the "antigravity" process whenever it dies.

    Runs forever in a daemon thread, polling the process table every 5 s.
    """
    while True:
        try:
            # any() short-circuits on the first match, avoiding a full scan.
            running = any(
                p.info['name'] == "antigravity"
                for p in psutil.process_iter(['name'])
            )
            if not running:
                subprocess.Popen(["antigravity"],
                                 stdout=subprocess.DEVNULL,
                                 stderr=subprocess.DEVNULL)
        except Exception:
            # Best-effort: process-table races and spawn failures retry next
            # cycle.  Narrowed from bare except so KeyboardInterrupt/
            # SystemExit still terminate the thread.
            pass
        time.sleep(5)  # Low poll rate to save CPU.
def set_resolution(w, h):
    """Switch the Xvfb screen to w x h via xrandr.

    Dimensions are rounded up to even values (required for YUV420 planes)
    and clamped to the framebuffer size.  No-op when already current.
    Runs under video_lock so it never races with frame capture.
    """
    with video_lock:
        # BUGFIX: normalize BEFORE the dedup check.  Previously a request
        # that merely rounded/clamped to the current mode still forked three
        # xrandr processes and registered a duplicate mode every time.
        if w % 2 != 0: w += 1
        if h % 2 != 0: h += 1
        if w > MAX_WIDTH: w = MAX_WIDTH
        if h > MAX_HEIGHT: h = MAX_HEIGHT
        if w == config["width"] and h == config["height"]:
            return
        try:
            mode_name = f"M_{w}_{h}"
            # Approximate modeline: pixel clock scaled for ~60 Hz refresh.
            subprocess.call(["xrandr", "--newmode", mode_name, f"{60*w*h/1000000:.2f}",
                             str(w), str(w+40), str(w+80), str(w+160),
                             str(h), str(h+3), str(h+10), str(h+16),
                             "-hsync", "+vsync"], stderr=subprocess.DEVNULL)
            subprocess.call(["xrandr", "--addmode", "screen", mode_name], stderr=subprocess.DEVNULL)
            subprocess.call(["xrandr", "--output", "screen", "--mode", mode_name], stderr=subprocess.DEVNULL)
            config["width"] = w
            config["height"] = h
            print(f"Resized to {w}x{h}")
        except Exception as e:
            print(f"Resize failed: {e}")
class VirtualScreenTrack(VideoStreamTrack):
    """aiortc video track serving frames captured from the Xvfb screen.

    The heavy C++ work (XShm capture + sws_scale YUV conversion) runs on
    the dedicated single-thread video executor; ctypes releases the GIL,
    so input dispatch on the event loop continues in parallel.
    """
    kind = "video"

    def __init__(self):
        super().__init__()
        # Resolution the C++ grabber is currently initialized for.
        self.last_w = 0
        self.last_h = 0
        # Cached black frame served whenever capture fails.
        self.fallback_frame = None

    def _produce_frame(self, w, h):
        """Capture one w x h frame; return a VideoFrame or None on failure.

        Runs on the video executor thread under video_lock, so it never
        races set_resolution on the shared XShm segment.
        """
        if not USE_CSHM:
            return None
        with video_lock:
            try:
                if w != self.last_w or h != self.last_h:
                    # (Re)initialize the grabber for the new resolution.
                    if xlib.init_grabber(w, h, DISPLAY_NUM.encode('utf-8')) == 0:
                        return None
                    self.last_w = w
                    self.last_h = h
                # 1. Blit X11 screen -> shared memory.
                if xlib.capture_frame() == 0:
                    return None
                # 2. Allocate the output frame (Python overhead, required by aiortc).
                frame = VideoFrame(width=w, height=h, format="yuv420p")
                # 3. Convert BGRA shared memory -> YUV planes in place,
                #    passing raw plane addresses to avoid Python-side copies.
                xlib.convert_to_yuv(
                    c_void_p(int(frame.planes[0].buffer_ptr)), frame.planes[0].line_size,
                    c_void_p(int(frame.planes[1].buffer_ptr)), frame.planes[1].line_size,
                    c_void_p(int(frame.planes[2].buffer_ptr)), frame.planes[2].line_size
                )
                return frame
            except Exception:
                # BUGFIX: narrowed from bare except — capture errors still
                # fall back to a black frame, but BaseExceptions propagate.
                return None

    async def recv(self):
        """Return the next timestamped frame (black fallback on failure)."""
        pts, time_base = await self.next_timestamp()
        w, h = config["width"], config["height"]
        frame = None
        if USE_CSHM:
            try:
                # get_running_loop(): get_event_loop() is deprecated inside
                # a coroutine.  The executor hop keeps the event loop free
                # for input processing while C++ holds the video lock.
                frame = await asyncio.get_running_loop().run_in_executor(
                    video_executor, self._produce_frame, w, h
                )
            except Exception:
                # BUGFIX: the previous bare except swallowed
                # asyncio.CancelledError, breaking task cancellation.
                frame = None
        if frame is None:
            if (self.fallback_frame is None
                    or self.fallback_frame.width != w
                    or self.fallback_frame.height != h):
                self.fallback_frame = VideoFrame.from_ndarray(
                    np.zeros((h, w, 3), dtype=np.uint8), format="bgr24")
            frame = self.fallback_frame
        frame.pts = pts
        frame.time_base = time_base
        return frame
async def offer(request):
    """POST /offer: complete a WebRTC handshake and start streaming.

    Parses the browser's SDP offer, attaches the screen track and input
    data channel, injects ~4 Mbps bandwidth hints into the answer SDP,
    and returns the local description as JSON (CORS-open).
    """
    try:
        params = await request.json()
        offer = RTCSessionDescription(sdp=params["sdp"], type=params["type"])
    except Exception:
        # Malformed body / missing keys -> 400.  Narrowed from bare except
        # so BaseExceptions (e.g. CancelledError) are not swallowed.
        return web.Response(status=400)
    ice_servers = [RTCIceServer(urls=["stun:stun.l.google.com:19302"])]
    pc = RTCPeerConnection(RTCConfiguration(iceServers=ice_servers))
    pcs.add(pc)
    @pc.on("connectionstatechange")
    async def on_state():
        # Reap dead connections so the global pcs set does not grow forever.
        if pc.connectionState in ["failed", "closed"]:
            await pc.close()
            pcs.discard(pc)
    @pc.on("datachannel")
    def on_dc(channel):
        @channel.on("message")
        async def on_message(message):
            # Input events bypass HTTP entirely: straight to the dispatcher.
            await process_input(message)
    pc.addTrack(VirtualScreenTrack())
    await pc.setRemoteDescription(offer)
    answer = await pc.createAnswer()
    # Force a higher video bitrate (~4 Mbps) by injecting bandwidth lines
    # right after the m=video section before the description becomes local.
    sdp_lines = answer.sdp.splitlines()
    new_sdp = []
    for line in sdp_lines:
        new_sdp.append(line)
        if line.startswith("m=video"):
            new_sdp.append("b=AS:4000")
            new_sdp.append("b=TIAS:4000000")
    answer.sdp = "\r\n".join(new_sdp)
    await pc.setLocalDescription(answer)
    return web.Response(content_type="application/json", text=json.dumps({
        "sdp": pc.localDescription.sdp,
        "type": pc.localDescription.type
    }), headers={"Access-Control-Allow-Origin": "*"})
def map_key(key):
    """Translate a browser KeyboardEvent.key name to its X11 keysym name.

    Unknown keys pass through unchanged; falsy input yields None.
    """
    if not key:
        return None
    translations = {
        "Enter": "Return",
        "ArrowUp": "Up",
        "ArrowDown": "Down",
        "ArrowLeft": "Left",
        "ArrowRight": "Right",
    }
    return translations.get(key, key)
# Pending debounced-resize task; replaced (and cancelled) on each new request.
resize_timer = None
async def handle_debounced_resize(w, h):
    """Debounce resize requests: only the latest within 0.1 s is applied.

    Cancels any pending resize task, then schedules set_resolution on the
    video executor so it serializes with frame capture.
    """
    global resize_timer
    if resize_timer:
        resize_timer.cancel()
    async def task():
        await asyncio.sleep(0.1)  # Short debounce window for snappy resizing.
        # get_running_loop(): get_event_loop() is deprecated in coroutines.
        await asyncio.get_running_loop().run_in_executor(
            video_executor, set_resolution, w, h)
    resize_timer = asyncio.create_task(task())
async def process_input(data):
    """Dispatch one JSON input message received on the WebRTC data channel.

    Mouse coordinates arrive normalized (0..1) and are scaled to the
    current resolution.  Malformed messages are dropped (best-effort path).
    """
    try:
        msg = json.loads(data)
        t = msg.get("type")
        if t == "mousemove":
            w, h = config["width"], config["height"]
            input_manager.mouse_move(int(msg["x"] * w), int(msg["y"] * h))
        elif t == "mousedown":
            # Browser buttons 0/1/2 map to X11 buttons 1/2/3.
            input_manager.mouse_down({0:1, 1:2, 2:3}.get(msg.get("button"), 1))
        elif t == "mouseup":
            input_manager.mouse_up({0:1, 1:2, 2:3}.get(msg.get("button"), 1))
        elif t == "keydown":
            k = map_key(msg.get("key"))
            if k: input_manager.key_down(k)
        elif t == "keyup":
            k = map_key(msg.get("key"))
            if k: input_manager.key_up(k)
        elif t == "wheel":
            input_manager.scroll(msg.get("deltaY", 0))
        elif t == "resize":
            w, h = int(msg.get("width")), int(msg.get("height"))
            if w > 100 and h > 100:  # Reject degenerate window sizes.
                await handle_debounced_resize(w, h)
    except Exception:
        # BUGFIX: narrowed from bare except — malformed input is still
        # dropped silently, but CancelledError now propagates.
        pass
async def index(r):
    """Health-check endpoint."""
    return web.Response(text="Optimized")
async def options(r):
    """CORS preflight response for the /offer endpoint."""
    return web.Response(headers={
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "POST, OPTIONS",
        "Access-Control-Allow-Headers": "Content-Type",
    })
# Live RTCPeerConnection instances; closed on shutdown.
pcs = set()
async def on_shutdown(app):
    """aiohttp shutdown hook: stop peers first, then free C/X resources.

    BUGFIX: peer connections are now closed BEFORE xlib.cleanup(), so no
    still-running video track can call into the capture library while its
    X connections and shared-memory segment are being torn down.
    """
    if pcs:
        await asyncio.gather(*[pc.close() for pc in pcs])
    with video_lock:
        if USE_CSHM:
            try:
                xlib.cleanup()
            except Exception:
                # Narrowed from bare except; best-effort teardown.
                pass
if __name__ == "__main__":
    start_system()  # Bring up Xvfb, window manager, and the app watchdog.
    server = web.Application()
    server.on_shutdown.append(on_shutdown)
    # Route table: health check, WebRTC offer, CORS preflight.
    for register, path, handler in (
        (server.router.add_get, "/", index),
        (server.router.add_post, "/offer", offer),
        (server.router.add_options, "/offer", options),
    ):
        register(path, handler)
    web.run_app(server, host=HOST, port=PORT)