Spaces:

Fred808
/

INV

Paused

App Files Files Community

INV / virtual_gpu_driver /src /warp.py

Fred808

Upload 256 files

7a0c684 verified 4 months ago

raw

history blame contribute delete

16.3 kB

	import json
	import threading
	import time
	from dataclasses import dataclass, field
	from enum import Enum
	from typing import Any, Callable, Dict, List, Optional, Tuple

	import duckdb
	import numpy as np
	from huggingface_hub import HfApi, HfFileSystem

	from config import get_hf_token_cached
	from .memory import RegisterFile

	# Initialize token from .env



	@dataclass
	class WarpBarrier:
	    """Synchronization barrier that a group of warps rendezvous on.

	    The barrier is considered complete once ``arrived`` reaches
	    ``num_warps``; waiters block on ``condition`` until ``completed``
	    is set.
	    """

	    # Identifier under which the barrier is stored and looked up.
	    barrier_id: str
	    # Number of arrivals required before the barrier completes.
	    num_warps: int
	    # Number of arrivals recorded so far.
	    arrived: int = 0
	    # True once every expected arrival has been recorded.
	    completed: bool = False
	    # ``default_factory`` builds fresh primitives per instance. A plain
	    # default (``= threading.Lock()``) is evaluated once at class-creation
	    # time and would be shared by every barrier ever constructed.
	    # ``compare=False`` keeps ``==`` based on the data fields only.
	    lock: threading.Lock = field(default_factory=threading.Lock, compare=False)
	    condition: threading.Condition = field(
	        default_factory=threading.Condition, compare=False
	    )

	class ShuffleMode(Enum):
	    """Lane-exchange patterns understood by warp shuffle operations."""

	    # Take the value from a lane "up" from the caller
	    UP = "up"
	    # Take the value from a lane "down" from the caller
	    DOWN = "down"
	    # Butterfly exchange: partner lane chosen by XOR
	    XOR = "xor"
	    # Read directly from the lane with the given index
	    IDX = "idx"
	    # Every lane receives the value of one source lane
	    BCAST = "bcast"

	class VotingMode(Enum):
	    """Reduction kinds understood by warp voting operations."""

	    # True only when the predicate holds for every active thread
	    ALL = "all"
	    # True when the predicate holds for at least one active thread
	    ANY = "any"
	    # Bitmask with one bit per thread whose predicate is true
	    BALLOT = "ballot"
	    # Number of threads whose predicate is true
	    COUNT = "count"

	class WarpState(Enum):
	    """Lifecycle states a warp can be in."""

	    # Eligible to be picked for execution
	    READY = "ready"
	    # Executing right now
	    RUNNING = "running"
	    # Waiting on synchronization
	    BLOCKED = "blocked"
	    # Gave up execution voluntarily
	    YIELDED = "yielded"
	    # Execution has finished
	    COMPLETED = "completed"

	class Warp:
	    """Group of up to 32 simulated threads that execute together.

	    A warp owns one ``RegisterFile`` per thread, an active mask and a
	    predicate mask (one bit per thread), a set of named barriers, and a
	    DuckDB connection in which the warp registers itself on construction.
	    """

	    DB_URL = "hf://datasets/Fred808/helium/storage.json"

	    def __init__(self, warp_id: int, num_threads: int = 32, db_url: Optional[str] = None):
	        """Create the warp, connect to the database and register the warp.

	        Args:
	            warp_id: Identifier of this warp.
	            num_threads: Requested thread count; capped at 32.
	            db_url: Database URL; falls back to ``DB_URL`` when falsy.

	        Raises:
	            RuntimeError: If the database connection cannot be established
	                after ``max_retries`` attempts.
	        """
	        self.warp_id = warp_id
	        self.num_threads = min(num_threads, 32)  # Max 32 threads per warp
	        full_mask = (1 << self.num_threads) - 1
	        self.active_mask = full_mask  # All threads active initially
	        self.predicate_mask = full_mask  # For predicated execution
	        self.registers = [RegisterFile() for _ in range(self.num_threads)]
	        self.state = WarpState.READY
	        self.pc = 0  # Program counter

	        # Synchronization
	        self.barriers: Dict[str, WarpBarrier] = {}
	        self.lock = threading.Lock()

	        # Performance tracking
	        self.cycles_executed = 0
	        self.last_active_time = time.time()

	        # All in-memory state is set up above, before any database I/O, so
	        # the persistence helpers below only ever touch the database.
	        self.db_url = db_url or self.DB_URL
	        self.max_retries = 3
	        self._connect_with_retries()
	        self._setup_database()
	        self._register_warp()

	    def _connect_with_retries(self):
	        """Open ``self.conn``, retrying up to ``self.max_retries`` times.

	        Sleeps one second between failed attempts.

	        Raises:
	            RuntimeError: When the final attempt fails; the underlying
	                exception is chained as ``__cause__``.
	        """
	        for attempt in range(1, self.max_retries + 1):
	            try:
	                self.conn = self._init_db_connection()
	                return
	            except Exception as e:
	                if attempt == self.max_retries:
	                    raise RuntimeError(
	                        f"Failed to initialize database after {self.max_retries} attempts: {str(e)}"
	                    ) from e
	                time.sleep(1)

	    @staticmethod
	    def _hf_url_to_s3_path(db_url: str) -> str:
	        """Translate an ``hf://`` URL into the ``s3://datasets-cached/...`` path.

	        Accepts ``scheme://datasets/<owner>/<dataset>/<file>`` as well as
	        ``scheme://<owner>/<dataset>/<file>``; ``<file>`` may contain slashes.

	        Raises:
	            ValueError: If the URL has no ``://`` or fewer than three path
	                components.
	        """
	        _, sep, remainder = db_url.partition("://")
	        parts = remainder.split("/") if sep else []
	        # Skip the repo-type segment so that ``owner`` is the real owner and
	        # not the literal "datasets". Only done when enough components
	        # remain, so three-component URLs keep their previous meaning.
	        if len(parts) >= 4 and parts[0] == "datasets":
	            parts = parts[1:]
	        if len(parts) < 3:
	            raise ValueError(f"Unsupported database URL: {db_url!r}")
	        owner, dataset = parts[0], parts[1]
	        db_file = "/".join(parts[2:])
	        return f"s3://datasets-cached/{owner}/{dataset}/{db_file}"

	    def _init_db_connection(self) -> duckdb.DuckDBPyConnection:
	        """Open a DuckDB connection and apply the httpfs/S3 settings.

	        The token is taken from an ``HF_TOKEN`` attribute when one exists,
	        otherwise from ``get_hf_token_cached()``.

	        Raises:
	            ValueError: If ``self.db_url`` cannot be parsed.
	            RuntimeError: If no token is available.
	        """
	        # Convert HF URL to S3 path
	        db_path = self._hf_url_to_s3_path(self.db_url)

	        token = getattr(self, "HF_TOKEN", None)
	        if token is None:
	            # NOTE(review): assumes get_hf_token_cached() takes no arguments
	            # and returns the token string -- confirm against config.
	            token = get_hf_token_cached()
	        if not token:
	            raise RuntimeError("No Hugging Face token available")
	        # The value is interpolated into SET statements; double any single
	        # quote so it cannot terminate the SQL string literal.
	        token_sql = str(token).replace("'", "''")

	        # Connect to remote database
	        conn = duckdb.connect(db_path)
	        try:
	            conn.execute("INSTALL httpfs;")
	            conn.execute("LOAD httpfs;")
	            conn.execute("SET s3_endpoint='s3.us-east-1.amazonaws.com';")
	            conn.execute("SET s3_use_ssl=true;")
	            conn.execute("SET s3_url_style='path';")
	            conn.execute(f"SET s3_access_key_id='{token_sql}';")
	            conn.execute(f"SET s3_secret_access_key='{token_sql}';")
	        except Exception:
	            # Do not leak a half-configured connection across retries.
	            conn.close()
	            raise
	        return conn

	    def _setup_database(self):
	        """Create the ``warps``, ``warp_barriers`` and ``warp_registers`` tables if missing."""
	        # Warp state table
	        self.conn.execute("""
	            CREATE TABLE IF NOT EXISTS warps (
	                warp_id VARCHAR PRIMARY KEY,
	                num_threads INTEGER,
	                active_mask BIGINT,
	                predicate_mask BIGINT,
	                state VARCHAR,
	                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
	                updated_at TIMESTAMP,
	                state_json JSON
	            )
	        """)

	        # Barrier table
	        self.conn.execute("""
	            CREATE TABLE IF NOT EXISTS warp_barriers (
	                barrier_id VARCHAR PRIMARY KEY,
	                num_warps INTEGER,
	                arrived_count INTEGER DEFAULT 0,
	                completed BOOLEAN DEFAULT false,
	                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
	                completed_at TIMESTAMP,
	                state_json JSON
	            )
	        """)

	        # Register table
	        self.conn.execute("""
	            CREATE TABLE IF NOT EXISTS warp_registers (
	                warp_id VARCHAR,
	                thread_id INTEGER,
	                register_id INTEGER,
	                value BLOB,
	                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
	                PRIMARY KEY (warp_id, thread_id, register_id)
	            )
	        """)

	    def _register_warp(self):
	        """Insert a row describing this warp into the ``warps`` table."""
	        self.conn.execute("""
	            INSERT INTO warps (
	                warp_id, num_threads, active_mask, predicate_mask,
	                state, state_json
	            ) VALUES (?, ?, ?, ?, ?, ?)
	        """, [
	            str(self.warp_id),
	            self.num_threads,
	            self.active_mask,
	            self.predicate_mask,
	            self.state.value,
	            {"status": "initialized"}
	        ])

	    def get_active_threads(self) -> List[int]:
	        """Return the indices of threads whose bit is set in ``active_mask``."""
	        return [i for i in range(self.num_threads)
	                if self.active_mask & (1 << i)]

	    def get_predicated_threads(self) -> List[int]:
	        """Return the indices of threads set in both the active and predicate masks."""
	        return [i for i in range(self.num_threads)
	                if (self.active_mask & (1 << i)) and (self.predicate_mask & (1 << i))]

	    def set_active_mask(self, mask: int):
	        """Set the active mask, truncated to ``num_threads`` bits."""
	        with self.lock:
	            self.active_mask = mask & ((1 << self.num_threads) - 1)

	    def set_predicate_mask(self, mask: int):
	        """Set the predicate mask, truncated to ``num_threads`` bits."""
	        with self.lock:
	            self.predicate_mask = mask & ((1 << self.num_threads) - 1)

	    def sync(self, barrier_id: Optional[str] = None):
	        """Arrive at a barrier and block until it has completed.

	        Args:
	            barrier_id: Name of the barrier. When falsy, a fresh
	                single-arrival barrier is created, so the call returns
	                immediately. An unknown name likewise creates a barrier
	                with ``num_warps=1``.
	        """
	        if not barrier_id:
	            barrier_id = f"warp_{self.warp_id}_barrier_{time.time_ns()}"

	        with self.lock:
	            barrier = self.barriers.get(barrier_id)
	            if barrier is None:
	                barrier = WarpBarrier(barrier_id=barrier_id, num_warps=1)
	                self.barriers[barrier_id] = barrier

	        # Record the arrival under barrier.lock and release it *before*
	        # waiting. Waiting while still holding barrier.lock would stop every
	        # other participant from arriving and deadlock the barrier.
	        with barrier.lock:
	            barrier.arrived += 1
	            is_last = barrier.arrived >= barrier.num_warps
	            if is_last:
	                barrier.completed = True

	        # ``completed`` is re-checked while holding the condition's own lock,
	        # so a notification sent between the check and wait() cannot be lost.
	        with barrier.condition:
	            if is_last:
	                barrier.condition.notify_all()
	            else:
	                while not barrier.completed:
	                    barrier.condition.wait()

	    def vote(self, predicate: List[bool], mode: VotingMode = VotingMode.ALL) -> Any:
	        """Reduce per-thread predicates over the predicated threads.

	        Args:
	            predicate: Sequence indexed by thread index.
	            mode: Which reduction to perform.

	        Returns:
	            ``bool`` for ALL/ANY, a bitmask ``int`` for BALLOT, a count
	            ``int`` for COUNT. With no predicated threads the result is
	            ``0`` for BALLOT/COUNT and ``False`` otherwise.

	        Raises:
	            ValueError: If ``mode`` is not a supported ``VotingMode`` and at
	                least one thread is predicated.
	        """
	        lanes = self.get_predicated_threads()
	        if not lanes:
	            return 0 if mode in (VotingMode.BALLOT, VotingMode.COUNT) else False

	        if mode == VotingMode.ALL:
	            return all(predicate[i] for i in lanes)
	        if mode == VotingMode.ANY:
	            return any(predicate[i] for i in lanes)
	        if mode == VotingMode.BALLOT:
	            return sum(1 << i for i in lanes if predicate[i])
	        if mode == VotingMode.COUNT:
	            return sum(1 for i in lanes if predicate[i])
	        raise ValueError(f"Unsupported voting mode: {mode!r}")

	    def shuffle(self, var: List[Any], mode: ShuffleMode, offset: int) -> List[Any]:
	        """Exchange per-thread values between predicated threads.

	        Args:
	            var: Sequence indexed by thread index; it is not modified.
	            mode: Exchange pattern.
	            offset: Lane distance (UP/DOWN), XOR mask (XOR) or source lane
	                index (IDX/BCAST).

	        Returns:
	            A copy of ``var`` in which each predicated thread holds the value
	            of its source lane. A thread keeps its own value when the source
	            lane is not predicated. UP and DOWN wrap around modulo
	            ``num_threads``.
	        """
	        lanes = self.get_predicated_threads()
	        lane_set = set(lanes)  # O(1) membership instead of scanning the list
	        result = list(var)  # Create copy to store results

	        if mode == ShuffleMode.UP:
	            # Shift values up by offset
	            for i in lanes:
	                src_idx = (i - offset) % self.num_threads
	                if src_idx in lane_set:
	                    result[i] = var[src_idx]

	        elif mode == ShuffleMode.DOWN:
	            # Shift values down by offset
	            for i in lanes:
	                src_idx = (i + offset) % self.num_threads
	                if src_idx in lane_set:
	                    result[i] = var[src_idx]

	        elif mode == ShuffleMode.XOR:
	            # Butterfly pattern exchange; lanes are all < num_threads, so the
	            # membership test also covers the range check.
	            for i in lanes:
	                src_idx = i ^ offset
	                if src_idx in lane_set:
	                    result[i] = var[src_idx]

	        elif mode in (ShuffleMode.IDX, ShuffleMode.BCAST):
	            # Both modes copy one source lane's value to every predicated lane.
	            if offset in lane_set:
	                src_val = var[offset]
	                for i in lanes:
	                    result[i] = src_val

	        return result

	    def execute(self, func: Callable[..., Any], *args, **kwargs):
	        """Call ``func`` once per active thread and collect the results.

	        Args:
	            func: Callable invoked as ``func(ctx, *args, **kwargs)``, where
	                ``ctx`` is a dict with keys ``thread_idx``, ``warp_id`` and
	                ``registers``.
	            *args: Extra positional arguments forwarded to ``func``.
	            **kwargs: Extra keyword arguments forwarded to ``func``.

	        Returns:
	            List of return values, in ascending thread-index order.
	        """
	        results = []
	        for thread_idx in self.get_active_threads():
	            # Set up thread context
	            ctx = {
	                'thread_idx': thread_idx,
	                'warp_id': self.warp_id,
	                'registers': self.registers[thread_idx]
	            }
	            results.append(func(ctx, *args, **kwargs))
	        return results

	class WarpScheduler:
	    """Warp scheduler that tracks priorities, dependencies and suspension.

	    Warps are identified by their index in ``self.warps``. All bookkeeping
	    dictionaries are keyed by that index and guarded by ``self.lock``.
	    """

	    def __init__(self, max_warps: int = 32, max_active_warps: int = 16):
	        """Create an empty scheduler.

	        Args:
	            max_warps: Maximum number of warps that may be created.
	            max_active_warps: Maximum number of warps returned by one
	                ``schedule()`` call.
	        """
	        self.max_warps = max_warps
	        self.max_active_warps = max_active_warps
	        self.warps: List[Warp] = []
	        self.active_warps: Dict[int, bool] = {}
	        self.warp_priorities: Dict[int, int] = {}
	        self.warp_dependencies: Dict[int, List[int]] = {}
	        self.lock = threading.Lock()

	    def create_warp(self, num_threads: int = 32, priority: int = 0) -> Warp:
	        """Create a warp, mark it active and record its priority.

	        Raises:
	            RuntimeError: If ``max_warps`` warps already exist.
	        """
	        with self.lock:
	            if len(self.warps) >= self.max_warps:
	                raise RuntimeError("Maximum number of warps reached")

	            warp_id = len(self.warps)
	            warp = Warp(warp_id, num_threads)
	            self.warps.append(warp)
	            self.active_warps[warp_id] = True
	            self.warp_priorities[warp_id] = priority
	            self.warp_dependencies[warp_id] = []
	            return warp

	    def set_warp_priority(self, warp_id: int, priority: int):
	        """Set the priority of a warp; unknown ids are ignored."""
	        with self.lock:
	            if 0 <= warp_id < len(self.warps):
	                self.warp_priorities[warp_id] = priority

	    def add_warp_dependency(self, warp_id: int, depends_on: int):
	        """Make ``warp_id`` wait for ``depends_on``; unknown ids are ignored."""
	        with self.lock:
	            if 0 <= warp_id < len(self.warps) and 0 <= depends_on < len(self.warps):
	                self.warp_dependencies[warp_id].append(depends_on)

	    def remove_warp_dependency(self, warp_id: int, depends_on: int):
	        """Remove one dependency entry; missing entries and unknown ids are ignored."""
	        with self.lock:
	            if 0 <= warp_id < len(self.warps):
	                try:
	                    self.warp_dependencies[warp_id].remove(depends_on)
	                except ValueError:
	                    # Dependency was not registered; nothing to remove.
	                    pass

	    def suspend_warp(self, warp_id: int):
	        """Mark a warp inactive and set its state to BLOCKED."""
	        with self.lock:
	            if 0 <= warp_id < len(self.warps):
	                self.active_warps[warp_id] = False
	                self.warps[warp_id].state = WarpState.BLOCKED

	    def resume_warp(self, warp_id: int):
	        """Mark a warp active again and set its state to READY."""
	        with self.lock:
	            if 0 <= warp_id < len(self.warps):
	                self.active_warps[warp_id] = True
	                self.warps[warp_id].state = WarpState.READY

	    def synchronize_warps(self, warp_ids: List[int], barrier_id: Optional[str] = None):
	        """Put a group of warps through a shared barrier.

	        The barrier is registered with every valid warp in ``warp_ids`` and
	        then completed by the scheduler itself. Calling the blocking
	        ``Warp.sync`` once per warp from this single thread could never
	        finish for more than one warp: the first call would wait for
	        arrivals that only this thread can make.

	        Args:
	            warp_ids: Indices of the participating warps; out-of-range ids
	                are ignored and do not count towards the barrier size.
	            barrier_id: Name of the barrier; generated when falsy.
	        """
	        if not barrier_id:
	            barrier_id = f"barrier_{time.time_ns()}"

	        participants = [self.warps[warp_id] for warp_id in warp_ids
	                        if 0 <= warp_id < len(self.warps)]
	        if not participants:
	            return

	        # Create barrier sized to the warps that actually take part
	        barrier = WarpBarrier(barrier_id=barrier_id, num_warps=len(participants))

	        # Register barrier with each warp
	        for warp in participants:
	            warp.barriers[barrier_id] = barrier

	        # Record every arrival at once, then wake any thread that may already
	        # be waiting on this barrier.
	        with barrier.lock:
	            barrier.arrived = barrier.num_warps
	            barrier.completed = True
	        with barrier.condition:
	            barrier.condition.notify_all()

	    def schedule(self) -> List[Warp]:
	        """Select the warps to run next.

	        A warp is eligible when it is marked active, its state is READY and
	        every warp it depends on is COMPLETED.

	        Returns:
	            Up to ``max_active_warps`` eligible warps, highest priority first.
	        """
	        with self.lock:
	            ready_warps = []

	            # Get warps that are ready to execute
	            for warp_id, warp in enumerate(self.warps):
	                if not self.active_warps.get(warp_id, False):
	                    continue

	                # Check dependencies
	                dependencies_met = all(
	                    self.warps[dep_id].state == WarpState.COMPLETED
	                    for dep_id in self.warp_dependencies.get(warp_id, [])
	                )

	                if dependencies_met and warp.state == WarpState.READY:
	                    ready_warps.append((warp_id, self.warp_priorities.get(warp_id, 0)))

	            # Sort by priority (higher numbers = higher priority); the sort is
	            # stable, so equal priorities keep creation order.
	            ready_warps.sort(key=lambda x: x[1], reverse=True)

	            # Return warps up to max_active_warps
	            return [self.warps[warp_id]
	                    for warp_id, _ in ready_warps[:self.max_active_warps]]

	    def execute_warps(self, func: Callable[..., Any], *args, **kwargs):
	        """Run ``func`` on every scheduled warp and concatenate the results.

	        Args:
	            func: Callable passed to ``Warp.execute``.
	            *args: Extra positional arguments forwarded to ``func``.
	            **kwargs: Extra keyword arguments forwarded to ``func``.

	        Returns:
	            Flat list of per-thread results, in scheduling order.
	        """
	        results = []
	        for warp in self.schedule():
	            warp.state = WarpState.RUNNING
	            try:
	                results.extend(warp.execute(func, *args, **kwargs))
	                warp.last_active_time = time.time()
	                warp.cycles_executed += 1
	            finally:
	                # Restore READY even when func raises; otherwise the warp
	                # would stay RUNNING and never be scheduled again.
	                warp.state = WarpState.READY
	        return results