File size: 2,159 Bytes
708f4a3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | import threading
from typing import Optional, List, Any
class LockFreeVocabCache:
"""
Lock-free cache using atomic operations logic for thread-safe access.
Uses versioning to detect concurrent modifications (ABA problem prevention).
Optimized for read-heavy workloads typical in tokenization.
"""
def __init__(self, capacity: int = 8192):
self.capacity = capacity
# Ensure power of 2 for fast masking
assert (capacity & (capacity - 1)) == 0, "Capacity must be power of 2"
self.mask = capacity - 1
# Pre-allocated arrays [cite: 607-609]
self.keys: List[Optional[str]] = [None] * capacity
self.values: List[Optional[int]] = [None] * capacity
self.versions: List[int] = [0] * capacity
def get(self, key: str) -> Optional[int]:
"""
Thread-safe cache lookup using optimistic concurrency[cite: 615].
"""
idx = hash(key) & self.mask
# 1. Read version before data
start_version = self.versions[idx]
# 2. Optimistic read of key/value
stored_key = self.keys[idx]
stored_value = self.values[idx]
# 3. Read version after data (Memory Barrier simulation)
end_version = self.versions[idx]
# Validation: Version matches and key matches
if start_version == end_version and stored_key == key:
return stored_value
return None # Cache miss or concurrent modification
def put(self, key: str, value: int) -> None:
"""
Thread-safe insertion with optimistic collision handling[cite: 627].
"""
idx = hash(key) & self.mask
# Simple atomic update simulation
# In pure Python, assignment is atomic for simple types, but we increment version
# to invalidate readers.
current_ver = self.versions[idx]
self.versions[idx] = current_ver + 1 # Invalidate readers
self.keys[idx] = key
self.values[idx] = value
self.versions[idx] = current_ver + 2 # Validate new data |