File size: 2,159 Bytes
708f4a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import threading
from typing import Optional, List, Any

class LockFreeVocabCache:
    """
    Lock-free cache using atomic operations logic for thread-safe access.
    
    Uses versioning to detect concurrent modifications (ABA problem prevention).
    Optimized for read-heavy workloads typical in tokenization.
    """

    def __init__(self, capacity: int = 8192):
        self.capacity = capacity
        # Ensure power of 2 for fast masking
        assert (capacity & (capacity - 1)) == 0, "Capacity must be power of 2"
        self.mask = capacity - 1
        
        # Pre-allocated arrays [cite: 607-609]
        self.keys: List[Optional[str]] = [None] * capacity
        self.values: List[Optional[int]] = [None] * capacity
        self.versions: List[int] = [0] * capacity
        
    def get(self, key: str) -> Optional[int]:
        """
        Thread-safe cache lookup using optimistic concurrency[cite: 615].
        """
        idx = hash(key) & self.mask
        
        # 1. Read version before data
        start_version = self.versions[idx]
        
        # 2. Optimistic read of key/value
        stored_key = self.keys[idx]
        stored_value = self.values[idx]
        
        # 3. Read version after data (Memory Barrier simulation)
        end_version = self.versions[idx]
        
        # Validation: Version matches and key matches
        if start_version == end_version and stored_key == key:
            return stored_value
            
        return None # Cache miss or concurrent modification

    def put(self, key: str, value: int) -> None:
        """
        Thread-safe insertion with optimistic collision handling[cite: 627].
        """
        idx = hash(key) & self.mask
        
        # Simple atomic update simulation
        # In pure Python, assignment is atomic for simple types, but we increment version
        # to invalidate readers.
        
        current_ver = self.versions[idx]
        self.versions[idx] = current_ver + 1 # Invalidate readers
        
        self.keys[idx] = key
        self.values[idx] = value
        
        self.versions[idx] = current_ver + 2 # Validate new data