khushalcodiste committed on
Commit
508f0ad
·
1 Parent(s): 5fccbac

feat: fixed response

Browse files
Files changed (1) hide show
  1. ml/markov.py +55 -0
ml/markov.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Markov-chain baseline over winning numbers (orders 1, 2, 3).
2
+
3
+ Each order-K model counts transitions from every K-gram of past numbers to the
4
+ next number, then predicts the most frequent successor (with global frequency
5
+ as fallback).
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from collections import Counter, defaultdict
10
+ from dataclasses import dataclass, field
11
+ from typing import Final
12
+
13
+ import numpy as np
14
+
15
# Number of distinct outcome classes; valid numbers are 0 .. NUMBER_CLASSES - 1.
NUMBER_CLASSES: Final[int] = 37


@dataclass
class MarkovModel:
    """Order-K Markov chain over integer outcomes in ``[0, NUMBER_CLASSES)``.

    ``fit`` counts, for every run of ``order`` consecutive numbers, how often
    each number follows it. ``predict_proba`` turns the counts for the most
    recent ``order`` numbers into a probability vector, falling back to the
    overall number frequencies when that context was never observed, and to a
    uniform distribution when the model holds no counts at all.
    """

    # Length of the context (number of preceding outcomes) used as lookup key.
    order: int
    # Maps each observed context to a length-NUMBER_CLASSES array of successor counts.
    transitions: dict[tuple[int, ...], np.ndarray] = field(default_factory=dict)
    # Occurrence count of every number over all fitted sequences (fallback distribution).
    global_counts: np.ndarray = field(
        default_factory=lambda: np.zeros(NUMBER_CLASSES, dtype=np.int64)
    )

    def fit(self, sequences: list[np.ndarray]) -> "MarkovModel":
        """Estimate transition and global counts from ``sequences``.

        Any counts held from an earlier ``fit`` call are discarded, so the
        model always reflects exactly the data passed to the latest call.

        Args:
            sequences: Independent sequences of outcomes. Transitions are
                never counted across sequence boundaries.

        Returns:
            ``self``, to allow call chaining.

        Raises:
            ValueError: If ``order`` is negative or a number lies outside
                ``[0, NUMBER_CLASSES)``. The model is left unmodified.
        """
        if self.order < 0:
            raise ValueError(f"order must be non-negative, got {self.order}")

        successor_counts: dict[tuple[int, ...], Counter[int]] = defaultdict(Counter)
        number_counts: Counter[int] = Counter()
        for seq in sequences:
            numbers = [int(n) for n in seq]
            # Validate before counting: a negative value would otherwise wrap
            # around through NumPy negative indexing and corrupt another class.
            for n in numbers:
                if not 0 <= n < NUMBER_CLASSES:
                    raise ValueError(
                        f"number {n} is outside the valid range "
                        f"[0, {NUMBER_CLASSES})"
                    )
            number_counts.update(numbers)
            for i in range(len(numbers) - self.order):
                context = tuple(numbers[i : i + self.order])
                successor_counts[context][numbers[i + self.order]] += 1

        # Build the new state in locals and swap it in at the end, so a failed
        # fit never leaves the model half-updated and a refit keeps no stale rows.
        transitions: dict[tuple[int, ...], np.ndarray] = {}
        for context, successors in successor_counts.items():
            row = np.zeros(NUMBER_CLASSES, dtype=np.int64)
            for n, count in successors.items():
                row[n] = count
            transitions[context] = row

        global_counts = np.zeros(NUMBER_CLASSES, dtype=np.int64)
        for n, count in number_counts.items():
            global_counts[n] = count

        self.transitions = transitions
        self.global_counts = global_counts
        return self

    def predict_proba(self, context: tuple[int, ...]) -> np.ndarray:
        """Return the probability of each number following ``context``.

        Args:
            context: Past outcomes, oldest first. Only the last ``order``
                entries are used. Any sliceable sequence is accepted.

        Returns:
            A float64 array of length ``NUMBER_CLASSES`` that sums to 1.
        """
        # tuple() makes list/ndarray slices hashable; tuples pass through unchanged.
        key = tuple(context[-self.order :]) if self.order > 0 else ()
        counts = self.transitions.get(key)
        if counts is None or counts.sum() == 0:
            # Unseen context: fall back to the overall number frequencies.
            counts = self.global_counts
        total = counts.sum()
        if total == 0:
            # Nothing fitted yet: every number is equally likely.
            return np.full(NUMBER_CLASSES, 1.0 / NUMBER_CLASSES, dtype=np.float64)
        return counts.astype(np.float64) / total

    def predict(self, context: tuple[int, ...]) -> int:
        """Return the most probable next number (lowest number wins ties)."""
        return int(np.argmax(self.predict_proba(context)))