MightyOctopus committed on
Commit
13ffba1
·
1 Parent(s): a6b086f

GPT5_mini rag evaluation code

Browse files
Files changed (2) hide show
  1. day2.3.ipynb +0 -0
  2. testing.py +77 -0
day2.3.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
testing.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import matplotlib.pyplot as plt
3
+
4
# ANSI escape sequences used to colourise terminal output.
RESET = "\033[0m"
RED = "\033[91m"
GREEN = "\033[92m"
YELLOW = "\033[93m"

# Maps the colour names returned by Tester.color_for to terminal escape codes.
# "orange" is rendered with the yellow escape code.
COLOR_MAP = dict(red=RED, orange=YELLOW, green=GREEN)
9
+
10
+
11
class Tester:
    """Evaluate a price predictor over the leading datapoints of a dataset.

    For every evaluated datapoint the predictor's guess is compared with
    ``datapoint.price``. The absolute error, the squared log error and a
    colour rating are recorded, a colourised summary line is printed, and
    ``report`` finally shows a scatter chart of guesses against truths.

    The class relies on the module-level ``COLOR_MAP`` and ``RESET``
    constants and on ``matplotlib.pyplot`` imported as ``plt``.
    """

    def __init__(self, predictor, data, title=None, size=250):
        """Store the predictor and data and start with empty results.

        Args:
            predictor: Callable that takes one datapoint and returns a
                numeric guess.
            data: Indexable collection of datapoints; each datapoint must
                expose ``price`` and ``title`` attributes.
            title: Prefix for the chart title. When omitted, a title is
                derived from the predictor's name.
            size: Number of leading datapoints to evaluate.
        """
        self.predictor = predictor
        self.data = data
        if not title:
            # functools.partial objects and callable instances have no
            # __name__; fall back to the type name instead of raising.
            name = getattr(predictor, "__name__", None) or type(predictor).__name__
            title = name.replace("_", " ").title()
        self.title = title
        self.size = size
        self._reset()

    def _reset(self):
        """Replace every per-datapoint result list with a fresh empty list."""
        self.guesses = []
        self.truths = []
        self.errors = []
        self.sles = []
        self.colors = []

    def color_for(self, error, truth):
        """Rate an absolute error as ``"green"``, ``"orange"`` or ``"red"``.

        Args:
            error: Absolute difference between guess and truth.
            truth: The true value the guess was compared against.

        Returns:
            ``"green"`` when the error is below 40 or below 20% of the
            truth, ``"orange"`` when it is below 80 or below 40% of the
            truth, and ``"red"`` otherwise.
        """
        # A zero truth has no finite relative error; treating the ratio as
        # infinite lets the absolute thresholds decide instead of raising
        # ZeroDivisionError.
        relative = error / truth if truth else math.inf
        if error < 40 or relative < 0.2:
            return "green"
        if error < 80 or relative < 0.4:
            return "orange"
        return "red"

    def run_datapoint(self, i):
        """Evaluate datapoint ``i``, record its metrics and print a summary line.

        Args:
            i: Index into ``self.data`` of the datapoint to evaluate.
        """
        datapoint = self.data[i]
        guess = self.predictor(datapoint)
        truth = datapoint.price
        error = abs(guess - truth)
        # math.log is undefined for arguments <= 0, so negative values are
        # clamped to zero for the log error only; the recorded guess and
        # absolute error keep the raw values.
        log_error = math.log(max(truth, 0) + 1) - math.log(max(guess, 0) + 1)
        sle = log_error ** 2
        color = self.color_for(error, truth)
        # Keep the printed line compact by truncating long item titles.
        title = datapoint.title if len(datapoint.title) <= 40 else datapoint.title[:40] + "..."
        self.guesses.append(guess)
        self.truths.append(truth)
        self.errors.append(error)
        self.sles.append(sle)
        self.colors.append(color)
        print(
            f"{COLOR_MAP[color]}{i + 1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}")

    def chart(self, title):
        """Show a scatter plot of guesses against truths.

        Points are coloured by their rating and a diagonal reference line
        marks where guess equals truth. Nothing is drawn when no
        datapoints have been recorded.

        Args:
            title: Title displayed above the chart.
        """
        if not self.truths:
            # max() of an empty sequence raises, and there is nothing to plot.
            return
        max_val = max(max(self.truths), max(self.guesses))
        plt.figure(figsize=(12, 8))
        plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)
        plt.scatter(self.truths, self.guesses, s=3, c=self.colors)
        plt.xlabel('Ground Truth')
        plt.ylabel('Model Estimate')
        plt.xlim(0, max_val)
        plt.ylim(0, max_val)
        plt.title(title)
        plt.show()

    def report(self):
        """Summarise the recorded results in a chart title and show the chart.

        The title contains the mean absolute error, the root mean squared
        log error and the percentage of ``"green"`` datapoints.
        """
        # Average over the datapoints actually evaluated rather than the
        # requested size, so partial runs are not skewed.
        count = len(self.errors)
        if count == 0:
            return
        average_error = sum(self.errors) / count
        rmsle = math.sqrt(sum(self.sles) / count)
        hits = sum(1 for color in self.colors if color == "green")
        title = f"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits / count * 100:.1f}%"
        self.chart(title)

    def run(self):
        """Evaluate up to ``size`` leading datapoints and then report."""
        # Retained from the original implementation for backward
        # compatibility; nothing in this class reads it.
        self.error = 0
        # Start from empty results so a repeated run does not double-count.
        self._reset()
        try:
            # Never index past the end of the available data.
            count = min(self.size, len(self.data))
        except TypeError:
            # Data without len() support: trust the requested size.
            count = self.size
        for i in range(count):
            self.run_datapoint(i)
        self.report()

    @classmethod
    def test(cls, function, data):
        """Build a tester for ``function`` over ``data`` with defaults and run it."""
        cls(function, data).run()