Spaces:
Sleeping
Sleeping
Commit ·
13ffba1
1
Parent(s): a6b086f
GPT5_mini rag evaluation code
Browse files
- day2.3.ipynb +0 -0
- testing.py +77 -0
day2.3.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
testing.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
import matplotlib.pyplot as plt
|
| 3 |
+
|
| 4 |
+
# ANSI escape sequences used to colour the per-item lines printed to the terminal.
GREEN = "\033[92m"
|
| 5 |
+
YELLOW = "\033[93m"
|
| 6 |
+
RED = "\033[91m"
|
| 7 |
+
# Appended after a coloured line so the colour does not carry over to later output.
RESET = "\033[0m"
|
| 8 |
+
# Maps a rating name to its escape sequence; note that "orange" is rendered with the yellow code.
COLOR_MAP = {"red": RED, "orange": YELLOW, "green": GREEN}
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class Tester:
    """Evaluate a price predictor against labelled datapoints.

    Each datapoint must expose a numeric ``price`` attribute (the ground
    truth) and a string ``title`` attribute (used only for display).  The
    predictor is called once per datapoint and must return a number.

    For every evaluated datapoint the absolute error, the squared log error
    and a colour rating ("green", "orange" or "red") are recorded and a
    coloured summary line is printed.  ``report`` then aggregates the
    results and draws a scatter chart of truth against estimate.
    """

    def __init__(self, predictor, data, title=None, size=250):
        """Store the predictor, the data and the evaluation settings.

        Args:
            predictor: Callable taking one datapoint and returning a price.
            data: Indexable, sized collection of datapoints.
            title: Chart title prefix.  When falsy, it is derived from the
                predictor's name (underscores become spaces, title-cased).
            size: Maximum number of datapoints to evaluate.
        """
        self.predictor = predictor
        self.data = data
        # Callables such as functools.partial objects have no __name__;
        # fall back to the type name instead of raising AttributeError.
        fallback_name = getattr(predictor, "__name__", type(predictor).__name__)
        self.title = title or fallback_name.replace("_", " ").title()
        self.size = size
        self.guesses = []
        self.truths = []
        self.errors = []
        self.sles = []
        self.colors = []

    def color_for(self, error, truth):
        """Rate a prediction as "green", "orange" or "red".

        A prediction is "green" when the absolute error is below 40 or the
        relative error is below 20%, "orange" when the absolute error is
        below 80 or the relative error is below 40%, and "red" otherwise.

        Args:
            error: Absolute difference between guess and truth.
            truth: Ground-truth value.

        Returns:
            One of the strings "green", "orange" or "red".
        """
        # The relative error is undefined for a non-positive truth; treat it
        # as infinite so only the absolute thresholds apply (this avoids a
        # ZeroDivisionError and a spurious "green" for negative ratios).
        relative_error = error / truth if truth > 0 else math.inf
        if error < 40 or relative_error < 0.2:
            return "green"
        if error < 80 or relative_error < 0.4:
            return "orange"
        return "red"

    def run_datapoint(self, i):
        """Evaluate datapoint ``i``, record its metrics and print a summary line.

        Args:
            i: Zero-based index into ``self.data``.
        """
        datapoint = self.data[i]
        guess = self.predictor(datapoint)
        truth = datapoint.price
        error = abs(guess - truth)
        # Clamp the guess at zero for the log term only: a guess of -1 or
        # lower would otherwise raise a math domain error in math.log.
        log_error = math.log(truth + 1) - math.log(max(guess, 0) + 1)
        sle = log_error ** 2
        color = self.color_for(error, truth)
        # Keep the printed line compact by truncating long item titles.
        title = datapoint.title if len(datapoint.title) <= 40 else datapoint.title[:40] + "..."
        self.guesses.append(guess)
        self.truths.append(truth)
        self.errors.append(error)
        self.sles.append(sle)
        self.colors.append(color)
        print(
            f"{COLOR_MAP[color]}{i + 1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}")

    def chart(self, title):
        """Draw a scatter chart of ground truth against model estimate.

        Does nothing when no datapoints have been recorded.

        Args:
            title: Text shown as the chart title.
        """
        if not self.truths:
            # Nothing to plot; max() on an empty list would raise ValueError.
            return
        plt.figure(figsize=(12, 8))
        max_val = max(max(self.truths), max(self.guesses))
        # Diagonal reference line: points on it are perfect predictions.
        plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)
        plt.scatter(self.truths, self.guesses, s=3, c=self.colors)
        plt.xlabel('Ground Truth')
        plt.ylabel('Model Estimate')
        plt.xlim(0, max_val)
        plt.ylim(0, max_val)
        plt.title(title)
        plt.show()

    def report(self):
        """Aggregate the recorded results and chart them.

        Computes the mean absolute error, the root mean squared log error
        and the share of "green" predictions over the datapoints actually
        evaluated, then passes the resulting title to ``chart``.
        """
        # Average over what was really evaluated, not the requested size,
        # so the figures stay correct when fewer datapoints were available.
        count = len(self.errors)
        if count == 0:
            print("No datapoints were evaluated; nothing to report.")
            return
        average_error = sum(self.errors) / count
        rmsle = math.sqrt(sum(self.sles) / count)
        hits = sum(1 for color in self.colors if color == "green")
        title = f"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits / count * 100:.1f}%"
        self.chart(title)

    def run(self):
        """Evaluate up to ``size`` datapoints from scratch and report the results."""
        self.error = 0
        # Start from a clean slate so repeated runs do not accumulate results.
        self.guesses = []
        self.truths = []
        self.errors = []
        self.sles = []
        self.colors = []
        # Never index past the end of the data when it holds fewer than
        # ``size`` items.
        for i in range(min(self.size, len(self.data))):
            self.run_datapoint(i)
        self.report()

    @classmethod
    def test(cls, function, data):
        """Build a tester for ``function`` over ``data`` with default settings and run it."""
        cls(function, data).run()
|