|
|
""" |
|
|
Validation Dataset Loader for UI Validation Use Case |
|
|
|
|
|
Loads validation datapoints from SQLite database and converts to GEPA-compatible format. |
|
|
Supports filtering by data_type (trainset/valset/testset) and confirmed status. |
|
|
""" |
|
|
|
|
|
import os |
|
|
import sqlite3 |
|
|
import base64 |
|
|
import logging |
|
|
from typing import List, Dict, Any, Optional, Literal |
|
|
from pathlib import Path |
|
|
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
|
|
|
class ValidationDatasetLoader:
    """
    Loads validation dataset from SQLite database.

    Database schema:
    - validation_data: id, image_id, command, result (0/1), reasoning, data_type, confirmed, created_at
    - images: image_id, mime, bytes (BLOB), created_at

    Converts to GEPA format:
    - input: command text (seed prompt will be provided in test script)
    - output: "true" or "false" (converted from 0/1)
    - image_base64: base64 encoded image (TOP LEVEL for UniversalConverter)
    - metadata: All original fields plus converted values

    Note: The seed prompt is NOT stored in database - it will be provided in the
    test script. The input field contains just the command, and the image is at
    top level.
    """

    # Class-level logger keeps the class self-contained; resolves to the same
    # logger as the module-level one defined in this file.
    _logger = logging.getLogger(__name__)

    def __init__(
        self,
        db_path: Optional[str] = None,
        confirmed_only: bool = True
    ):
        """
        Initialize validation dataset loader.

        Args:
            db_path: Path to SQLite database file.
                Default: "./validation_data.db" or from VD_DB_PATH env var
            confirmed_only: If True, only load datapoints where confirmed=1.
                Default: True (only manually reviewed data)

        Raises:
            FileNotFoundError: If database file doesn't exist
            sqlite3.Error: If database connection fails
        """
        if db_path is None:
            db_path = os.getenv("VD_DB_PATH", "./validation_data.db")

        self.db_path = Path(db_path).resolve()

        if not self.db_path.exists():
            raise FileNotFoundError(
                f"Database file not found: {self.db_path}\n"
                f"Make sure validation_data_ui_server_async.py has been run at least once to create the database."
            )

        self.confirmed_only = confirmed_only

    def _connect(self) -> sqlite3.Connection:
        """Open a DB connection with dict-like row access (shared by all queries)."""
        conn = sqlite3.connect(str(self.db_path))
        conn.row_factory = sqlite3.Row
        return conn

    @staticmethod
    def _row_to_item(row: sqlite3.Row) -> Dict[str, Any]:
        """Convert one joined validation_data+images row into a GEPA dataset item."""
        return {
            # Command only - the seed prompt is supplied by the test script.
            "input": row["command"],
            # 0/1 result column rendered as the strings "true"/"false".
            "output": "true" if row["result"] == 1 else "false",
            # Image at TOP LEVEL so UniversalConverter pairs it with the command.
            "image_base64": base64.b64encode(row["bytes"]).decode("utf-8"),
            "metadata": {
                "id": row["id"],
                "image_id": row["image_id"],
                "command": row["command"],
                "result": bool(row["result"]),
                "result_int": row["result"],
                "reasoning": row["reasoning"],
                "data_type": row["data_type"],
                "confirmed": bool(row["confirmed"]),
                "created_at": row["created_at"],
                "mime": row["mime"],
            }
        }

    def load_dataset(
        self,
        data_type: Optional[Literal["trainset", "valset", "testset"]] = None,
        confirmed_only: Optional[bool] = None
    ) -> List[Dict[str, Any]]:
        """
        Load dataset from database and convert to GEPA format.

        Args:
            data_type: Filter by data_type. If None, loads all types.
                Options: "trainset", "valset", "testset"
            confirmed_only: Override instance default. If True, only load
                confirmed datapoints. If None, uses instance default
                (self.confirmed_only)

        Returns:
            List of dataset items in GEPA format, each shaped like:
            {
                "input": "<command text>",
                "output": "true" | "false",
                "image_base64": "<base64 encoded image>",
                "metadata": {id, image_id, command, result, result_int,
                             reasoning, data_type, confirmed, created_at, mime}
            }

        Note: Seed prompt is provided separately in test script, not in database.

        Raises:
            sqlite3.Error: If database query fails
            ValueError: If no datapoints found matching criteria
        """
        # Explicit argument wins over the instance default; False is a valid
        # override, hence the "is not None" check instead of truthiness.
        use_confirmed = confirmed_only if confirmed_only is not None else self.confirmed_only

        conn = self._connect()
        try:
            query = """
                SELECT
                    v.id, v.image_id, v.command, v.result, v.reasoning,
                    v.data_type, v.confirmed, v.created_at,
                    i.mime, i.bytes
                FROM validation_data v
                INNER JOIN images i ON v.image_id = i.image_id
                WHERE 1=1
            """
            params: List[Any] = []

            if use_confirmed:
                query += " AND v.confirmed = 1"

            if data_type:
                query += " AND v.data_type = ?"
                params.append(data_type)

            query += " ORDER BY v.id ASC"

            rows = conn.execute(query, params).fetchall()

            if not rows:
                # Build a human-readable description of the active filters for
                # the error message.
                filter_msg = []
                if use_confirmed:
                    filter_msg.append("confirmed=1")
                if data_type:
                    filter_msg.append(f"data_type='{data_type}'")

                filter_str = " with filters: " + ", ".join(filter_msg) if filter_msg else ""
                raise ValueError(
                    f"No datapoints found{filter_str} in database: {self.db_path}\n"
                    f"Make sure you have generated and saved datapoints using the validation UI."
                )

            dataset = [self._row_to_item(row) for row in rows]

            data_type_str = f" ({data_type})" if data_type else ""
            confirmed_str = " (confirmed only)" if use_confirmed else " (all)"
            # Lazy %-style args avoid formatting when the level is disabled.
            self._logger.info(
                "Loaded %d validation datapoints%s%s",
                len(dataset), data_type_str, confirmed_str
            )

            return dataset

        finally:
            conn.close()

    def load_split_dataset(
        self,
        confirmed_only: Optional[bool] = None
    ) -> tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]:
        """
        Load dataset split by data_type (trainset/valset/testset).

        Convenience method that loads all three splits at once.

        Args:
            confirmed_only: Override instance default. If True, only load
                confirmed datapoints.

        Returns:
            Tuple of (train_set, val_set, test_set) in GEPA format

        Raises:
            ValueError: If any of the three splits is empty (propagated from
                load_dataset)

        Example:
            loader = ValidationDatasetLoader(db_path="./validation_data.db")
            train, val, test = loader.load_split_dataset()
        """
        train_set = self.load_dataset(data_type="trainset", confirmed_only=confirmed_only)
        val_set = self.load_dataset(data_type="valset", confirmed_only=confirmed_only)
        test_set = self.load_dataset(data_type="testset", confirmed_only=confirmed_only)

        self._logger.info(
            "Dataset Split Summary: Training=%d, Validation=%d, Test=%d, Total=%d",
            len(train_set), len(val_set), len(test_set),
            len(train_set) + len(val_set) + len(test_set)
        )

        return train_set, val_set, test_set

    def get_dataset_stats(self) -> Dict[str, Any]:
        """
        Get statistics about the dataset in the database.

        Counts are taken over ALL rows regardless of the confirmed_only setting.

        Returns:
            Dictionary with dataset statistics:
            {
                "total": 100,
                "confirmed": 95,
                "unconfirmed": 5,
                "by_data_type": {"trainset": 70, "valset": 15, "testset": 15},
                "by_result": {"true": 50, "false": 50}
            }
        """
        conn = self._connect()
        try:
            total = conn.execute("SELECT COUNT(*) FROM validation_data").fetchone()[0]
            confirmed = conn.execute(
                "SELECT COUNT(*) FROM validation_data WHERE confirmed = 1"
            ).fetchone()[0]

            data_type_rows = conn.execute("""
                SELECT data_type, COUNT(*) as count
                FROM validation_data
                GROUP BY data_type
            """).fetchall()

            result_rows = conn.execute("""
                SELECT result, COUNT(*) as count
                FROM validation_data
                GROUP BY result
            """).fetchall()

            return {
                "total": total,
                "confirmed": confirmed,
                "unconfirmed": total - confirmed,
                "by_data_type": {row["data_type"]: row["count"] for row in data_type_rows},
                "by_result": {
                    "true": sum(row["count"] for row in result_rows if row["result"] == 1),
                    "false": sum(row["count"] for row in result_rows if row["result"] == 0),
                },
            }

        finally:
            conn.close()
|
|
|
|
|
|
|
|
def load_validation_dataset(
    db_path: Optional[str] = None,
    data_type: Optional[Literal["trainset", "valset", "testset"]] = None,
    confirmed_only: bool = True
) -> List[Dict[str, Any]]:
    """
    Convenience function to load validation dataset.

    Args:
        db_path: Path to SQLite database file. Default: "./validation_data.db"
        data_type: Filter by data_type. If None, loads all types.
        confirmed_only: If True, only load confirmed datapoints.

    Returns:
        List of dataset items in GEPA format

    Example:
        # Load all confirmed training data
        train_data = load_validation_dataset(data_type="trainset", confirmed_only=True)

        # Load all confirmed data
        all_data = load_validation_dataset(confirmed_only=True)
    """
    # One-shot loader: construct and immediately query.
    return ValidationDatasetLoader(
        db_path=db_path,
        confirmed_only=confirmed_only,
    ).load_dataset(data_type=data_type, confirmed_only=confirmed_only)
|
|
|
|
|
|
|
|
def load_validation_split(
    db_path: Optional[str] = None,
    confirmed_only: bool = True
) -> tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Convenience function to load validation dataset split by data_type.

    Args:
        db_path: Path to SQLite database file. Default: "./validation_data.db"
        confirmed_only: If True, only load confirmed datapoints.

    Returns:
        Tuple of (train_set, val_set, test_set) in GEPA format

    Example:
        train, val, test = load_validation_split(confirmed_only=True)
    """
    # Delegate the three-way split to the loader instance.
    return ValidationDatasetLoader(
        db_path=db_path,
        confirmed_only=confirmed_only,
    ).load_split_dataset(confirmed_only=confirmed_only)
|
|
|
|
|
|
|
|
|
|
|
def _demo() -> None:
    """Manual smoke test: print dataset stats and a sample training item."""
    print("🚀 Testing Validation Dataset Loader...")

    try:
        loader = ValidationDatasetLoader()

        print("\n📊 Dataset Statistics:")
        stats = loader.get_dataset_stats()
        print(f" Total: {stats['total']}")
        print(f" Confirmed: {stats['confirmed']}")
        print(f" Unconfirmed: {stats['unconfirmed']}")
        print(f" By data_type: {stats['by_data_type']}")
        print(f" By result: {stats['by_result']}")

        print("\n📦 Loading split dataset...")
        train, val, test = loader.load_split_dataset()

        if train:
            sample = train[0]
            print(f"\n📝 Sample Training Item:")
            print(f" Input: {sample['input']}")
            print(f" Output: {sample['output']}")
            print(f" Image ID: {sample['metadata']['image_id'][:8]}...")
            print(f" Data Type: {sample['metadata']['data_type']}")
            print(f" Result: {sample['metadata']['result']} (int: {sample['metadata']['result_int']})")

    except FileNotFoundError as e:
        print(f"❌ {e}")
        print("\n💡 Make sure validation_data_ui_server_async.py has been run to create the database.")
    except ValueError as e:
        print(f"❌ {e}")
        print("\n💡 Generate and save some datapoints using the validation UI first.")


if __name__ == "__main__":
    _demo()