| import json |
| import sys |
| from pathlib import Path |
|
|
# Directory one level above this script's own directory. It is pushed to the
# front of the import search path (only if not already listed) before the
# `import app` that follows; presumably that is where `app` lives -- confirm
# against the repository layout.
ROOT = Path(__file__).resolve().parents[1]
if sys.path.count(str(ROOT)) == 0:
    sys.path[:0] = [str(ROOT)]
|
|
| import app |
|
|
|
|
| def _assistant_text(result): |
| history = result[0] or [] |
| return history[-1]["content"] if history else "" |
|
|
|
|
def _scenario(name, message, history, active_schema, state):
    """Run one chat turn through ``app.generate_response`` and summarize it.

    Args:
        name: Label stored in the returned record (used later for grading).
        message: User message sent for this turn.
        history: Chat history forwarded to the app.
        active_schema: Active schema value forwarded to the app.
        state: Conversation state forwarded to the app.

    Returns:
        A dict with the keys ``name``, ``message``, ``assistant``, ``sql``,
        ``status``, ``active_schema``, ``state`` and ``history``.
    """
    outputs = app.generate_response(
        message,
        history,
        active_schema,
        app.FINE_TUNED_MODEL_KEY,
        None,
        state,
    )

    summary = {
        "name": name,
        "message": message,
        "assistant": _assistant_text(outputs),
    }
    # Record key -> position of the value inside the tuple returned by
    # ``app.generate_response``.
    positions = (
        ("sql", 4),
        ("status", 7),
        ("active_schema", 2),
        ("state", 8),
        ("history", 0),
    )
    for field, index in positions:
        summary[field] = outputs[index]
    return summary
|
|
|
|
| def _contains_any(text, needles): |
| text = (text or "").lower() |
| return any(needle.lower() in text for needle in needles) |
|
|
|
|
def _grade(records):
    """Evaluate the fixed set of smoke-test checks against scenario records.

    Args:
        records: Iterable of scenario records (dicts with at least ``name``,
            ``assistant``, ``sql`` and ``state``). Records named
            ``greeting``, ``schema_request``, ``confirm_generate``,
            ``edit_schema``, ``query_schema`` and ``smalltalk_with_schema``
            must all be present.

    Returns:
        A list of dicts, one per check, each with ``name``, ``pass`` (bool)
        and ``detail``.

    Raises:
        KeyError: If one of the expected scenario names is missing.
    """
    lookup = {}
    for entry in records:
        lookup[entry["name"]] = entry

    def chat_without_sql(scenario):
        # Passes when there is assistant text and no SQL was produced.
        turn = lookup[scenario]
        return bool(turn["assistant"]) and not turn["sql"]

    def sql_mentions(scenario, keyword):
        # Case-insensitive keyword test that tolerates missing SQL.
        return keyword in (lookup[scenario]["sql"] or "").upper()

    outcomes = (
        (
            "smalltalk_is_conversational",
            chat_without_sql("greeting"),
            "Greeting should produce chat text and no SQL.",
        ),
        (
            "schema_suggestion_sets_pending",
            bool((lookup["schema_request"]["state"] or {}).get("pending_schema_suggestion")),
            "Domain table request should create a pending schema proposal.",
        ),
        (
            "confirmation_generates_create_table",
            sql_mentions("confirm_generate", "CREATE TABLE"),
            "Confirmation should generate CREATE TABLE SQL.",
        ),
        (
            "edit_updates_schema",
            _contains_any(lookup["edit_schema"]["sql"], ["numero_animais", "num_animais"]),
            "Edit should replace capacidade with an animal-count column.",
        ),
        (
            "query_generates_select",
            sql_mentions("query_schema", "SELECT"),
            "Natural query should generate SELECT SQL.",
        ),
        (
            "smalltalk_with_schema_stays_chat",
            chat_without_sql("smalltalk_with_schema"),
            "Smalltalk with active schema should not become SQL.",
        ),
    )
    return [
        {"name": label, "pass": verdict, "detail": explanation}
        for label, verdict, explanation in outcomes
    ]
|
|
|
|
def main():
    """Run the scripted conversation, grade it, and print a JSON report.

    Loads the model identified by ``app.FINE_TUNED_MODEL_ID``, then sends six
    scripted messages in order. The history, active schema and state returned
    by each turn are fed into the next one. The per-turn records (without the
    history) and the check results are printed as indented JSON.

    Returns:
        ``0`` when every check passed, ``1`` otherwise.
    """
    app.load_model(app.FINE_TUNED_MODEL_ID)

    # (scenario name, user message) pairs, executed in this order.
    script = (
        ("greeting", "oi"),
        ("schema_request", "preciso de uma tabela sobre zoologico"),
        ("confirm_generate", "gera"),
        ("edit_schema", "troca capacidade por numero_animais"),
        ("query_schema", "liste zoologicos de Sao Paulo"),
        ("smalltalk_with_schema", "como voce esta hoje?"),
    )

    history, active_schema = [], ""
    state = app.chat_core.default_state()
    records = []
    for name, message in script:
        outcome = _scenario(name, message, history, active_schema, state)
        # The history is carried to the next turn but left out of the report.
        history = outcome.pop("history")
        active_schema, state = outcome["active_schema"], outcome["state"]
        records.append(outcome)

    checks = _grade(records)
    passed = all(check["pass"] for check in checks)
    report = {
        "model": app.FINE_TUNED_MODEL_ID,
        "passed": passed,
        "checks": checks,
        "records": records,
    }
    print(json.dumps(report, ensure_ascii=False, indent=2))
    return 0 if passed else 1
|
|
|
|
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    sys.exit(main())
|
|
|
|