|
|
| """
|
| Task 4.1 Verification Script.
|
|
|
| Runs the verification code from TASKS.md and validates acceptance criteria:
|
| - 1000+ total samples
|
| - 60% scam, 40% legitimate
|
| - 50% English, 40% Hindi, 10% Hinglish
|
| - All samples validated
|
| """
|
|
|
| import json
|
| import os
|
| import sys
|
|
|
|
|
| if sys.stdout.encoding != 'utf-8':
|
| sys.stdout.reconfigure(encoding='utf-8')
|
|
|
|
|
| DATASET_PATH = os.path.join(
|
| os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
|
| "data",
|
| "scam_detection_train.jsonl"
|
| )
|
|
|
|
|
| def main():
|
| print("=" * 60)
|
| print("Task 4.1: Dataset Creation - Verification")
|
| print("=" * 60)
|
|
|
|
|
| if not os.path.exists(DATASET_PATH):
|
| print(f"\n[ERROR] Dataset file not found: {DATASET_PATH}")
|
| print("Run 'python scripts/generate_dataset.py' first.")
|
| return 1
|
|
|
|
|
| print(f"\nLoading: {DATASET_PATH}")
|
| with open(DATASET_PATH, encoding="utf-8") as f:
|
| data = [json.loads(line) for line in f]
|
|
|
|
|
| print(f"\nTotal samples: {len(data)}")
|
| scam_ratio = sum(1 for d in data if d['label'] == 'scam') / len(data)
|
| print(f"Scam ratio: {scam_ratio:.2%}")
|
|
|
|
|
| print("\n--- Detailed Statistics ---")
|
|
|
|
|
| scam_count = sum(1 for d in data if d['label'] == 'scam')
|
| legit_count = sum(1 for d in data if d['label'] == 'legitimate')
|
| print(f"Scam: {scam_count} ({scam_count/len(data):.1%})")
|
| print(f"Legitimate: {legit_count} ({legit_count/len(data):.1%})")
|
|
|
|
|
| en_count = sum(1 for d in data if d['language'] == 'en')
|
| hi_count = sum(1 for d in data if d['language'] == 'hi')
|
| hinglish_count = sum(1 for d in data if d['language'] == 'hinglish')
|
| print(f"\nEnglish: {en_count} ({en_count/len(data):.1%})")
|
| print(f"Hindi: {hi_count} ({hi_count/len(data):.1%})")
|
| print(f"Hinglish: {hinglish_count} ({hinglish_count/len(data):.1%})")
|
|
|
|
|
| print("\n" + "=" * 60)
|
| print("Acceptance Criteria")
|
| print("=" * 60)
|
|
|
| criteria_results = []
|
|
|
|
|
| ac1_pass = len(data) >= 1000
|
| criteria_results.append(ac1_pass)
|
| print(f"\nAC-1: 1000+ total samples")
|
| print(f" Value: {len(data)}")
|
| print(f" Status: {'PASS' if ac1_pass else 'FAIL'}")
|
|
|
|
|
| ac2_pass = 0.55 <= scam_ratio <= 0.65
|
| criteria_results.append(ac2_pass)
|
| print(f"\nAC-2: 60% scam, 40% legitimate")
|
| print(f" Scam: {scam_ratio:.1%} (expected: 55-65%)")
|
| print(f" Legitimate: {1-scam_ratio:.1%} (expected: 35-45%)")
|
| print(f" Status: {'PASS' if ac2_pass else 'FAIL'}")
|
|
|
|
|
| en_ratio = en_count / len(data)
|
| hi_ratio = hi_count / len(data)
|
| hinglish_ratio = hinglish_count / len(data)
|
| ac3_en = 0.45 <= en_ratio <= 0.55
|
| ac3_hi = 0.35 <= hi_ratio <= 0.45
|
| ac3_hinglish = 0.05 <= hinglish_ratio <= 0.15
|
| ac3_pass = ac3_en and ac3_hi and ac3_hinglish
|
| criteria_results.append(ac3_pass)
|
| print(f"\nAC-3: 50% English, 40% Hindi, 10% Hinglish")
|
| print(f" English: {en_ratio:.1%} (expected: 45-55%) - {'PASS' if ac3_en else 'FAIL'}")
|
| print(f" Hindi: {hi_ratio:.1%} (expected: 35-45%) - {'PASS' if ac3_hi else 'FAIL'}")
|
| print(f" Hinglish: {hinglish_ratio:.1%} (expected: 5-15%) - {'PASS' if ac3_hinglish else 'FAIL'}")
|
| print(f" Status: {'PASS' if ac3_pass else 'FAIL'}")
|
|
|
|
|
| validation_errors = 0
|
| required_fields = ["id", "message", "language", "label", "confidence", "scam_type", "indicators", "metadata"]
|
| for sample in data:
|
| for field in required_fields:
|
| if field not in sample:
|
| validation_errors += 1
|
| break
|
| if "metadata" in sample:
|
| meta = sample["metadata"]
|
| if not all(k in meta for k in ["source", "annotator", "annotation_date", "difficulty"]):
|
| validation_errors += 1
|
|
|
| ac4_pass = validation_errors == 0
|
| criteria_results.append(ac4_pass)
|
| print(f"\nAC-4: All samples validated")
|
| print(f" Validation errors: {validation_errors}")
|
| print(f" Status: {'PASS' if ac4_pass else 'FAIL'}")
|
|
|
|
|
| print("\n" + "=" * 60)
|
| print("SUMMARY")
|
| print("=" * 60)
|
|
|
| all_pass = all(criteria_results)
|
| print(f"\nAC-1 (1000+ samples): {'PASS' if criteria_results[0] else 'FAIL'}")
|
| print(f"AC-2 (60% scam ratio): {'PASS' if criteria_results[1] else 'FAIL'}")
|
| print(f"AC-3 (Language distribution): {'PASS' if criteria_results[2] else 'FAIL'}")
|
| print(f"AC-4 (All validated): {'PASS' if criteria_results[3] else 'FAIL'}")
|
|
|
| if all_pass:
|
| print("\n[SUCCESS] ALL ACCEPTANCE CRITERIA PASSED")
|
| return 0
|
| else:
|
| print("\n[FAILURE] SOME ACCEPTANCE CRITERIA FAILED")
|
| return 1
|
|
|
|
|
| if __name__ == "__main__":
|
| sys.exit(main())
|
|
|