Upload benchmark_config.yaml with huggingface_hub

Browse files

Files changed (1) hide show

benchmark_config.yaml +59 -0

benchmark_config.yaml ADDED Viewed

	@@ -0,0 +1,59 @@

+model:  # request settings for the model endpoint at base_url
+  base_url: "http://127.0.0.1:8000"  # loopback address, port 8000
+  max_tokens: 256  # presumably the per-response generation cap; confirm against the runner
+  temperature: 0.1
+  timeout: 30  # unit not stated here; presumably seconds (TODO confirm)
+datasets:  # mapping of dataset entries; only benchmark_dataset is defined here
+  benchmark_dataset:
+    file_path: "Personal_De-identifier_Benchmark_SFT.jsonl"  # relative path; base directory not specified here (TODO confirm)
+    sample_size: 100  # Use first 100 examples for quick benchmarking
+    instruction_field: "instruction"  # the *_field values presumably name keys in each JSONL record; verify against the loader
+    input_field: "input"
+    expected_output_field: "response"
+metrics:  # four metric entries (name/description/type) plus the domain_performance grouping
+  # Primary metrics for HuggingFace
+  pii_detection:
+    name: "PII Detection Rate"
+    description: "Percentage of personal identifiers correctly identified and masked"
+    type: "accuracy"  # the set of accepted type values is not defined in this file; check the runner
+  completeness:
+    name: "Completeness Score"
+    description: "Percentage of texts where all PII was successfully removed"
+    type: "binary_accuracy"
+  semantic_preservation:
+    name: "Semantic Preservation"
+    description: "How well the original meaning is preserved (placeholder-based similarity)"
+    type: "similarity"
+  latency:
+    name: "Average Latency"
+    description: "Average response time in milliseconds"  # the unit is stated only in this description text
+    type: "latency"
+  # Domain-specific performance
+  domain_performance:  # unlike the siblings above, children carry name + keywords, with no description/type
+    medical:
+      name: "Medical Records"
+      keywords: ["patient", "doctor", "hospital", "medical", "diagnosis"]  # presumably used to assign examples to this domain; verify
+    legal:
+      name: "Legal Documents"
+      keywords: ["deponent", "attorney", "case", "court", "legal"]
+    hr:
+      name: "HR Records"
+      keywords: ["employee", "salary", "hr", "personnel", "recruitment"]  # NOTE(review): "hr" would also hit "three"/"through" if matching is substring-based; confirm
+    customer_service:
+      name: "Customer Service"
+      keywords: ["customer", "complaint", "service", "support", "inquiry"]
+    research:
+      name: "Research Data"
+      keywords: ["participant", "study", "research", "consent", "ethics"]
+output:  # result file names and example-reporting options
+  results_file: "benchmarks.txt"  # relative path; output directory not specified here (TODO confirm)
+  detailed_results_file: "benchmark_results.json"
+  include_examples: true
+  max_examples: 10  # presumably only applies when include_examples is true; confirm