Minibase committed on
Commit
d16cb83
·
verified ·
1 Parent(s): fb21b1a

Upload benchmark_config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. benchmark_config.yaml +59 -0
benchmark_config.yaml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_url: "http://127.0.0.1:8000"
3
+ max_tokens: 256
4
+ temperature: 0.1
5
+ timeout: 30
6
+
7
+ datasets:
8
+ benchmark_dataset:
9
+ file_path: "Personal_De-identifier_Benchmark_SFT.jsonl"
10
+ sample_size: 100 # Use first 100 examples for quick benchmarking
11
+ instruction_field: "instruction"
12
+ input_field: "input"
13
+ expected_output_field: "response"
14
+
15
+ metrics:
16
+ # Primary metrics for HuggingFace
17
+ pii_detection:
18
+ name: "PII Detection Rate"
19
+ description: "Percentage of personal identifiers correctly identified and masked"
20
+ type: "accuracy"
21
+
22
+ completeness:
23
+ name: "Completeness Score"
24
+ description: "Percentage of texts where all PII was successfully removed"
25
+ type: "binary_accuracy"
26
+
27
+ semantic_preservation:
28
+ name: "Semantic Preservation"
29
+ description: "How well the original meaning is preserved (placeholder-based similarity)"
30
+ type: "similarity"
31
+
32
+ latency:
33
+ name: "Average Latency"
34
+ description: "Average response time in milliseconds"
35
+ type: "latency"
36
+
37
+ # Domain-specific performance
38
+ domain_performance:
39
+ medical:
40
+ name: "Medical Records"
41
+ keywords: ["patient", "doctor", "hospital", "medical", "diagnosis"]
42
+ legal:
43
+ name: "Legal Documents"
44
+ keywords: ["deponent", "attorney", "case", "court", "legal"]
45
+ hr:
46
+ name: "HR Records"
47
+ keywords: ["employee", "salary", "hr", "personnel", "recruitment"]
48
+ customer_service:
49
+ name: "Customer Service"
50
+ keywords: ["customer", "complaint", "service", "support", "inquiry"]
51
+ research:
52
+ name: "Research Data"
53
+ keywords: ["participant", "study", "research", "consent", "ethics"]
54
+
55
+ output:
56
+ results_file: "benchmarks.txt"
57
+ detailed_results_file: "benchmark_results.json"
58
+ include_examples: true
59
+ max_examples: 10