# Benchmark configuration: model endpoint, dataset, metrics and output files.
# NOTE(review): the nesting below was reconstructed from key names because the
# original indentation was lost; confirm it against the code that loads this
# file before relying on it.

# Model endpoint and generation settings.
model:
  base_url: "http://127.0.0.1:8000"
  max_tokens: 256
  temperature: 0.1
  # NOTE(review): unit is not stated in this file; presumably seconds.
  timeout: 30

# Benchmark input file and the record fields read from it.
datasets:
  benchmark_dataset:
    file_path: "Personal_De-identifier_Benchmark_SFT.jsonl"
    sample_size: 100
    instruction_field: "instruction"
    input_field: "input"
    expected_output_field: "response"

# Metrics to report. Each metric has a display name, a description and a type.
metrics:
  pii_detection:
    name: "PII Detection Rate"
    description: "Percentage of personal identifiers correctly identified and masked"
    type: "accuracy"

  completeness:
    name: "Completeness Score"
    description: "Percentage of texts where all PII was successfully removed"
    type: "binary_accuracy"

  semantic_preservation:
    name: "Semantic Preservation"
    description: "How well the original meaning is preserved (placeholder-based similarity)"
    type: "similarity"

  latency:
    name: "Average Latency"
    description: "Average response time in milliseconds"
    type: "latency"

  # Domains, each with a display name and a keyword list.
  # NOTE(review): placed under `metrics` as a best guess; if the loader reads
  # `domain_performance` as a top-level key, dedent this whole mapping.
  domain_performance:
    medical:
      name: "Medical Records"
      keywords: ["patient", "doctor", "hospital", "medical", "diagnosis"]
    legal:
      name: "Legal Documents"
      keywords: ["deponent", "attorney", "case", "court", "legal"]
    hr:
      name: "HR Records"
      keywords: ["employee", "salary", "hr", "personnel", "recruitment"]
    customer_service:
      name: "Customer Service"
      keywords: ["customer", "complaint", "service", "support", "inquiry"]
    research:
      name: "Research Data"
      keywords: ["participant", "study", "research", "consent", "ethics"]

# Result files and how many examples to include in them.
output:
  results_file: "benchmarks.txt"
  detailed_results_file: "benchmark_results.json"
  include_examples: true
  max_examples: 10