# NOTE: extraction residue from the hosting page, not part of the program:
#   "Spaces: Sleeping / Sleeping"
"""
Digital Forensics Model Card Generator - Single Form Version.

A tool for creating standardized model cards for digital forensics AI/ML models.
"""
import json
import tempfile
from datetime import datetime
from pathlib import Path

import gradio as gr

from utils.generator import generate_json_output, generate_markdown_output
from utils.validators import validate_mmcid
# Version of this generator; shown in the UI and passed to the output generators.
GENERATOR_VERSION = "1.0.0-beta"

# Controlled Vocabularies
# Fixed choice lists offered by the form's dropdown and checkbox groups.
# The strings are emitted verbatim into the generated model card, so they
# must not be reworded casually.

# Usage context (DF-MCUse): how the model is deployed.
CV_USE_CONTEXT = [
    "Standalone",
    "Integrated",
    "Hybrid (both standalone and integrated)",
]

# Classification (DF-MC C): forensic domain(s) covered.
CV_CLASSIFICATION = [
    "Computer Forensics",
    "Network Forensics",
    "Mobile Device Forensics",
    "Cloud Forensics",
    "Database Forensics",
    "Memory Forensics",
    "Digital Image Forensics",
    "Digital Video/Audio Forensics",
    "IoT Forensics",
    "Multi-domain (covers multiple types)",
]

# Type of reasoning (DF-MC TR).
CV_REASONING = [
    "Deductive Reasoning (from general to specific)",
    "Inductive Reasoning (from specific to general)",
    "Abductive Reasoning (inference to best explanation)",
    "Retroductive Reasoning (hypothesis refinement)",
    "Hybrid/Mixed Reasoning",
]

# Bias type (DF-MC B).
CV_BIAS = [
    "Data Bias (historical, sampling, selection)",
    "Algorithmic Bias (model architecture, optimization)",
    "Human Bias (cognitive, confirmation, implicit)",
    "Deployment Bias (context mismatch)",
    "Reporting Bias (documentation gaps)",
    "Measurement Bias (proxy variables)",
    "Stereotyping Bias (reinforcing stereotypes)",
    "Automation Bias (over-reliance on automated results)",
    "No Identified Bias",
    "Multiple Bias Types",
]

# Root cause of bias (DF-MC CB).
CV_CAUSE_OF_BIAS = [
    "Unrepresentative Training Data",
    "Historical Inequities in Data",
    "Feature Selection Issues",
    "Labeling Inconsistencies",
    "Optimization Objective Mismatch",
    "Insufficient Diversity in Development Team",
    "Lack of Domain Expertise",
    "Temporal Drift (data age/staleness)",
    "Geographic/Cultural Limitations",
    "Tool/Method Limitations",
    "Multiple Causes",
    "Unknown/Under Investigation",
]

# Cause of error (DF-MC CE).
CV_CAUSE_OF_ERROR = [
    "Training Error (underfitting)",
    "Validation Error (model selection issues)",
    "Testing Error (generalization failure)",
    "Overfitting (high variance)",
    "Underfitting (high bias)",
    "Data Quality Issues (noise, outliers, mislabeling)",
    "Insufficient Training Data",
    "Class Imbalance",
    "Feature Engineering Issues",
    "Hyperparameter Misconfiguration",
    "Model Complexity Mismatch",
    "Adversarial Attack (poisoning, evasion)",
    "Concept Drift",
    "Tool Calibration Error",
    "Human Error in Analysis",
    "Chain of Custody Issues",
    "Multiple Error Sources",
    "Unknown/Under Investigation",
]
def save_to_file(content, filename):
    """Write *content* to a file named *filename* and return its path.

    Every call creates its own directory under the system temp location, so
    two requests that use the same *filename* (for example ``model_card.json``)
    no longer overwrite each other, as they did with a fixed ``/tmp/<filename>``
    path. The file keeps the requested name, so downloads are still named
    as before.

    The per-call directory is not removed here; the file has to outlive this
    call so it can be served for download.

    Args:
        content: Text to write.
        filename: Name of the file to create inside the new directory.

    Returns:
        The path of the written file, as a string.
    """
    target_dir = Path(tempfile.mkdtemp(prefix="model_card_"))
    filepath = target_dir / filename
    # Explicit UTF-8: the content may contain non-ASCII text (user input,
    # emoji), and the platform default encoding may not be able to encode it.
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)
    return str(filepath)
# Number of leading positional inputs (identification, case context,
# classification, and quality fields) that precede the element pairs.
_HEADER_FIELD_COUNT = 18

# MC0 top-level elements, in the order their (checkbox, description) pairs arrive.
_MC0_KEYS = (
    "algorithm", "inference", "confounder", "evaluation", "tool",
    "evidence_mc1", "file_type", "data_structure", "degree_of_confidence",
)

# MC1 data & process elements, in the order their pairs arrive after the MC0 pairs.
_MC1_KEYS = (
    "event_data", "parse_raw_data", "validate", "identify_partitions",
    "process_file_system", "identify_content_carving", "file_type_identification",
    "file_specific_processing", "file_hashing", "hash_matching",
    "mismatched_signature_detection", "timeline", "timeline_analysis",
    "geolocation", "geolocation_analysis", "keyword_indexing",
    "keyword_searching", "automated_result_interpretation", "ai_based_content_flagging",
)


def _with_other(selected, other):
    """Return the checked vocabulary items plus the free-text "other" entry, if any."""
    # ``selected`` may be None when no value was supplied; treat it as empty.
    items = list(selected or [])
    extra = (other or "").strip()
    if extra:
        items.append(extra)
    return items


def _collect_elements(keys, values):
    """Map each key to its ``{"applicable", "description"}`` record.

    ``values`` is a flat sequence ``[check, desc, check, desc, ...]`` aligned
    with ``keys``. A description is kept only when its checkbox is ticked.
    """
    return {
        key: {
            "applicable": applicable,
            "description": description if applicable else "",
        }
        for key, applicable, description in zip(keys, values[0::2], values[1::2])
    }


def generate_model_card(*args):
    """Generate model card outputs from form inputs.

    Args:
        *args: Form values in positional order: the 18 header fields
            (mmcid, version, owner, use_context, layer_n, case_statement,
            hypothesis, classification, classification_other, reasoning_type,
            reasoning_other, bias, bias_other, cause_of_bias, cause_bias_other,
            error, cause_of_error, cause_error_other), then one
            (checkbox, description) pair per MC0 element, then one pair per
            MC1 process. Extra trailing values are ignored.

    Returns:
        A 3-tuple ``(markdown_preview, json_file_path, markdown_file_path)``.
        When the MMCID is present but invalid, the first item is an error
        message and both paths are ``None``.

    Raises:
        ValueError: If fewer values are supplied than the form defines.
    """
    mc0_value_count = 2 * len(_MC0_KEYS)
    mc1_value_count = 2 * len(_MC1_KEYS)
    expected = _HEADER_FIELD_COUNT + mc0_value_count + mc1_value_count
    # Fail early with a clear message instead of an opaque unpack/index error
    # when the UI wiring and this function get out of step.
    if len(args) < expected:
        raise ValueError(
            f"generate_model_card expected at least {expected} inputs, got {len(args)}"
        )

    # Unpack the header fields in sequence
    (mmcid, version, owner, use_context, layer_n,
     case_statement, hypothesis,
     classification, classification_other,
     reasoning_type, reasoning_other,
     bias, bias_other,
     cause_of_bias, cause_bias_other,
     error, cause_of_error, cause_error_other) = args[:_HEADER_FIELD_COUNT]

    # Remaining args are MC0 and MC1 elements (checkbox + text pairs)
    element_values = args[_HEADER_FIELD_COUNT:]

    # Validate MMCID if provided
    if mmcid and not validate_mmcid(mmcid):
        return "❌ Invalid MMCID format. Please use format: DF-MC-YYYY-NNN (e.g., DF-MC-2025-001)", None, None

    # Build metadata
    metadata = {
        "mmcid": mmcid or "Not specified",
        "version": version or "N/A",
        "owner": owner or "Not specified",
        "use_context": use_context or "Not specified",
        "layer_n": layer_n or "N/A",
        "case_statement": case_statement,
        "hypothesis": hypothesis,
        "classification": _with_other(classification, classification_other),
        "reasoning_type": _with_other(reasoning_type, reasoning_other),
        "bias": _with_other(bias, bias_other),
        "cause_of_bias": _with_other(cause_of_bias, cause_bias_other),
        "error": error,
        "cause_of_error": _with_other(cause_of_error, cause_error_other),
    }

    # MC0 Top Level Elements come first, MC1 Data & Processes follow.
    top_level = _collect_elements(_MC0_KEYS, element_values[:mc0_value_count])
    processes = _collect_elements(
        _MC1_KEYS,
        element_values[mc0_value_count:mc0_value_count + mc1_value_count],
    )

    # Generate outputs
    json_output = generate_json_output(metadata, top_level, processes, GENERATOR_VERSION)
    markdown_output = generate_markdown_output(metadata, top_level, processes, GENERATOR_VERSION)

    # Save to files
    json_file = save_to_file(json_output, "model_card.json")
    md_file = save_to_file(markdown_output, "README.md")
    return markdown_output, json_file, md_file
def _add_checkbox_rows(elements, placeholder_template):
    """Render one checkbox + description row per element and return the components.

    Must be called inside an active ``gr.Blocks`` context.

    Args:
        elements: Sequence of ``(element_id, label)`` pairs. Only the label is
            rendered; the id documents which output key the row feeds.
        placeholder_template: ``str.format`` template with a ``{label}`` field,
            filled with the lower-cased label.

    Returns:
        Flat list ``[check, desc, check, desc, ...]`` in element order.
    """
    components = []
    for _element_id, label in elements:
        with gr.Row():
            check = gr.Checkbox(label=f"✓ {label}", value=False)
            desc = gr.TextArea(
                label="Description",
                placeholder=placeholder_template.format(label=label.lower()),
                lines=2,
            )
        components.extend([check, desc])
    return components


# Build Single-Form Gradio Interface
with gr.Blocks(title="Digital Forensics Model Card Generator", theme=gr.themes.Soft()) as demo:
    # The Markdown component dedents its value, so the text can follow the
    # code's indentation. Blank lines separate paragraphs, lists and rules;
    # without them "---" turns the preceding line into a heading.
    gr.Markdown(f"""
    # 🔬 Digital Forensics Model Card Generator

    Create standardized model cards for digital forensics AI/ML systems.

    **Based on:**

    - Di Maio, P. (2024). Towards Open Standards for Systemic Complexity in Digital Forensics
    - Hargreaves, C., Nelson, A., & Casey, E. (2024). An abstract model for digital forensic analysis tools

    **Version:** {GENERATOR_VERSION}

    ---
    """)

    # SECTION 1: IDENTIFICATION & CONTEXT
    gr.Markdown("## 📋 Section 1: Identification & Context")
    with gr.Row():
        mmcid = gr.Textbox(
            label="MMCID - Identifier",
            placeholder="DF-MC-2025-001",
            info="Format: DF-MC-YYYY-NNN",
        )
        version = gr.Textbox(
            label="MCV - Version",
            placeholder="1.0 or N/A",
        )
    with gr.Row():
        owner = gr.Textbox(
            label="DF-MCO - Owner",
            placeholder="Organization or individual name",
        )
        use_context = gr.Dropdown(
            choices=CV_USE_CONTEXT,
            label="DF-MCUse - Usage Context",
        )
    # NOTE(review): the original nesting of this field (inside or after the
    # row above) is not recoverable from the source; confirm the layout.
    layer_n = gr.Textbox(
        label="DF-MC Ln - Layer/Stage",
        placeholder="Specify layer or stage number if applicable",
    )

    # SECTION 2: CASE CONTEXT
    gr.Markdown("## 📝 Section 2: Case Context")
    case_statement = gr.TextArea(
        label="DF-MC CS - Case Statement",
        placeholder="Describe the case context, investigation scope, and objectives...",
        lines=3,
    )
    hypothesis = gr.TextArea(
        label="DF-MC H - Hypothesis",
        placeholder="State the hypothesis being tested or investigated...",
        lines=3,
    )

    # SECTION 3: CLASSIFICATION & APPROACH
    gr.Markdown("## 🔍 Section 3: Classification & Approach")
    gr.Markdown("*Select up to 3 items from each controlled vocabulary*")
    with gr.Row():
        with gr.Column():
            classification = gr.CheckboxGroup(
                choices=CV_CLASSIFICATION,
                label="DF-MC C - Classification (max 3)",
                info="Select forensic domain(s)",
            )
        with gr.Column():
            classification_other = gr.Textbox(
                label="Other Classification",
                placeholder="Specify if not listed",
            )
    with gr.Row():
        with gr.Column():
            reasoning_type = gr.CheckboxGroup(
                choices=CV_REASONING,
                label="DF-MC TR - Type of Reasoning (max 3)",
                info="Select reasoning method(s)",
            )
        with gr.Column():
            reasoning_other = gr.Textbox(
                label="Other Reasoning",
                placeholder="Specify if not listed",
            )

    # SECTION 4: QUALITY & LIMITATIONS
    gr.Markdown("## ⚠️ Section 4: Quality & Limitations")
    with gr.Row():
        with gr.Column():
            bias = gr.CheckboxGroup(
                choices=CV_BIAS,
                label="DF-MC B - Bias (max 3)",
                info="Identify bias type(s)",
            )
        with gr.Column():
            bias_other = gr.Textbox(
                label="Other Bias",
                placeholder="Specify if not listed",
            )
    with gr.Row():
        with gr.Column():
            cause_of_bias = gr.CheckboxGroup(
                choices=CV_CAUSE_OF_BIAS,
                label="DF-MC CB - Cause of Bias (max 3)",
                info="Identify root cause(s)",
            )
        with gr.Column():
            cause_bias_other = gr.Textbox(
                label="Other Cause of Bias",
                placeholder="Specify if not listed",
            )
    error = gr.TextArea(
        label="DF-MC E - Error Description",
        placeholder="Describe any errors encountered during analysis...",
        lines=3,
    )
    with gr.Row():
        with gr.Column():
            cause_of_error = gr.CheckboxGroup(
                choices=CV_CAUSE_OF_ERROR,
                label="DF-MC CE - Cause of Error (max 3)",
                info="Identify error cause(s)",
            )
        with gr.Column():
            cause_error_other = gr.Textbox(
                label="Other Cause of Error",
                placeholder="Specify if not listed",
            )

    # SECTION 5: TOP LEVEL ELEMENTS (MC0 - Figure 6, deduplicated)
    gr.Markdown("## 🔝 Section 5: Top Level Elements (DF MC 0 - Figure 6)")
    gr.Markdown("*Check applicable elements and provide descriptions*")
    # Values reach generate_model_card positionally, so this order must match
    # the order in which that function assigns its MC0 keys.
    mc0_elements = [
        ("algorithm", "Algorithm"),
        ("inference", "Inference"),
        ("confounder", "Confounder"),
        ("evaluation", "Evaluation"),
        ("tool", "Tool"),
        ("evidence_mc1", "Evidence MC1"),
        ("file_type", "File Type"),
        ("data_structure", "Data Structure"),
        ("degree_of_confidence", "Degree of Confidence"),
    ]
    mc0_components = _add_checkbox_rows(
        mc0_elements,
        "Describe {label} if applicable...",
    )

    # SECTION 6: DATA & PROCESSES (MC1 - Figure 7)
    gr.Markdown("## ⚙️ Section 6: Data Types & Analytical Processes (DF MC 1 - Figure 7)")
    gr.Markdown("*Check applicable processes and describe how they were performed*")
    # Same positional contract as the MC0 list: keep this order in step with
    # the process keys used by generate_model_card.
    mc1_processes = [
        ("event_data", "EVENT/DATA"),
        ("parse_raw_data", "Parse Raw Data Contained Within the Image"),
        ("validate", "Validate the Data Compared"),
        ("identify_partitions", "Identify Partitions"),
        ("process_file_system", "Process File System"),
        ("identify_content_carving", "Identify Content (Carving)"),
        ("file_type_identification", "File Type Identification"),
        ("file_specific_processing", "File-Specific Processing"),
        ("file_hashing", "File Hashing"),
        ("hash_matching", "Hash Matching"),
        ("mismatched_signature_detection", "Mismatched Signature Detection"),
        ("timeline", "Timeline"),
        ("timeline_analysis", "Timeline Analysis"),
        ("geolocation", "Geolocation"),
        ("geolocation_analysis", "Geolocation Analysis"),
        ("keyword_indexing", "Keyword Indexing"),
        ("keyword_searching", "Keyword Searching"),
        ("automated_result_interpretation", "Automated Result Interpretation"),
        ("ai_based_content_flagging", "AI-Based Content Flagging"),
    ]
    mc1_components = _add_checkbox_rows(
        mc1_processes,
        "Describe how {label} was performed...",
    )

    # GENERATION & OUTPUT
    gr.Markdown("---")
    gr.Markdown("## 🚀 Generate Your Model Card")
    generate_btn = gr.Button("Generate Model Card", variant="primary", size="lg")
    gr.Markdown("### Preview & Download")
    with gr.Row():
        with gr.Column():
            gr.Markdown("**Markdown Preview:**")
            preview_output = gr.Markdown()
        with gr.Column():
            gr.Markdown("**Download Files:**")
            json_download = gr.File(label="JSON File", type="filepath")
            md_download = gr.File(label="README.md", type="filepath")

    # Wire up generation. The header fields come first, in the order
    # generate_model_card unpacks them, followed by the MC0 and MC1 pairs.
    all_inputs = [
        mmcid, version, owner, use_context, layer_n,
        case_statement, hypothesis,
        classification, classification_other,
        reasoning_type, reasoning_other,
        bias, bias_other,
        cause_of_bias, cause_bias_other,
        error, cause_of_error, cause_error_other,
    ] + mc0_components + mc1_components
    generate_btn.click(
        fn=generate_model_card,
        inputs=all_inputs,
        outputs=[preview_output, json_download, md_download],
    )

    # NOTE(review): the Di Maio URL host ("papers.cool.arxiv") does not look
    # like a resolvable domain; confirm the intended link before release.
    gr.Markdown(f"""
    ---

    ### 📚 References & Information

    **References:**

    - Di Maio, P. (2024). Towards Open Standards for Systemic Complexity in Digital Forensics. https://papers.cool.arxiv/2512.12970
    - Hargreaves, C., Nelson, A., & Casey, E. (2024). An abstract model for digital forensic analysis tools—A foundation for systematic error mitigation analysis. *Forensic Science International: Digital Investigation*, 48.

    **Generator Version:** {GENERATOR_VERSION} (Beta)

    **License:** Apache 2.0

    *This is a beta version. All fields are optional. Feedback welcome!*
    """)


if __name__ == "__main__":
    demo.launch()