Spaces:

STARBORN
/

forensics_mc_generator

Sleeping

App Files Files Community

STARBORN committed on Dec 19, 2025

Commit

ca65075

verified ·

1 Parent(s): a2797d3

Upload app(5).py

Browse files

Files changed (1) hide show

app(5).py +430 -0

app(5).py ADDED Viewed

	@@ -0,0 +1,430 @@

+"""
+Digital Forensics Model Card Generator - Single Form Version
+A tool for creating standardized model cards for digital forensics AI/ML models
+"""
+import gradio as gr
+import json
+from datetime import datetime
+from utils.generator import generate_json_output, generate_markdown_output
+from utils.validators import validate_mmcid
+# Version of this generator; shown in the UI text and passed to both output generators.
+GENERATOR_VERSION = "1.0.0-beta"
+# Controlled vocabularies: fixed choice lists offered by the form's dropdown / checkbox groups.
+CV_USE_CONTEXT = ["Standalone", "Integrated", "Hybrid (both standalone and integrated)"]  # choices for the "DF-MCUse - Usage Context" dropdown
+CV_CLASSIFICATION = [  # choices for the "DF-MC C - Classification" checkbox group
+    "Computer Forensics",
+    "Network Forensics",
+    "Mobile Device Forensics",
+    "Cloud Forensics",
+    "Database Forensics",
+    "Memory Forensics",
+    "Digital Image Forensics",
+    "Digital Video/Audio Forensics",
+    "IoT Forensics",
+    "Multi-domain (covers multiple types)"
+]
+CV_REASONING = [  # choices for the "DF-MC TR - Type of Reasoning" checkbox group
+    "Deductive Reasoning (from general to specific)",
+    "Inductive Reasoning (from specific to general)",
+    "Abductive Reasoning (inference to best explanation)",
+    "Retroductive Reasoning (hypothesis refinement)",
+    "Hybrid/Mixed Reasoning"
+]
+CV_BIAS = [  # choices for the "DF-MC B - Bias" checkbox group
+    "Data Bias (historical, sampling, selection)",
+    "Algorithmic Bias (model architecture, optimization)",
+    "Human Bias (cognitive, confirmation, implicit)",
+    "Deployment Bias (context mismatch)",
+    "Reporting Bias (documentation gaps)",
+    "Measurement Bias (proxy variables)",
+    "Stereotyping Bias (reinforcing stereotypes)",
+    "Automation Bias (over-reliance on automated results)",
+    "No Identified Bias",
+    "Multiple Bias Types"
+]
+CV_CAUSE_OF_BIAS = [  # choices for the "DF-MC CB - Cause of Bias" checkbox group
+    "Unrepresentative Training Data",
+    "Historical Inequities in Data",
+    "Feature Selection Issues",
+    "Labeling Inconsistencies",
+    "Optimization Objective Mismatch",
+    "Insufficient Diversity in Development Team",
+    "Lack of Domain Expertise",
+    "Temporal Drift (data age/staleness)",
+    "Geographic/Cultural Limitations",
+    "Tool/Method Limitations",
+    "Multiple Causes",
+    "Unknown/Under Investigation"
+]
+CV_CAUSE_OF_ERROR = [  # choices for the "DF-MC CE - Cause of Error" checkbox group
+    "Training Error (underfitting)",
+    "Validation Error (model selection issues)",
+    "Testing Error (generalization failure)",
+    "Overfitting (high variance)",
+    "Underfitting (high bias)",
+    "Data Quality Issues (noise, outliers, mislabeling)",
+    "Insufficient Training Data",
+    "Class Imbalance",
+    "Feature Engineering Issues",
+    "Hyperparameter Misconfiguration",
+    "Model Complexity Mismatch",
+    "Adversarial Attack (poisoning, evasion)",
+    "Concept Drift",
+    "Tool Calibration Error",
+    "Human Error in Analysis",
+    "Chain of Custody Issues",
+    "Multiple Error Sources",
+    "Unknown/Under Investigation"
+]
+def save_to_file(content, filename):
+    """Helper to save content to a file and return the path"""
+    filepath = f"/tmp/{filename}"
+    with open(filepath, 'w') as f:
+        f.write(content)
+    return filepath
+def generate_model_card(*args):
+    """Generate model card outputs from form inputs"""
+    # Unpack all arguments in sequence
+    (mmcid, version, owner, use_context, layer_n,
+     case_statement, hypothesis,
+     classification, classification_other,
+     reasoning_type, reasoning_other,
+     bias, bias_other,
+     cause_of_bias, cause_bias_other,
+     error, cause_of_error, cause_error_other) = args[:18]
+    # Remaining args are MC0 and MC1 elements (checkbox + text pairs)
+    remaining_args = args[18:]
+    # Validate MMCID if provided
+    if mmcid and not validate_mmcid(mmcid):
+        return "❌ Invalid MMCID format. Please use format: DF-MC-YYYY-NNN (e.g., DF-MC-2025-001)", None, None
+    # Build metadata
+    metadata = {
+        "mmcid": mmcid or "Not specified",
+        "version": version or "N/A",
+        "owner": owner or "Not specified",
+        "use_context": use_context or "Not specified",
+        "layer_n": layer_n or "N/A",
+        "case_statement": case_statement,
+        "hypothesis": hypothesis,
+        "classification": list(classification) + ([classification_other] if classification_other else []),
+        "reasoning_type": list(reasoning_type) + ([reasoning_other] if reasoning_other else []),
+        "bias": list(bias) + ([bias_other] if bias_other else []),
+        "cause_of_bias": list(cause_of_bias) + ([cause_bias_other] if cause_bias_other else []),
+        "error": error,
+        "cause_of_error": list(cause_of_error) + ([cause_error_other] if cause_error_other else [])
+    }
+    # MC0 Top Level Elements (9 elements after removing duplicates)
+    mc0_keys = [
+        "algorithm", "inference", "confounder", "evaluation", "tool",
+        "evidence_mc1", "file_type", "data_structure", "degree_of_confidence"
+    ]
+    top_level = {}
+    for i, key in enumerate(mc0_keys):
+        check_val = remaining_args[i*2]
+        desc_val = remaining_args[i*2 + 1]
+        top_level[key] = {
+            "applicable": check_val,
+            "description": desc_val if check_val else ""
+        }
+    # MC1 Data & Processes (19 elements)
+    process_start_idx = len(mc0_keys) * 2
+    process_keys = [
+        "event_data", "parse_raw_data", "validate", "identify_partitions",
+        "process_file_system", "identify_content_carving", "file_type_identification",
+        "file_specific_processing", "file_hashing", "hash_matching",
+        "mismatched_signature_detection", "timeline", "timeline_analysis",
+        "geolocation", "geolocation_analysis", "keyword_indexing",
+        "keyword_searching", "automated_result_interpretation", "ai_based_content_flagging"
+    ]
+    processes = {}
+    for i, key in enumerate(process_keys):
+        idx = process_start_idx + (i * 2)
+        check_val = remaining_args[idx]
+        desc_val = remaining_args[idx + 1]
+        processes[key] = {
+            "applicable": check_val,
+            "description": desc_val if check_val else ""
+        }
+    # Generate outputs
+    json_output = generate_json_output(metadata, top_level, processes, GENERATOR_VERSION)
+    markdown_output = generate_markdown_output(metadata, top_level, processes, GENERATOR_VERSION)
+    # Save to files
+    json_file = save_to_file(json_output, "model_card.json")
+    md_file = save_to_file(markdown_output, "README.md")
+    return markdown_output, json_file, md_file
+# Build Single-Form Gradio Interface
+with gr.Blocks(title="Digital Forensics Model Card Generator", theme=gr.themes.Soft()) as demo:
+    gr.Markdown(f"""
+    # 🔬 Digital Forensics Model Card Generator
+    Create standardized model cards for digital forensics AI/ML systems.
+    **Based on:**
+    - Di Maio, P. (2024). Towards Open Standards for Systemic Complexity in Digital Forensics
+    - Hargreaves, C., Nelson, A., & Casey, E. (2024). An abstract model for digital forensic analysis tools
+    **Version:** {GENERATOR_VERSION}
+    ---
+    """)
+    # SECTION 1: IDENTIFICATION & CONTEXT
+    gr.Markdown("## 📋 Section 1: Identification & Context")
+    with gr.Row():
+        mmcid = gr.Textbox(
+            label="MMCID - Identifier",
+            placeholder="DF-MC-2025-001",
+            info="Format: DF-MC-YYYY-NNN"
+        )
+        version = gr.Textbox(
+            label="MCV - Version",
+            placeholder="1.0 or N/A"
+        )
+    with gr.Row():
+        owner = gr.Textbox(
+            label="DF-MCO - Owner",
+            placeholder="Organization or individual name"
+        )
+        use_context = gr.Dropdown(
+            choices=CV_USE_CONTEXT,
+            label="DF-MCUse - Usage Context"
+        )
+    layer_n = gr.Textbox(
+        label="DF-MC Ln - Layer/Stage",
+        placeholder="Specify layer or stage number if applicable"
+    )
+    # SECTION 2: CASE CONTEXT
+    gr.Markdown("## 📝 Section 2: Case Context")
+    case_statement = gr.TextArea(
+        label="DF-MC CS - Case Statement",
+        placeholder="Describe the case context, investigation scope, and objectives...",
+        lines=3
+    )
+    hypothesis = gr.TextArea(
+        label="DF-MC H - Hypothesis",
+        placeholder="State the hypothesis being tested or investigated...",
+        lines=3
+    )
+    # SECTION 3: CLASSIFICATION & APPROACH
+    gr.Markdown("## 🔍 Section 3: Classification & Approach")
+    gr.Markdown("*Select up to 3 items from each controlled vocabulary*")
+    with gr.Row():
+        with gr.Column():
+            classification = gr.CheckboxGroup(
+                choices=CV_CLASSIFICATION,
+                label="DF-MC C - Classification (max 3)",
+                info="Select forensic domain(s)"
+            )
+        with gr.Column():
+            classification_other = gr.Textbox(
+                label="Other Classification",
+                placeholder="Specify if not listed"
+            )
+    with gr.Row():
+        with gr.Column():
+            reasoning_type = gr.CheckboxGroup(
+                choices=CV_REASONING,
+                label="DF-MC TR - Type of Reasoning (max 3)",
+                info="Select reasoning method(s)"
+            )
+        with gr.Column():
+            reasoning_other = gr.Textbox(
+                label="Other Reasoning",
+                placeholder="Specify if not listed"
+            )
+    # SECTION 4: QUALITY & LIMITATIONS
+    gr.Markdown("## ⚠️ Section 4: Quality & Limitations")
+    with gr.Row():
+        with gr.Column():
+            bias = gr.CheckboxGroup(
+                choices=CV_BIAS,
+                label="DF-MC B - Bias (max 3)",
+                info="Identify bias type(s)"
+            )
+        with gr.Column():
+            bias_other = gr.Textbox(
+                label="Other Bias",
+                placeholder="Specify if not listed"
+            )
+    with gr.Row():
+        with gr.Column():
+            cause_of_bias = gr.CheckboxGroup(
+                choices=CV_CAUSE_OF_BIAS,
+                label="DF-MC CB - Cause of Bias (max 3)",
+                info="Identify root cause(s)"
+            )
+        with gr.Column():
+            cause_bias_other = gr.Textbox(
+                label="Other Cause of Bias",
+                placeholder="Specify if not listed"
+            )
+    error = gr.TextArea(
+        label="DF-MC E - Error Description",
+        placeholder="Describe any errors encountered during analysis...",
+        lines=3
+    )
+    with gr.Row():
+        with gr.Column():
+            cause_of_error = gr.CheckboxGroup(
+                choices=CV_CAUSE_OF_ERROR,
+                label="DF-MC CE - Cause of Error (max 3)",
+                info="Identify error cause(s)"
+            )
+        with gr.Column():
+            cause_error_other = gr.Textbox(
+                label="Other Cause of Error",
+                placeholder="Specify if not listed"
+            )
+    # SECTION 5: TOP LEVEL ELEMENTS (MC0 - Figure 6, deduplicated)
+    gr.Markdown("## 🔝 Section 5: Top Level Elements (DF MC 0 - Figure 6)")
+    gr.Markdown("*Check applicable elements and provide descriptions*")
+    mc0_elements = [
+        ("algorithm", "Algorithm"),
+        ("inference", "Inference"),
+        ("confounder", "Confounder"),
+        ("evaluation", "Evaluation"),
+        ("tool", "Tool"),
+        ("evidence_mc1", "Evidence MC1"),
+        ("file_type", "File Type"),
+        ("data_structure", "Data Structure"),
+        ("degree_confidence", "Degree of Confidence")
+    ]
+    mc0_components = []
+    for elem_id, elem_label in mc0_elements:
+        with gr.Row():
+            check = gr.Checkbox(label=f"✓ {elem_label}", value=False)
+            desc = gr.TextArea(
+                label=f"Description",
+                placeholder=f"Describe {elem_label.lower()} if applicable...",
+                lines=2
+            )
+            mc0_components.extend([check, desc])
+    # SECTION 6: DATA & PROCESSES (MC1 - Figure 7)
+    gr.Markdown("## ⚙️ Section 6: Data Types & Analytical Processes (DF MC 1 - Figure 7)")
+    gr.Markdown("*Check applicable processes and describe how they were performed*")
+    mc1_processes = [
+        ("event_data", "EVENT/DATA"),
+        ("parse_raw", "Parse Raw Data Contained Within the Image"),
+        ("validate", "Validate the Data Compared"),
+        ("identify_partitions", "Identify Partitions"),
+        ("process_filesystem", "Process File System"),
+        ("identify_content", "Identify Content (Carving)"),
+        ("file_type_id", "File Type Identification"),
+        ("file_specific", "File-Specific Processing"),
+        ("file_hashing", "File Hashing"),
+        ("hash_matching", "Hash Matching"),
+        ("mismatched_sig", "Mismatched Signature Detection"),
+        ("timeline", "Timeline"),
+        ("timeline_analysis", "Timeline Analysis"),
+        ("geolocation", "Geolocation"),
+        ("geolocation_analysis", "Geolocation Analysis"),
+        ("keyword_indexing", "Keyword Indexing"),
+        ("keyword_searching", "Keyword Searching"),
+        ("automated_result", "Automated Result Interpretation"),
+        ("ai_content_flag", "AI-Based Content Flagging")
+    ]
+    mc1_components = []
+    for proc_id, proc_label in mc1_processes:
+        with gr.Row():
+            check = gr.Checkbox(label=f"✓ {proc_label}", value=False)
+            desc = gr.TextArea(
+                label=f"Description",
+                placeholder=f"Describe how {proc_label.lower()} was performed...",
+                lines=2
+            )
+            mc1_components.extend([check, desc])
+    # GENERATION & OUTPUT
+    gr.Markdown("---")
+    gr.Markdown("## 🚀 Generate Your Model Card")
+    generate_btn = gr.Button("Generate Model Card", variant="primary", size="lg")
+    gr.Markdown("### Preview & Download")
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("**Markdown Preview:**")
+            preview_output = gr.Markdown()
+        with gr.Column():
+            gr.Markdown("**Download Files:**")
+            json_download = gr.File(label="JSON File", type="filepath")
+            md_download = gr.File(label="README.md", type="filepath")
+    # Wire up generation
+    all_inputs = [
+        mmcid, version, owner, use_context, layer_n,
+        case_statement, hypothesis,
+        classification, classification_other,
+        reasoning_type, reasoning_other,
+        bias, bias_other,
+        cause_of_bias, cause_bias_other,
+        error, cause_of_error, cause_error_other
+    ] + mc0_components + mc1_components
+    generate_btn.click(
+        fn=generate_model_card,
+        inputs=all_inputs,
+        outputs=[preview_output, json_download, md_download]
+    )
+    gr.Markdown(f"""
+    ---
+    ### 📚 References & Information
+    **References:**
+    - Di Maio, P. (2024). Towards Open Standards for Systemic Complexity in Digital Forensics. https://papers.cool.arxiv/2512.12970
+    - Hargreaves, C., Nelson, A., & Casey, E. (2024). An abstract model for digital forensic analysis tools—A foundation for systematic error mitigation analysis. *Forensic Science International: Digital Investigation*, 48.
+    **Generator Version:** {GENERATOR_VERSION} (Beta)
+    **License:** Apache 2.0
+    *This is a beta version. All fields are optional. Feedback welcome!*
+    """)
+if __name__ == "__main__":
+    demo.launch()