Spaces:

STARBORN
/

forensics_mc_generator

Sleeping

App Files Files Community

STARBORN committed on Dec 18, 2025

Commit

2a9f7cb

verified ·

1 Parent(s): 6583075

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -448

app.py DELETED Viewed

@@ -1,448 +0,0 @@
-"""
-Digital Forensics Model Card Generator
-A tool for creating standardized model cards for digital forensics AI/ML models
-"""
-import gradio as gr
-import json
-from datetime import datetime
-from utils.generator import generate_json_output, generate_markdown_output
-from utils.validators import validate_mmcid
-# Version of this generator. It is interpolated into the header and footer
-# Markdown of the UI and passed to both output generators.
-GENERATOR_VERSION = "1.0.0"
-# Controlled vocabularies: the fixed choice lists offered by the metadata form.
-# Choices for the "DF-MCUse - Usage Context" dropdown.
-CV_USE_CONTEXT = ["Standalone", "Integrated", "Hybrid (both standalone and integrated)"]
-# Choices for the "DF-MC C - Classification" checkbox group (forensic domains).
-CV_CLASSIFICATION = [
-    "Computer Forensics",
-    "Network Forensics",
-    "Mobile Device Forensics",
-    "Cloud Forensics",
-    "Database Forensics",
-    "Memory Forensics",
-    "Digital Image Forensics",
-    "Digital Video/Audio Forensics",
-    "IoT Forensics",
-    "Multi-domain (covers multiple types)"
-]
-# Choices for the "DF-MC TR - Type of Reasoning" checkbox group.
-CV_REASONING = [
-    "Deductive Reasoning (from general to specific)",
-    "Inductive Reasoning (from specific to general)",
-    "Abductive Reasoning (inference to best explanation)",
-    "Retroductive Reasoning (hypothesis refinement)",
-    "Hybrid/Mixed Reasoning"
-]
-# Choices for the "DF-MC B - Bias" checkbox group.
-CV_BIAS = [
-    "Data Bias (historical, sampling, selection)",
-    "Algorithmic Bias (model architecture, optimization)",
-    "Human Bias (cognitive, confirmation, implicit)",
-    "Deployment Bias (context mismatch)",
-    "Reporting Bias (documentation gaps)",
-    "Measurement Bias (proxy variables)",
-    "Stereotyping Bias (reinforcing stereotypes)",
-    "Automation Bias (over-reliance on automated results)",
-    "No Identified Bias",
-    "Multiple Bias Types"
-]
-# Choices for the "DF-MC CB - Cause of Bias" checkbox group.
-CV_CAUSE_OF_BIAS = [
-    "Unrepresentative Training Data",
-    "Historical Inequities in Data",
-    "Feature Selection Issues",
-    "Labeling Inconsistencies",
-    "Optimization Objective Mismatch",
-    "Insufficient Diversity in Development Team",
-    "Lack of Domain Expertise",
-    "Temporal Drift (data age/staleness)",
-    "Geographic/Cultural Limitations",
-    "Tool/Method Limitations",
-    "Multiple Causes",
-    "Unknown/Under Investigation"
-]
-# Choices for the "DF-MC CE - Cause of Error" checkbox group.
-CV_CAUSE_OF_ERROR = [
-    "Training Error (underfitting)",
-    "Validation Error (model selection issues)",
-    "Testing Error (generalization failure)",
-    "Overfitting (high variance)",
-    "Underfitting (high bias)",
-    "Data Quality Issues (noise, outliers, mislabeling)",
-    "Insufficient Training Data",
-    "Class Imbalance",
-    "Feature Engineering Issues",
-    "Hyperparameter Misconfiguration",
-    "Model Complexity Mismatch",
-    "Adversarial Attack (poisoning, evasion)",
-    "Concept Drift",
-    "Tool Calibration Error",
-    "Human Error in Analysis",
-    "Chain of Custody Issues",
-    "Multiple Error Sources",
-    "Unknown/Under Investigation"
-]
-def _with_other(selected, other):
-    """Return the checked vocabulary items plus the free-text "other" entry.
-
-    ``selected`` may be ``None`` or empty (nothing checked); a falsy ``other``
-    is ignored. A new list is always returned, so the caller's list is never
-    mutated.
-    """
-    items = list(selected or [])
-    if other:
-        items.append(other)
-    return items
-
-
-def _element(applicable, description):
-    """Build the ``applicable``/``description`` record used for each checklist row."""
-    return {"applicable": applicable, "description": description}
-
-
-def _preview_text(content):
-    """Return text for the Markdown preview.
-
-    If ``content`` is the path of an existing file, that file's text is
-    returned; any other value is passed through unchanged.
-    """
-    import os
-
-    if isinstance(content, str) and os.path.isfile(content):
-        with open(content, encoding="utf-8") as handle:
-            return handle.read()
-    return content
-
-
-def _as_download(content, filename):
-    """Return a value suitable for a file-download output.
-
-    A string that already names an existing file is returned as is. Any other
-    string is treated as file content and written to ``filename`` inside a
-    fresh temporary directory, whose path is returned. Non-string values
-    (including ``None``) are passed through unchanged.
-    """
-    import os
-    import tempfile
-
-    if not isinstance(content, str) or os.path.isfile(content):
-        return content
-    # The temporary directory is not removed here; it has to outlive this call
-    # so the file can still be served after the function returns.
-    path = os.path.join(tempfile.mkdtemp(), filename)
-    with open(path, "w", encoding="utf-8") as handle:
-        handle.write(content)
-    return path
-
-
-def generate_model_card(
-    # Metadata
-    mmcid, version, owner, use_context, case_statement, hypothesis,
-    classification, reasoning_type, bias, cause_of_bias, error, cause_of_error, layer_n,
-    classification_other, reasoning_other, bias_other, cause_bias_other, cause_error_other,
-    # Top Level (Figure 6)
-    type_reasoning_check, type_reasoning_desc,
-    cause_error_check, cause_error_desc,
-    algorithm_check, algorithm_desc,
-    inference_check, inference_desc,
-    confounder_check, confounder_desc,
-    classification_check, classification_desc,
-    evaluation_check, evaluation_desc,
-    hypothesis_check, hypothesis_desc,
-    tool_check, tool_desc,
-    bias_debiasing_check, bias_debiasing_desc,
-    case_statement_check, case_statement_desc,
-    evidence_mc1_check, evidence_mc1_desc,
-    file_type_check, file_type_desc,
-    data_structure_check, data_structure_desc,
-    degree_confidence_check, degree_confidence_desc,
-    # Data & Processes (Figure 7)
-    event_data_check, event_data_desc,
-    parse_raw_check, parse_raw_desc,
-    validate_check, validate_desc,
-    identify_partitions_check, identify_partitions_desc,
-    process_filesystem_check, process_filesystem_desc,
-    identify_content_check, identify_content_desc,
-    file_type_id_check, file_type_id_desc,
-    file_specific_check, file_specific_desc,
-    file_hashing_check, file_hashing_desc,
-    hash_matching_check, hash_matching_desc,
-    mismatched_sig_check, mismatched_sig_desc,
-    timeline_check, timeline_desc,
-    timeline_analysis_check, timeline_analysis_desc,
-    geolocation_check, geolocation_desc,
-    geolocation_analysis_check, geolocation_analysis_desc,
-    keyword_indexing_check, keyword_indexing_desc,
-    keyword_searching_check, keyword_searching_desc,
-    automated_result_check, automated_result_desc,
-    ai_content_flag_check, ai_content_flag_desc
-):
-    """Collect the form values into a model card and produce its outputs.
-
-    Parameters are positional and arrive in three groups:
-
-    * metadata fields, followed by the five free-text "other" entries that
-      extend the checkbox-group selections;
-    * fifteen ``*_check`` / ``*_desc`` pairs for the top-level elements;
-    * nineteen ``*_check`` / ``*_desc`` pairs for the data and processes.
-
-    Returns a 3-tuple ``(preview, json_file, markdown_file)``. When ``mmcid``
-    is non-empty and ``validate_mmcid`` rejects it, the tuple is
-    ``(error message, None, None)`` and nothing is generated.
-    """
-    # Validate MMCID if provided
-    if mmcid and not validate_mmcid(mmcid):
-        return "❌ Invalid MMCID format. Please use format: DF-MC-YYYY-NNN (e.g., DF-MC-2025-001)", None, None
-    # Collect metadata; empty identifier, version, owner and layer fields fall
-    # back to placeholder text.
-    metadata = {
-        "mmcid": mmcid or "Not specified",
-        "version": version or "N/A",
-        "owner": owner or "Not specified",
-        "use_context": use_context,
-        "case_statement": case_statement,
-        "hypothesis": hypothesis,
-        "classification": _with_other(classification, classification_other),
-        "reasoning_type": _with_other(reasoning_type, reasoning_other),
-        "bias": _with_other(bias, bias_other),
-        "cause_of_bias": _with_other(cause_of_bias, cause_bias_other),
-        "error": error,
-        "cause_of_error": _with_other(cause_of_error, cause_error_other),
-        "layer_n": layer_n or "N/A"
-    }
-    # Collect top level elements
-    top_level = {
-        "type_of_reasoning": _element(type_reasoning_check, type_reasoning_desc),
-        "cause_of_error": _element(cause_error_check, cause_error_desc),
-        "algorithm": _element(algorithm_check, algorithm_desc),
-        "inference": _element(inference_check, inference_desc),
-        "confounder": _element(confounder_check, confounder_desc),
-        "classification": _element(classification_check, classification_desc),
-        "evaluation": _element(evaluation_check, evaluation_desc),
-        "hypothesis": _element(hypothesis_check, hypothesis_desc),
-        "tool": _element(tool_check, tool_desc),
-        "bias_debiasing": _element(bias_debiasing_check, bias_debiasing_desc),
-        "case_statement": _element(case_statement_check, case_statement_desc),
-        "evidence_mc1": _element(evidence_mc1_check, evidence_mc1_desc),
-        "file_type": _element(file_type_check, file_type_desc),
-        "data_structure": _element(data_structure_check, data_structure_desc),
-        "degree_of_confidence": _element(degree_confidence_check, degree_confidence_desc)
-    }
-    # Collect data & processes
-    processes = {
-        "event_data": _element(event_data_check, event_data_desc),
-        "parse_raw_data": _element(parse_raw_check, parse_raw_desc),
-        "validate": _element(validate_check, validate_desc),
-        "identify_partitions": _element(identify_partitions_check, identify_partitions_desc),
-        "process_file_system": _element(process_filesystem_check, process_filesystem_desc),
-        "identify_content_carving": _element(identify_content_check, identify_content_desc),
-        "file_type_identification": _element(file_type_id_check, file_type_id_desc),
-        "file_specific_processing": _element(file_specific_check, file_specific_desc),
-        "file_hashing": _element(file_hashing_check, file_hashing_desc),
-        "hash_matching": _element(hash_matching_check, hash_matching_desc),
-        "mismatched_signature_detection": _element(mismatched_sig_check, mismatched_sig_desc),
-        "timeline": _element(timeline_check, timeline_desc),
-        "timeline_analysis": _element(timeline_analysis_check, timeline_analysis_desc),
-        "geolocation": _element(geolocation_check, geolocation_desc),
-        "geolocation_analysis": _element(geolocation_analysis_check, geolocation_analysis_desc),
-        "keyword_indexing": _element(keyword_indexing_check, keyword_indexing_desc),
-        "keyword_searching": _element(keyword_searching_check, keyword_searching_desc),
-        "automated_result_interpretation": _element(automated_result_check, automated_result_desc),
-        "ai_based_content_flagging": _element(ai_content_flag_check, ai_content_flag_desc)
-    }
-    # Generate outputs
-    json_output = generate_json_output(metadata, top_level, processes, GENERATOR_VERSION)
-    markdown_output = generate_markdown_output(metadata, top_level, processes, GENERATOR_VERSION)
-    # The Markdown result feeds both a text preview and a file download, which
-    # need different things (text vs. a file path), so each output is coerced
-    # to the form its consumer expects.
-    # NOTE(review): whether the generators return text or paths is not visible
-    # from this file; both cases are handled. Confirm against utils.generator.
-    return (
-        _preview_text(markdown_output),
-        _as_download(json_output, "model_card.json"),
-        _as_download(markdown_output, "README.md"),
-    )
-# Build the Gradio interface. Every component below is created when this
-# module is executed; the resulting `demo` object is launched by the
-# `__main__` guard at the end of the file.
-with gr.Blocks(title="Digital Forensics Model Card Generator", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("""
-    # 🔬 Digital Forensics Model Card Generator
-    Create standardized model cards for digital forensics AI/ML systems. Based on:
-    - Di Maio, P. (2024). Towards Open Standards for Systemic Complexity in Digital Forensics
-    - Hargreaves, C., Nelson, A., & Casey, E. (2024). An abstract model for digital forensic analysis tools
-    **Version {0}**
-    """.format(GENERATOR_VERSION))
-    with gr.Tabs():
-        # ===== SECTION 1: METADATA =====
-        # The component variables created in this tab are collected, by name,
-        # into `all_inputs` further down.
-        with gr.Tab("📋 Metadata"):
-            gr.Markdown("### Model Card Metadata\nAll fields are optional unless otherwise specified.")
-            with gr.Row():
-                mmcid = gr.Textbox(
-                    label="MMCID - Identifier",
-                    placeholder="DF-MC-2025-001",
-                    info="Format: DF-MC-YYYY-NNN"
-                )
-                version = gr.Textbox(
-                    label="MCV - Version",
-                    placeholder="1.0 or N/A",
-                    info="Version number or N/A"
-                )
-            owner = gr.Textbox(
-                label="DF-MCO - Owner",
-                placeholder="Organization or individual name"
-            )
-            use_context = gr.Dropdown(
-                choices=CV_USE_CONTEXT,
-                label="DF-MCUse - Usage Context",
-                info="How is this model card used?"
-            )
-            case_statement = gr.TextArea(
-                label="DF-MC CS - Case Statement",
-                placeholder="Describe the case context...",
-                lines=3
-            )
-            hypothesis = gr.TextArea(
-                label="DF-MC H - Hypothesis",
-                placeholder="State the hypothesis being tested...",
-                lines=3
-            )
-            # NOTE(review): the "max 3" limit is stated only in the label and
-            # info text of the groups below; nothing in this block enforces it.
-            gr.Markdown("#### Select up to 3 items for each category:")
-            # Each row pairs a controlled-vocabulary checkbox group with a
-            # free-text box for a value that is not in the list.
-            with gr.Row():
-                classification = gr.CheckboxGroup(
-                    choices=CV_CLASSIFICATION,
-                    label="DF-MC C - Classification (max 3)",
-                    info="Select up to 3 forensic domains"
-                )
-                classification_other = gr.Textbox(
-                    label="Other Classification",
-                    placeholder="Specify if not listed above"
-                )
-            with gr.Row():
-                reasoning_type = gr.CheckboxGroup(
-                    choices=CV_REASONING,
-                    label="DF-MC TR - Type of Reasoning (max 3)",
-                    info="Select up to 3 reasoning types"
-                )
-                reasoning_other = gr.Textbox(
-                    label="Other Reasoning Type",
-                    placeholder="Specify if not listed above"
-                )
-            with gr.Row():
-                bias = gr.CheckboxGroup(
-                    choices=CV_BIAS,
-                    label="DF-MC B - Bias (max 3)",
-                    info="Select up to 3 bias types"
-                )
-                bias_other = gr.Textbox(
-                    label="Other Bias",
-                    placeholder="Specify if not listed above"
-                )
-            with gr.Row():
-                cause_of_bias = gr.CheckboxGroup(
-                    choices=CV_CAUSE_OF_BIAS,
-                    label="DF-MC CB - Cause of Bias (max 3)",
-                    info="Select up to 3 causes"
-                )
-                cause_bias_other = gr.Textbox(
-                    label="Other Cause of Bias",
-                    placeholder="Specify if not listed above"
-                )
-            error = gr.TextArea(
-                label="DF-MC E - Error",
-                placeholder="Describe errors encountered...",
-                lines=3
-            )
-            with gr.Row():
-                cause_of_error = gr.CheckboxGroup(
-                    choices=CV_CAUSE_OF_ERROR,
-                    label="DF-MC CE - Cause of Error (max 3)",
-                    info="Select up to 3 error causes"
-                )
-                cause_error_other = gr.Textbox(
-                    label="Other Cause of Error",
-                    placeholder="Specify if not listed above"
-                )
-            layer_n = gr.Textbox(
-                label="DF-MC Ln - Layer n",
-                placeholder="Specify layer/stage number if applicable"
-            )
-        # ===== SECTION 2: TOP LEVEL (FIGURE 6) =====
-        with gr.Tab("🔝 Top Level Elements (DF MC 0)"):
-            gr.Markdown("### Figure 6 - Top Level Elements\nCheck applicable items and provide descriptions.")
-            # (id, label) pairs, one per row. The id is not used inside the
-            # loop; the list order matches the order of the *_check/*_desc
-            # parameter pairs of generate_model_card.
-            elements = [
-                ("type_reasoning", "Type of Reasoning"),
-                ("cause_error", "Cause of Error"),
-                ("algorithm", "Algorithm"),
-                ("inference", "Inference"),
-                ("confounder", "Confounder"),
-                ("classification", "Classification"),
-                ("evaluation", "Evaluation"),
-                ("hypothesis", "Hypothesis"),
-                ("tool", "Tool"),
-                ("bias_debiasing", "Bias/Debiasing"),
-                ("case_statement", "Case Statement"),
-                ("evidence_mc1", "Evidence MC1"),
-                ("file_type", "File Type"),
-                ("data_structure", "Data Structure"),
-                ("degree_confidence", "Degree of Confidence")
-            ]
-            # Flat list [check, desc, check, desc, ...] appended to the
-            # generation inputs below.
-            top_level_components = []
-            for elem_id, elem_label in elements:
-                with gr.Row():
-                    check = gr.Checkbox(label=f"✓ {elem_label}", value=False)
-                    # The description box is created hidden.
-                    desc = gr.TextArea(
-                        label=f"Description",
-                        placeholder=f"Describe {elem_label.lower()} if applicable...",
-                        lines=2,
-                        visible=False
-                    )
-                    # Show/hide description based on checkbox. The lambda uses
-                    # only its argument, so it does not capture loop variables.
-                    check.change(
-                        fn=lambda x: gr.update(visible=x),
-                        inputs=[check],
-                        outputs=[desc]
-                    )
-                    top_level_components.extend([check, desc])
-        # ===== SECTION 3: DATA & PROCESSES (FIGURE 7) =====
-        with gr.Tab("⚙️ Data & Processes (DF MC 1)"):
-            gr.Markdown("### Figure 7 - Data Types and Analytical Processes\nCheck applicable items and provide descriptions.")
-            # (id, label) pairs, one per row, built the same way as the
-            # top-level rows above. The id is not used inside the loop.
-            processes_list = [
-                ("event_data", "EVENT/DATA"),
-                ("parse_raw", "Parse Raw Data Contained Within the Image"),
-                ("validate", "Validate the Data Compared"),
-                ("identify_partitions", "Identify Partitions"),
-                ("process_filesystem", "Process File System"),
-                ("identify_content", "Identify Content (Carving)"),
-                ("file_type_id", "File Type Identification"),
-                ("file_specific", "File-Specific Processing"),
-                ("file_hashing", "File Hashing"),
-                ("hash_matching", "Hash Matching"),
-                ("mismatched_sig", "Mismatched Signature Detection"),
-                ("timeline", "Timeline"),
-                ("timeline_analysis", "Timeline Analysis"),
-                ("geolocation", "Geolocation"),
-                ("geolocation_analysis", "Geolocation Analysis"),
-                ("keyword_indexing", "Keyword Indexing"),
-                ("keyword_searching", "Keyword Searching"),
-                ("automated_result", "Automated Result Interpretation"),
-                ("ai_content_flag", "AI-Based Content Flagging")
-            ]
-            # Flat list [check, desc, check, desc, ...], like top_level_components.
-            process_components = []
-            for proc_id, proc_label in processes_list:
-                with gr.Row():
-                    check = gr.Checkbox(label=f"✓ {proc_label}", value=False)
-                    desc = gr.TextArea(
-                        label=f"Description",
-                        placeholder=f"Describe {proc_label.lower()} if applicable...",
-                        lines=2,
-                        visible=False
-                    )
-                    # Reveal the description box only while the checkbox is ticked.
-                    check.change(
-                        fn=lambda x: gr.update(visible=x),
-                        inputs=[check],
-                        outputs=[desc]
-                    )
-                    process_components.extend([check, desc])
-    # ===== GENERATION & OUTPUT =====
-    gr.Markdown("---")
-    gr.Markdown("### Generate Your Model Card")
-    generate_btn = gr.Button("🚀 Generate Model Card", variant="primary", size="lg")
-    with gr.Tabs():
-        with gr.Tab("📄 Preview (Markdown)"):
-            preview_output = gr.Markdown(label="Markdown Preview")
-        with gr.Tab("💾 Download Files"):
-            gr.Markdown("Click the buttons below to download your generated model card files:")
-            json_download = gr.File(label="Download JSON")
-            md_download = gr.File(label="Download README.md")
-    # Wire up the generation. Inputs are passed positionally, so this list
-    # must stay in the same order as generate_model_card's parameters:
-    # 18 metadata components, then the top-level pairs, then the process pairs.
-    all_inputs = [
-        mmcid, version, owner, use_context, case_statement, hypothesis,
-        classification, reasoning_type, bias, cause_of_bias, error, cause_of_error, layer_n,
-        classification_other, reasoning_other, bias_other, cause_bias_other, cause_error_other
-    ] + top_level_components + process_components
-    # The three return values of generate_model_card map, in order, onto the
-    # preview pane and the two download components.
-    generate_btn.click(
-        fn=generate_model_card,
-        inputs=all_inputs,
-        outputs=[preview_output, json_download, md_download]
-    )
-    gr.Markdown("""
-    ---
-    ### About This Generator
-    **References:**
-    - Di Maio, P. (2024). Towards Open Standards for Systemic Complexity in Digital Forensics. https://papers.cool/arxiv/2512.12970
-    - Hargreaves, C., Nelson, A., & Casey, E. (2024). An abstract model for digital forensic analysis tools—A foundation for systematic error mitigation analysis. *Forensic Science International: Digital Investigation*, 48.
-    **Generator Version:** {0}
-    **License:** Apache 2.0
-    **Contact:** For questions or feedback, please open an issue on the project repository.
-    """.format(GENERATOR_VERSION))
-# Start the app only when this file is run as a script, not when imported.
-if __name__ == "__main__":
-    demo.launch()