STARBORN committed on
Commit
2a9f7cb
·
verified ·
1 Parent(s): 6583075

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -448
app.py DELETED
@@ -1,448 +0,0 @@
1
- """
2
- Digital Forensics Model Card Generator
3
- A tool for creating standardized model cards for digital forensics AI/ML models
4
- """
5
-
6
- import gradio as gr
7
- import json
8
- from datetime import datetime
9
- from utils.generator import generate_json_output, generate_markdown_output
10
- from utils.validators import validate_mmcid
11
-
12
- # Version
13
- GENERATOR_VERSION = "1.0.0"
14
-
15
- # Controlled Vocabularies
16
- CV_USE_CONTEXT = ["Standalone", "Integrated", "Hybrid (both standalone and integrated)"]
17
-
18
- CV_CLASSIFICATION = [
19
- "Computer Forensics",
20
- "Network Forensics",
21
- "Mobile Device Forensics",
22
- "Cloud Forensics",
23
- "Database Forensics",
24
- "Memory Forensics",
25
- "Digital Image Forensics",
26
- "Digital Video/Audio Forensics",
27
- "IoT Forensics",
28
- "Multi-domain (covers multiple types)"
29
- ]
30
-
31
- CV_REASONING = [
32
- "Deductive Reasoning (from general to specific)",
33
- "Inductive Reasoning (from specific to general)",
34
- "Abductive Reasoning (inference to best explanation)",
35
- "Retroductive Reasoning (hypothesis refinement)",
36
- "Hybrid/Mixed Reasoning"
37
- ]
38
-
39
- CV_BIAS = [
40
- "Data Bias (historical, sampling, selection)",
41
- "Algorithmic Bias (model architecture, optimization)",
42
- "Human Bias (cognitive, confirmation, implicit)",
43
- "Deployment Bias (context mismatch)",
44
- "Reporting Bias (documentation gaps)",
45
- "Measurement Bias (proxy variables)",
46
- "Stereotyping Bias (reinforcing stereotypes)",
47
- "Automation Bias (over-reliance on automated results)",
48
- "No Identified Bias",
49
- "Multiple Bias Types"
50
- ]
51
-
52
- CV_CAUSE_OF_BIAS = [
53
- "Unrepresentative Training Data",
54
- "Historical Inequities in Data",
55
- "Feature Selection Issues",
56
- "Labeling Inconsistencies",
57
- "Optimization Objective Mismatch",
58
- "Insufficient Diversity in Development Team",
59
- "Lack of Domain Expertise",
60
- "Temporal Drift (data age/staleness)",
61
- "Geographic/Cultural Limitations",
62
- "Tool/Method Limitations",
63
- "Multiple Causes",
64
- "Unknown/Under Investigation"
65
- ]
66
-
67
- CV_CAUSE_OF_ERROR = [
68
- "Training Error (underfitting)",
69
- "Validation Error (model selection issues)",
70
- "Testing Error (generalization failure)",
71
- "Overfitting (high variance)",
72
- "Underfitting (high bias)",
73
- "Data Quality Issues (noise, outliers, mislabeling)",
74
- "Insufficient Training Data",
75
- "Class Imbalance",
76
- "Feature Engineering Issues",
77
- "Hyperparameter Misconfiguration",
78
- "Model Complexity Mismatch",
79
- "Adversarial Attack (poisoning, evasion)",
80
- "Concept Drift",
81
- "Tool Calibration Error",
82
- "Human Error in Analysis",
83
- "Chain of Custody Issues",
84
- "Multiple Error Sources",
85
- "Unknown/Under Investigation"
86
- ]
87
-
88
- def generate_model_card(
89
- # Metadata
90
- mmcid, version, owner, use_context, case_statement, hypothesis,
91
- classification, reasoning_type, bias, cause_of_bias, error, cause_of_error, layer_n,
92
- classification_other, reasoning_other, bias_other, cause_bias_other, cause_error_other,
93
- # Top Level (Figure 6)
94
- type_reasoning_check, type_reasoning_desc,
95
- cause_error_check, cause_error_desc,
96
- algorithm_check, algorithm_desc,
97
- inference_check, inference_desc,
98
- confounder_check, confounder_desc,
99
- classification_check, classification_desc,
100
- evaluation_check, evaluation_desc,
101
- hypothesis_check, hypothesis_desc,
102
- tool_check, tool_desc,
103
- bias_debiasing_check, bias_debiasing_desc,
104
- case_statement_check, case_statement_desc,
105
- evidence_mc1_check, evidence_mc1_desc,
106
- file_type_check, file_type_desc,
107
- data_structure_check, data_structure_desc,
108
- degree_confidence_check, degree_confidence_desc,
109
- # Data & Processes (Figure 7)
110
- event_data_check, event_data_desc,
111
- parse_raw_check, parse_raw_desc,
112
- validate_check, validate_desc,
113
- identify_partitions_check, identify_partitions_desc,
114
- process_filesystem_check, process_filesystem_desc,
115
- identify_content_check, identify_content_desc,
116
- file_type_id_check, file_type_id_desc,
117
- file_specific_check, file_specific_desc,
118
- file_hashing_check, file_hashing_desc,
119
- hash_matching_check, hash_matching_desc,
120
- mismatched_sig_check, mismatched_sig_desc,
121
- timeline_check, timeline_desc,
122
- timeline_analysis_check, timeline_analysis_desc,
123
- geolocation_check, geolocation_desc,
124
- geolocation_analysis_check, geolocation_analysis_desc,
125
- keyword_indexing_check, keyword_indexing_desc,
126
- keyword_searching_check, keyword_searching_desc,
127
- automated_result_check, automated_result_desc,
128
- ai_content_flag_check, ai_content_flag_desc
129
- ):
130
- """Generate model card outputs"""
131
-
132
- # Validate MMCID if provided
133
- if mmcid and not validate_mmcid(mmcid):
134
- return "❌ Invalid MMCID format. Please use format: DF-MC-YYYY-NNN (e.g., DF-MC-2025-001)", None, None
135
-
136
- # Collect metadata
137
- metadata = {
138
- "mmcid": mmcid or "Not specified",
139
- "version": version or "N/A",
140
- "owner": owner or "Not specified",
141
- "use_context": use_context,
142
- "case_statement": case_statement,
143
- "hypothesis": hypothesis,
144
- "classification": classification + ([classification_other] if classification_other else []),
145
- "reasoning_type": reasoning_type + ([reasoning_other] if reasoning_other else []),
146
- "bias": bias + ([bias_other] if bias_other else []),
147
- "cause_of_bias": cause_of_bias + ([cause_bias_other] if cause_bias_other else []),
148
- "error": error,
149
- "cause_of_error": cause_of_error + ([cause_error_other] if cause_error_other else []),
150
- "layer_n": layer_n or "N/A"
151
- }
152
-
153
- # Collect top level elements
154
- top_level = {
155
- "type_of_reasoning": {"applicable": type_reasoning_check, "description": type_reasoning_desc},
156
- "cause_of_error": {"applicable": cause_error_check, "description": cause_error_desc},
157
- "algorithm": {"applicable": algorithm_check, "description": algorithm_desc},
158
- "inference": {"applicable": inference_check, "description": inference_desc},
159
- "confounder": {"applicable": confounder_check, "description": confounder_desc},
160
- "classification": {"applicable": classification_check, "description": classification_desc},
161
- "evaluation": {"applicable": evaluation_check, "description": evaluation_desc},
162
- "hypothesis": {"applicable": hypothesis_check, "description": hypothesis_desc},
163
- "tool": {"applicable": tool_check, "description": tool_desc},
164
- "bias_debiasing": {"applicable": bias_debiasing_check, "description": bias_debiasing_desc},
165
- "case_statement": {"applicable": case_statement_check, "description": case_statement_desc},
166
- "evidence_mc1": {"applicable": evidence_mc1_check, "description": evidence_mc1_desc},
167
- "file_type": {"applicable": file_type_check, "description": file_type_desc},
168
- "data_structure": {"applicable": data_structure_check, "description": data_structure_desc},
169
- "degree_of_confidence": {"applicable": degree_confidence_check, "description": degree_confidence_desc}
170
- }
171
-
172
- # Collect data & processes
173
- processes = {
174
- "event_data": {"applicable": event_data_check, "description": event_data_desc},
175
- "parse_raw_data": {"applicable": parse_raw_check, "description": parse_raw_desc},
176
- "validate": {"applicable": validate_check, "description": validate_desc},
177
- "identify_partitions": {"applicable": identify_partitions_check, "description": identify_partitions_desc},
178
- "process_file_system": {"applicable": process_filesystem_check, "description": process_filesystem_desc},
179
- "identify_content_carving": {"applicable": identify_content_check, "description": identify_content_desc},
180
- "file_type_identification": {"applicable": file_type_id_check, "description": file_type_id_desc},
181
- "file_specific_processing": {"applicable": file_specific_check, "description": file_specific_desc},
182
- "file_hashing": {"applicable": file_hashing_check, "description": file_hashing_desc},
183
- "hash_matching": {"applicable": hash_matching_check, "description": hash_matching_desc},
184
- "mismatched_signature_detection": {"applicable": mismatched_sig_check, "description": mismatched_sig_desc},
185
- "timeline": {"applicable": timeline_check, "description": timeline_desc},
186
- "timeline_analysis": {"applicable": timeline_analysis_check, "description": timeline_analysis_desc},
187
- "geolocation": {"applicable": geolocation_check, "description": geolocation_desc},
188
- "geolocation_analysis": {"applicable": geolocation_analysis_check, "description": geolocation_analysis_desc},
189
- "keyword_indexing": {"applicable": keyword_indexing_check, "description": keyword_indexing_desc},
190
- "keyword_searching": {"applicable": keyword_searching_check, "description": keyword_searching_desc},
191
- "automated_result_interpretation": {"applicable": automated_result_check, "description": automated_result_desc},
192
- "ai_based_content_flagging": {"applicable": ai_content_flag_check, "description": ai_content_flag_desc}
193
- }
194
-
195
- # Generate outputs
196
- json_output = generate_json_output(metadata, top_level, processes, GENERATOR_VERSION)
197
- markdown_output = generate_markdown_output(metadata, top_level, processes, GENERATOR_VERSION)
198
-
199
- return markdown_output, json_output, markdown_output
200
-
201
-
202
- # Build Gradio Interface
203
- with gr.Blocks(title="Digital Forensics Model Card Generator", theme=gr.themes.Soft()) as demo:
204
- gr.Markdown("""
205
- # 🔬 Digital Forensics Model Card Generator
206
-
207
- Create standardized model cards for digital forensics AI/ML systems. Based on:
208
- - Di Maio, P. (2024). Towards Open Standards for Systemic Complexity in Digital Forensics
209
- - Hargreaves, C., Nelson, A., & Casey, E. (2024). An abstract model for digital forensic analysis tools
210
-
211
- **Version {0}**
212
- """.format(GENERATOR_VERSION))
213
-
214
- with gr.Tabs():
215
- # ===== SECTION 1: METADATA =====
216
- with gr.Tab("📋 Metadata"):
217
- gr.Markdown("### Model Card Metadata\nAll fields are optional unless otherwise specified.")
218
-
219
- with gr.Row():
220
- mmcid = gr.Textbox(
221
- label="MMCID - Identifier",
222
- placeholder="DF-MC-2025-001",
223
- info="Format: DF-MC-YYYY-NNN"
224
- )
225
- version = gr.Textbox(
226
- label="MCV - Version",
227
- placeholder="1.0 or N/A",
228
- info="Version number or N/A"
229
- )
230
-
231
- owner = gr.Textbox(
232
- label="DF-MCO - Owner",
233
- placeholder="Organization or individual name"
234
- )
235
-
236
- use_context = gr.Dropdown(
237
- choices=CV_USE_CONTEXT,
238
- label="DF-MCUse - Usage Context",
239
- info="How is this model card used?"
240
- )
241
-
242
- case_statement = gr.TextArea(
243
- label="DF-MC CS - Case Statement",
244
- placeholder="Describe the case context...",
245
- lines=3
246
- )
247
-
248
- hypothesis = gr.TextArea(
249
- label="DF-MC H - Hypothesis",
250
- placeholder="State the hypothesis being tested...",
251
- lines=3
252
- )
253
-
254
- gr.Markdown("#### Select up to 3 items for each category:")
255
-
256
- with gr.Row():
257
- classification = gr.CheckboxGroup(
258
- choices=CV_CLASSIFICATION,
259
- label="DF-MC C - Classification (max 3)",
260
- info="Select up to 3 forensic domains"
261
- )
262
- classification_other = gr.Textbox(
263
- label="Other Classification",
264
- placeholder="Specify if not listed above"
265
- )
266
-
267
- with gr.Row():
268
- reasoning_type = gr.CheckboxGroup(
269
- choices=CV_REASONING,
270
- label="DF-MC TR - Type of Reasoning (max 3)",
271
- info="Select up to 3 reasoning types"
272
- )
273
- reasoning_other = gr.Textbox(
274
- label="Other Reasoning Type",
275
- placeholder="Specify if not listed above"
276
- )
277
-
278
- with gr.Row():
279
- bias = gr.CheckboxGroup(
280
- choices=CV_BIAS,
281
- label="DF-MC B - Bias (max 3)",
282
- info="Select up to 3 bias types"
283
- )
284
- bias_other = gr.Textbox(
285
- label="Other Bias",
286
- placeholder="Specify if not listed above"
287
- )
288
-
289
- with gr.Row():
290
- cause_of_bias = gr.CheckboxGroup(
291
- choices=CV_CAUSE_OF_BIAS,
292
- label="DF-MC CB - Cause of Bias (max 3)",
293
- info="Select up to 3 causes"
294
- )
295
- cause_bias_other = gr.Textbox(
296
- label="Other Cause of Bias",
297
- placeholder="Specify if not listed above"
298
- )
299
-
300
- error = gr.TextArea(
301
- label="DF-MC E - Error",
302
- placeholder="Describe errors encountered...",
303
- lines=3
304
- )
305
-
306
- with gr.Row():
307
- cause_of_error = gr.CheckboxGroup(
308
- choices=CV_CAUSE_OF_ERROR,
309
- label="DF-MC CE - Cause of Error (max 3)",
310
- info="Select up to 3 error causes"
311
- )
312
- cause_error_other = gr.Textbox(
313
- label="Other Cause of Error",
314
- placeholder="Specify if not listed above"
315
- )
316
-
317
- layer_n = gr.Textbox(
318
- label="DF-MC Ln - Layer n",
319
- placeholder="Specify layer/stage number if applicable"
320
- )
321
-
322
- # ===== SECTION 2: TOP LEVEL (FIGURE 6) =====
323
- with gr.Tab("🔝 Top Level Elements (DF MC 0)"):
324
- gr.Markdown("### Figure 6 - Top Level Elements\nCheck applicable items and provide descriptions.")
325
-
326
- # Create checkboxes with text areas for each element
327
- elements = [
328
- ("type_reasoning", "Type of Reasoning"),
329
- ("cause_error", "Cause of Error"),
330
- ("algorithm", "Algorithm"),
331
- ("inference", "Inference"),
332
- ("confounder", "Confounder"),
333
- ("classification", "Classification"),
334
- ("evaluation", "Evaluation"),
335
- ("hypothesis", "Hypothesis"),
336
- ("tool", "Tool"),
337
- ("bias_debiasing", "Bias/Debiasing"),
338
- ("case_statement", "Case Statement"),
339
- ("evidence_mc1", "Evidence MC1"),
340
- ("file_type", "File Type"),
341
- ("data_structure", "Data Structure"),
342
- ("degree_confidence", "Degree of Confidence")
343
- ]
344
-
345
- top_level_components = []
346
- for elem_id, elem_label in elements:
347
- with gr.Row():
348
- check = gr.Checkbox(label=f"✓ {elem_label}", value=False)
349
- desc = gr.TextArea(
350
- label=f"Description",
351
- placeholder=f"Describe {elem_label.lower()} if applicable...",
352
- lines=2,
353
- visible=False
354
- )
355
- # Show/hide description based on checkbox
356
- check.change(
357
- fn=lambda x: gr.update(visible=x),
358
- inputs=[check],
359
- outputs=[desc]
360
- )
361
- top_level_components.extend([check, desc])
362
-
363
- # ===== SECTION 3: DATA & PROCESSES (FIGURE 7) =====
364
- with gr.Tab("⚙️ Data & Processes (DF MC 1)"):
365
- gr.Markdown("### Figure 7 - Data Types and Analytical Processes\nCheck applicable items and provide descriptions.")
366
-
367
- processes_list = [
368
- ("event_data", "EVENT/DATA"),
369
- ("parse_raw", "Parse Raw Data Contained Within the Image"),
370
- ("validate", "Validate the Data Compared"),
371
- ("identify_partitions", "Identify Partitions"),
372
- ("process_filesystem", "Process File System"),
373
- ("identify_content", "Identify Content (Carving)"),
374
- ("file_type_id", "File Type Identification"),
375
- ("file_specific", "File-Specific Processing"),
376
- ("file_hashing", "File Hashing"),
377
- ("hash_matching", "Hash Matching"),
378
- ("mismatched_sig", "Mismatched Signature Detection"),
379
- ("timeline", "Timeline"),
380
- ("timeline_analysis", "Timeline Analysis"),
381
- ("geolocation", "Geolocation"),
382
- ("geolocation_analysis", "Geolocation Analysis"),
383
- ("keyword_indexing", "Keyword Indexing"),
384
- ("keyword_searching", "Keyword Searching"),
385
- ("automated_result", "Automated Result Interpretation"),
386
- ("ai_content_flag", "AI-Based Content Flagging")
387
- ]
388
-
389
- process_components = []
390
- for proc_id, proc_label in processes_list:
391
- with gr.Row():
392
- check = gr.Checkbox(label=f"✓ {proc_label}", value=False)
393
- desc = gr.TextArea(
394
- label=f"Description",
395
- placeholder=f"Describe {proc_label.lower()} if applicable...",
396
- lines=2,
397
- visible=False
398
- )
399
- check.change(
400
- fn=lambda x: gr.update(visible=x),
401
- inputs=[check],
402
- outputs=[desc]
403
- )
404
- process_components.extend([check, desc])
405
-
406
- # ===== GENERATION & OUTPUT =====
407
- gr.Markdown("---")
408
- gr.Markdown("### Generate Your Model Card")
409
-
410
- generate_btn = gr.Button("🚀 Generate Model Card", variant="primary", size="lg")
411
-
412
- with gr.Tabs():
413
- with gr.Tab("📄 Preview (Markdown)"):
414
- preview_output = gr.Markdown(label="Markdown Preview")
415
-
416
- with gr.Tab("💾 Download Files"):
417
- gr.Markdown("Click the buttons below to download your generated model card files:")
418
- json_download = gr.File(label="Download JSON")
419
- md_download = gr.File(label="Download README.md")
420
-
421
- # Wire up the generation
422
- all_inputs = [
423
- mmcid, version, owner, use_context, case_statement, hypothesis,
424
- classification, reasoning_type, bias, cause_of_bias, error, cause_of_error, layer_n,
425
- classification_other, reasoning_other, bias_other, cause_bias_other, cause_error_other
426
- ] + top_level_components + process_components
427
-
428
- generate_btn.click(
429
- fn=generate_model_card,
430
- inputs=all_inputs,
431
- outputs=[preview_output, json_download, md_download]
432
- )
433
-
434
- gr.Markdown("""
435
- ---
436
- ### About This Generator
437
-
438
- **References:**
439
- - Di Maio, P. (2024). Towards Open Standards for Systemic Complexity in Digital Forensics. https://papers.cool/arxiv/2512.12970
440
- - Hargreaves, C., Nelson, A., & Casey, E. (2024). An abstract model for digital forensic analysis tools—A foundation for systematic error mitigation analysis. *Forensic Science International: Digital Investigation*, 48.
441
-
442
- **Generator Version:** {0}
443
- **License:** Apache 2.0
444
- **Contact:** For questions or feedback, please open an issue on the project repository.
445
- """.format(GENERATOR_VERSION))
446
-
447
- if __name__ == "__main__":
448
- demo.launch()