STARBORN committed on
Commit
4997f9e
·
verified ·
1 Parent(s): c2a3d02

Upload 2 files

Browse files
Files changed (2) hide show
  1. app(4).py +421 -0
  2. requirements(2).txt +2 -0
app(4).py ADDED
@@ -0,0 +1,421 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Digital Forensics Model Card Generator
3
+ A tool for creating standardized model cards for digital forensics AI/ML models
4
+ Compatible with Gradio 5.x
5
+ """
6
+
7
+ import gradio as gr
8
+ import json
9
+ from datetime import datetime
10
+ from utils.generator import generate_json_output, generate_markdown_output
11
+ from utils.validators import validate_mmcid
12
+
13
# Version of this generator; it is shown in the UI and passed to the output builders.
GENERATOR_VERSION = "1.0.0"

# Controlled vocabularies offered as choices in the metadata form.

# Choices for the "Usage Context" dropdown.
CV_USE_CONTEXT = [
    "Standalone",
    "Integrated",
    "Hybrid (both standalone and integrated)",
]

# Choices for the "Classification" checkbox group.
CV_CLASSIFICATION = [
    "Computer Forensics",
    "Network Forensics",
    "Mobile Device Forensics",
    "Cloud Forensics",
    "Database Forensics",
    "Memory Forensics",
    "Digital Image Forensics",
    "Digital Video/Audio Forensics",
    "IoT Forensics",
    "Multi-domain (covers multiple types)",
]

# Choices for the "Type of Reasoning" checkbox group.
CV_REASONING = [
    "Deductive Reasoning (from general to specific)",
    "Inductive Reasoning (from specific to general)",
    "Abductive Reasoning (inference to best explanation)",
    "Retroductive Reasoning (hypothesis refinement)",
    "Hybrid/Mixed Reasoning",
]

# Choices for the "Bias" checkbox group.
CV_BIAS = [
    "Data Bias (historical, sampling, selection)",
    "Algorithmic Bias (model architecture, optimization)",
    "Human Bias (cognitive, confirmation, implicit)",
    "Deployment Bias (context mismatch)",
    "Reporting Bias (documentation gaps)",
    "Measurement Bias (proxy variables)",
    "Stereotyping Bias (reinforcing stereotypes)",
    "Automation Bias (over-reliance on automated results)",
    "No Identified Bias",
    "Multiple Bias Types",
]

# Choices for the "Cause of Bias" checkbox group.
CV_CAUSE_OF_BIAS = [
    "Unrepresentative Training Data",
    "Historical Inequities in Data",
    "Feature Selection Issues",
    "Labeling Inconsistencies",
    "Optimization Objective Mismatch",
    "Insufficient Diversity in Development Team",
    "Lack of Domain Expertise",
    "Temporal Drift (data age/staleness)",
    "Geographic/Cultural Limitations",
    "Tool/Method Limitations",
    "Multiple Causes",
    "Unknown/Under Investigation",
]

# Choices for the "Cause of Error" checkbox group.
CV_CAUSE_OF_ERROR = [
    "Training Error (underfitting)",
    "Validation Error (model selection issues)",
    "Testing Error (generalization failure)",
    "Overfitting (high variance)",
    "Underfitting (high bias)",
    "Data Quality Issues (noise, outliers, mislabeling)",
    "Insufficient Training Data",
    "Class Imbalance",
    "Feature Engineering Issues",
    "Hyperparameter Misconfiguration",
    "Model Complexity Mismatch",
    "Adversarial Attack (poisoning, evasion)",
    "Concept Drift",
    "Tool Calibration Error",
    "Human Error in Analysis",
    "Chain of Custody Issues",
    "Multiple Error Sources",
    "Unknown/Under Investigation",
]
88
+
89
def save_to_file(content, filename):
    """Write *content* to a file in a fresh temporary directory and return its path.

    Each call gets its own directory under the system temp location, so
    concurrent sessions that use the same *filename* never overwrite each
    other's output. The directories are not removed by this function.

    Args:
        content: Text to write.
        filename: Desired file name. Only its final path component is used.

    Returns:
        Absolute path of the written file.
    """
    import os
    import tempfile

    # Drop any directory parts so the name cannot escape the temp directory.
    safe_name = os.path.basename(filename) or "output.txt"
    target_dir = tempfile.mkdtemp(prefix="model_card_")
    filepath = os.path.join(target_dir, safe_name)
    # Explicit UTF-8: generated cards may contain non-ASCII characters and
    # the locale default encoding is not guaranteed to handle them.
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)
    return filepath
95
+
96
# Number of leading positional inputs that come from the metadata tab.
_METADATA_ARG_COUNT = 18

# Output keys for the top-level elements, in the order the form supplies
# its (checkbox, description) pairs.
_TOP_LEVEL_KEYS = (
    "type_of_reasoning", "cause_of_error", "algorithm", "inference", "confounder",
    "classification", "evaluation", "hypothesis", "tool", "bias_debiasing",
    "case_statement", "evidence_mc1", "file_type", "data_structure", "degree_of_confidence",
)

# Output keys for the process elements, in the order the form supplies
# its (checkbox, description) pairs.
_PROCESS_KEYS = (
    "event_data", "parse_raw_data", "validate", "identify_partitions",
    "process_file_system", "identify_content_carving", "file_type_identification",
    "file_specific_processing", "file_hashing", "hash_matching",
    "mismatched_signature_detection", "timeline", "timeline_analysis",
    "geolocation", "geolocation_analysis", "keyword_indexing",
    "keyword_searching", "automated_result_interpretation", "ai_based_content_flagging",
)


def _pair_elements(keys, flat_values):
    """Fold a flat (checkbox, text, checkbox, text, ...) sequence into a dict keyed by *keys*."""
    return {
        key: {
            "applicable": applicable,
            # A description is only kept when its checkbox is ticked.
            "description": description if applicable else "",
        }
        for key, applicable, description in zip(keys, flat_values[::2], flat_values[1::2])
    }


def _selected_with_other(selected, other):
    """Return the selected choices as a list, with the free-text *other* appended if given."""
    values = list(selected or [])  # tolerate None from a cleared or absent selection
    if other:
        values.append(other)
    return values


def generate_model_card(*args):
    """Generate model card outputs from form inputs.

    Args:
        *args: Flat positional form values, in this order: 18 metadata values,
            then one (checkbox, description) pair per top-level element, then
            one (checkbox, description) pair per process element. Extra
            trailing values are ignored.

    Returns:
        A 3-tuple ``(markdown_output, json_file, md_file)``. If an MMCID is
        given and fails ``validate_mmcid``, returns ``(error_message, None, None)``.

    Raises:
        ValueError: If fewer values are supplied than the form layout requires.
    """
    top_level_end = _METADATA_ARG_COUNT + 2 * len(_TOP_LEVEL_KEYS)
    process_end = top_level_end + 2 * len(_PROCESS_KEYS)
    if len(args) < process_end:
        raise ValueError(
            f"generate_model_card expected at least {process_end} inputs, got {len(args)}"
        )

    # Parse arguments (metadata section)
    (mmcid, version, owner, use_context, case_statement, hypothesis,
     classification, reasoning_type, bias, cause_of_bias, error, cause_of_error, layer_n,
     classification_other, reasoning_other, bias_other, cause_bias_other,
     cause_error_other) = args[:_METADATA_ARG_COUNT]

    # Validate MMCID if provided
    if mmcid and not validate_mmcid(mmcid):
        return "❌ Invalid MMCID format. Please use format: DF-MC-YYYY-NNN (e.g., DF-MC-2025-001)", None, None

    # Slice bounds are derived from the key tuples so they cannot drift apart.
    top_level_args = args[_METADATA_ARG_COUNT:top_level_end]
    process_args = args[top_level_end:process_end]

    # Build metadata
    metadata = {
        "mmcid": mmcid or "Not specified",
        "version": version or "N/A",
        "owner": owner or "Not specified",
        "use_context": use_context or "Not specified",
        "case_statement": case_statement,
        "hypothesis": hypothesis,
        "classification": _selected_with_other(classification, classification_other),
        "reasoning_type": _selected_with_other(reasoning_type, reasoning_other),
        "bias": _selected_with_other(bias, bias_other),
        "cause_of_bias": _selected_with_other(cause_of_bias, cause_bias_other),
        "error": error,
        "cause_of_error": _selected_with_other(cause_of_error, cause_error_other),
        "layer_n": layer_n or "N/A",
    }

    top_level = _pair_elements(_TOP_LEVEL_KEYS, top_level_args)
    processes = _pair_elements(_PROCESS_KEYS, process_args)

    # Generate outputs
    json_output = generate_json_output(metadata, top_level, processes, GENERATOR_VERSION)
    markdown_output = generate_markdown_output(metadata, top_level, processes, GENERATOR_VERSION)

    # Save to files
    json_file = save_to_file(json_output, "model_card.json")
    md_file = save_to_file(markdown_output, "README.md")

    return markdown_output, json_file, md_file
175
+
176
+
177
# Build Gradio Interface
def _checkbox_with_description(label):
    """Create one row with an "applicable" checkbox and its description box.

    Must be called inside an active ``gr.Blocks`` context. Returns
    ``[checkbox, textarea]`` so the caller can extend a flat input list.
    """
    with gr.Row():
        check = gr.Checkbox(label=f"✓ {label}", value=False)
        desc = gr.TextArea(
            label=f"Description for {label}",
            placeholder=f"Describe {label.lower()} if applicable...",
            lines=2
        )
    return [check, desc]


with gr.Blocks(title="Digital Forensics Model Card Generator", theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"""
    # 🔬 Digital Forensics Model Card Generator

    Create standardized model cards for digital forensics AI/ML systems. Based on:
    - Di Maio, P. (2024). Towards Open Standards for Systemic Complexity in Digital Forensics
    - Hargreaves, C., Nelson, A., & Casey, E. (2024). An abstract model for digital forensic analysis tools

    **Version {GENERATOR_VERSION}**
    """)

    with gr.Tabs():
        # ===== SECTION 1: METADATA =====
        with gr.Tab("📋 Metadata"):
            gr.Markdown("### Model Card Metadata\nAll fields are optional unless otherwise specified.")

            with gr.Row():
                mmcid = gr.Textbox(
                    label="MMCID - Identifier",
                    placeholder="DF-MC-2025-001",
                    info="Format: DF-MC-YYYY-NNN"
                )
                version = gr.Textbox(
                    label="MCV - Version",
                    placeholder="1.0 or N/A",
                    info="Version number or N/A"
                )

            owner = gr.Textbox(
                label="DF-MCO - Owner",
                placeholder="Organization or individual name"
            )

            use_context = gr.Dropdown(
                choices=CV_USE_CONTEXT,
                label="DF-MCUse - Usage Context",
                info="How is this model card used?"
            )

            case_statement = gr.TextArea(
                label="DF-MC CS - Case Statement",
                placeholder="Describe the case context...",
                lines=3
            )

            hypothesis = gr.TextArea(
                label="DF-MC H - Hypothesis",
                placeholder="State the hypothesis being tested...",
                lines=3
            )

            gr.Markdown("#### Select up to 3 items for each category:")

            with gr.Row():
                with gr.Column():
                    classification = gr.CheckboxGroup(
                        choices=CV_CLASSIFICATION,
                        label="DF-MC C - Classification (max 3)",
                        info="Select up to 3 forensic domains"
                    )
                with gr.Column():
                    classification_other = gr.Textbox(
                        label="Other Classification",
                        placeholder="Specify if not listed above"
                    )

            with gr.Row():
                with gr.Column():
                    reasoning_type = gr.CheckboxGroup(
                        choices=CV_REASONING,
                        label="DF-MC TR - Type of Reasoning (max 3)",
                        info="Select up to 3 reasoning types"
                    )
                with gr.Column():
                    reasoning_other = gr.Textbox(
                        label="Other Reasoning Type",
                        placeholder="Specify if not listed above"
                    )

            with gr.Row():
                with gr.Column():
                    bias = gr.CheckboxGroup(
                        choices=CV_BIAS,
                        label="DF-MC B - Bias (max 3)",
                        info="Select up to 3 bias types"
                    )
                with gr.Column():
                    bias_other = gr.Textbox(
                        label="Other Bias",
                        placeholder="Specify if not listed above"
                    )

            with gr.Row():
                with gr.Column():
                    cause_of_bias = gr.CheckboxGroup(
                        choices=CV_CAUSE_OF_BIAS,
                        label="DF-MC CB - Cause of Bias (max 3)",
                        info="Select up to 3 causes"
                    )
                with gr.Column():
                    cause_bias_other = gr.Textbox(
                        label="Other Cause of Bias",
                        placeholder="Specify if not listed above"
                    )

            error = gr.TextArea(
                label="DF-MC E - Error",
                placeholder="Describe errors encountered...",
                lines=3
            )

            with gr.Row():
                with gr.Column():
                    cause_of_error = gr.CheckboxGroup(
                        choices=CV_CAUSE_OF_ERROR,
                        label="DF-MC CE - Cause of Error (max 3)",
                        info="Select up to 3 error causes"
                    )
                with gr.Column():
                    cause_error_other = gr.Textbox(
                        label="Other Cause of Error",
                        placeholder="Specify if not listed above"
                    )

            layer_n = gr.Textbox(
                label="DF-MC Ln - Layer n",
                placeholder="Specify layer/stage number if applicable"
            )

        # ===== SECTION 2: TOP LEVEL (FIGURE 6) =====
        with gr.Tab("🔝 Top Level Elements (DF MC 0)"):
            gr.Markdown("### Figure 6 - Top Level Elements\nCheck applicable items and provide descriptions.")

            # (output key, label) pairs. generate_model_card maps inputs to
            # keys by POSITION, so the order here must match its key list;
            # the ids are spelled the same way as those keys for clarity.
            elements = [
                ("type_of_reasoning", "Type of Reasoning"),
                ("cause_of_error", "Cause of Error"),
                ("algorithm", "Algorithm"),
                ("inference", "Inference"),
                ("confounder", "Confounder"),
                ("classification", "Classification"),
                ("evaluation", "Evaluation"),
                ("hypothesis", "Hypothesis"),
                ("tool", "Tool"),
                ("bias_debiasing", "Bias/Debiasing"),
                ("case_statement", "Case Statement"),
                ("evidence_mc1", "Evidence MC1"),
                ("file_type", "File Type"),
                ("data_structure", "Data Structure"),
                ("degree_of_confidence", "Degree of Confidence")
            ]

            top_level_components = []
            for _key, elem_label in elements:
                top_level_components.extend(_checkbox_with_description(elem_label))

        # ===== SECTION 3: DATA & PROCESSES (FIGURE 7) =====
        with gr.Tab("⚙️ Data & Processes (DF MC 1)"):
            gr.Markdown("### Figure 7 - Data Types and Analytical Processes\nCheck applicable items and provide descriptions.")

            # (output key, label) pairs; order must match the process key
            # list in generate_model_card (mapping is positional).
            processes_list = [
                ("event_data", "EVENT/DATA"),
                ("parse_raw_data", "Parse Raw Data Contained Within the Image"),
                ("validate", "Validate the Data Compared"),
                ("identify_partitions", "Identify Partitions"),
                ("process_file_system", "Process File System"),
                ("identify_content_carving", "Identify Content (Carving)"),
                ("file_type_identification", "File Type Identification"),
                ("file_specific_processing", "File-Specific Processing"),
                ("file_hashing", "File Hashing"),
                ("hash_matching", "Hash Matching"),
                ("mismatched_signature_detection", "Mismatched Signature Detection"),
                ("timeline", "Timeline"),
                ("timeline_analysis", "Timeline Analysis"),
                ("geolocation", "Geolocation"),
                ("geolocation_analysis", "Geolocation Analysis"),
                ("keyword_indexing", "Keyword Indexing"),
                ("keyword_searching", "Keyword Searching"),
                ("automated_result_interpretation", "Automated Result Interpretation"),
                ("ai_based_content_flagging", "AI-Based Content Flagging")
            ]

            process_components = []
            for _key, proc_label in processes_list:
                process_components.extend(_checkbox_with_description(proc_label))

    # ===== GENERATION & OUTPUT =====
    gr.Markdown("---")
    gr.Markdown("### Generate Your Model Card")

    generate_btn = gr.Button("🚀 Generate Model Card", variant="primary", size="lg")

    with gr.Tabs():
        with gr.Tab("📄 Preview (Markdown)"):
            preview_output = gr.Markdown(label="Markdown Preview")

        with gr.Tab("💾 Download Files"):
            gr.Markdown("Click the download buttons below to save your generated model card files:")
            with gr.Row():
                json_download = gr.File(label="Download JSON", type="filepath")
                md_download = gr.File(label="Download README.md", type="filepath")

    # Wire up the generation. The order of this list is the positional
    # contract with generate_model_card: 18 metadata inputs, then the
    # top-level pairs, then the process pairs.
    all_inputs = [
        mmcid, version, owner, use_context, case_statement, hypothesis,
        classification, reasoning_type, bias, cause_of_bias, error, cause_of_error, layer_n,
        classification_other, reasoning_other, bias_other, cause_bias_other, cause_error_other
    ] + top_level_components + process_components

    generate_btn.click(
        fn=generate_model_card,
        inputs=all_inputs,
        outputs=[preview_output, json_download, md_download]
    )

    gr.Markdown(f"""
    ---
    ### About This Generator

    **References:**
    - Di Maio, P. (2024). Towards Open Standards for Systemic Complexity in Digital Forensics. https://papers.cool/arxiv/2512.12970
    - Hargreaves, C., Nelson, A., & Casey, E. (2024). An abstract model for digital forensic analysis tools—A foundation for systematic error mitigation analysis. *Forensic Science International: Digital Investigation*, 48.

    **Generator Version:** {GENERATOR_VERSION}
    **License:** Apache 2.0
    **Contact:** For questions or feedback, please open an issue on the project repository.
    """)

if __name__ == "__main__":
    demo.launch()
requirements(2).txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio
2
+ python-dateutil