STARBORN committed on
Commit
ca65075
·
verified ·
1 Parent(s): a2797d3

Upload app(5).py

Browse files
Files changed (1) hide show
  1. app(5).py +430 -0
app(5).py ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Digital Forensics Model Card Generator - Single Form Version
3
+ A tool for creating standardized model cards for digital forensics AI/ML models
4
+ """
5
+
6
+ import gradio as gr
7
+ import json
8
+ from datetime import datetime
9
+ from utils.generator import generate_json_output, generate_markdown_output
10
+ from utils.validators import validate_mmcid
11
+
12
# Version of this generator; shown in the UI and passed to the output generators.
GENERATOR_VERSION = "1.0.0-beta"

# Controlled Vocabularies
# Each list supplies the `choices` of one Dropdown/CheckboxGroup in the form.

# Usage context of the model card (single-select dropdown).
CV_USE_CONTEXT = [
    "Standalone",
    "Integrated",
    "Hybrid (both standalone and integrated)",
]

# Forensic domain classification.
CV_CLASSIFICATION = [
    "Computer Forensics",
    "Network Forensics",
    "Mobile Device Forensics",
    "Cloud Forensics",
    "Database Forensics",
    "Memory Forensics",
    "Digital Image Forensics",
    "Digital Video/Audio Forensics",
    "IoT Forensics",
    "Multi-domain (covers multiple types)",
]

# Type of reasoning applied.
CV_REASONING = [
    "Deductive Reasoning (from general to specific)",
    "Inductive Reasoning (from specific to general)",
    "Abductive Reasoning (inference to best explanation)",
    "Retroductive Reasoning (hypothesis refinement)",
    "Hybrid/Mixed Reasoning",
]

# Bias types.
CV_BIAS = [
    "Data Bias (historical, sampling, selection)",
    "Algorithmic Bias (model architecture, optimization)",
    "Human Bias (cognitive, confirmation, implicit)",
    "Deployment Bias (context mismatch)",
    "Reporting Bias (documentation gaps)",
    "Measurement Bias (proxy variables)",
    "Stereotyping Bias (reinforcing stereotypes)",
    "Automation Bias (over-reliance on automated results)",
    "No Identified Bias",
    "Multiple Bias Types",
]

# Root causes of bias.
CV_CAUSE_OF_BIAS = [
    "Unrepresentative Training Data",
    "Historical Inequities in Data",
    "Feature Selection Issues",
    "Labeling Inconsistencies",
    "Optimization Objective Mismatch",
    "Insufficient Diversity in Development Team",
    "Lack of Domain Expertise",
    "Temporal Drift (data age/staleness)",
    "Geographic/Cultural Limitations",
    "Tool/Method Limitations",
    "Multiple Causes",
    "Unknown/Under Investigation",
]

# Causes of error.
CV_CAUSE_OF_ERROR = [
    "Training Error (underfitting)",
    "Validation Error (model selection issues)",
    "Testing Error (generalization failure)",
    "Overfitting (high variance)",
    "Underfitting (high bias)",
    "Data Quality Issues (noise, outliers, mislabeling)",
    "Insufficient Training Data",
    "Class Imbalance",
    "Feature Engineering Issues",
    "Hyperparameter Misconfiguration",
    "Model Complexity Mismatch",
    "Adversarial Attack (poisoning, evasion)",
    "Concept Drift",
    "Tool Calibration Error",
    "Human Error in Analysis",
    "Chain of Custody Issues",
    "Multiple Error Sources",
    "Unknown/Under Investigation",
]
87
+
88
def save_to_file(content, filename):
    """Write *content* to a new temporary file and return the file's path.

    Args:
        content: Text to write.
        filename: Desired file name; only its base name is used, so the
            returned path always ends with that name.

    Returns:
        Path of the written file, located inside a freshly created
        temporary directory.
    """
    import os
    import tempfile

    # A fixed "/tmp/<filename>" path would be shared by every call, so two
    # requests generating a card at the same time would overwrite each
    # other's file. A per-call directory keeps the file name stable while
    # making the full path unique.
    target_dir = tempfile.mkdtemp(prefix="model_card_")
    filepath = os.path.join(target_dir, os.path.basename(filename))

    # Explicit UTF-8 so non-ASCII text does not depend on the platform's
    # default encoding.
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)
    return filepath
94
+
95
# Number of leading positional inputs that make up the metadata section.
_METADATA_FIELD_COUNT = 18

# MC0 Top Level Elements (9 elements after removing duplicates).
_MC0_KEYS = (
    "algorithm", "inference", "confounder", "evaluation", "tool",
    "evidence_mc1", "file_type", "data_structure", "degree_of_confidence",
)

# MC1 Data & Processes (19 elements).
_MC1_PROCESS_KEYS = (
    "event_data", "parse_raw_data", "validate", "identify_partitions",
    "process_file_system", "identify_content_carving", "file_type_identification",
    "file_specific_processing", "file_hashing", "hash_matching",
    "mismatched_signature_detection", "timeline", "timeline_analysis",
    "geolocation", "geolocation_analysis", "keyword_indexing",
    "keyword_searching", "automated_result_interpretation", "ai_based_content_flagging",
)


def _merge_choices(selected, other):
    """Return the selected vocabulary items plus the free-text entry, if any."""
    # `selected or []` tolerates a missing (None) selection, which list()
    # alone would reject with a TypeError.
    merged = list(selected or [])
    extra = (other or "").strip()
    if extra:  # whitespace-only input counts as "nothing entered"
        merged.append(extra)
    return merged


def _collect_elements(keys, flat_values):
    """Map each key to its (checkbox, description) pair from a flat sequence."""
    elements = {}
    for position, key in enumerate(keys):
        applicable = flat_values[position * 2]
        description = flat_values[position * 2 + 1]
        elements[key] = {
            "applicable": applicable,
            # A description is only kept when its checkbox is ticked.
            "description": description if applicable else "",
        }
    return elements


def generate_model_card(*args):
    """Generate model card outputs from form inputs.

    Args:
        *args: Form values in a fixed positional order: the 18 metadata
            fields (mmcid, version, owner, use_context, layer_n,
            case_statement, hypothesis, classification, classification_other,
            reasoning_type, reasoning_other, bias, bias_other, cause_of_bias,
            cause_bias_other, error, cause_of_error, cause_error_other),
            followed by one (checkbox, description) pair per MC0 key and
            then one pair per MC1 process key.

    Returns:
        A 3-tuple ``(markdown, json_path, md_path)``. When the MMCID is
        given but fails validation, the tuple is
        ``(error_message, None, None)`` instead.

    Raises:
        ValueError: If fewer inputs are supplied than the form layout
            requires.
    """
    expected = _METADATA_FIELD_COUNT + 2 * (len(_MC0_KEYS) + len(_MC1_PROCESS_KEYS))
    if len(args) < expected:
        # Fail with a clear message rather than an IndexError from deep
        # inside the pairing logic when inputs and keys fall out of sync.
        raise ValueError(
            f"generate_model_card expected at least {expected} inputs, got {len(args)}"
        )

    # Unpack the metadata arguments in sequence
    (mmcid, version, owner, use_context, layer_n,
     case_statement, hypothesis,
     classification, classification_other,
     reasoning_type, reasoning_other,
     bias, bias_other,
     cause_of_bias, cause_bias_other,
     error, cause_of_error, cause_error_other) = args[:_METADATA_FIELD_COUNT]

    # Remaining args are MC0 and MC1 elements (checkbox + text pairs)
    remaining_args = args[_METADATA_FIELD_COUNT:]

    # Validate MMCID if provided
    if mmcid and not validate_mmcid(mmcid):
        return "❌ Invalid MMCID format. Please use format: DF-MC-YYYY-NNN (e.g., DF-MC-2025-001)", None, None

    # Build metadata
    metadata = {
        "mmcid": mmcid or "Not specified",
        "version": version or "N/A",
        "owner": owner or "Not specified",
        "use_context": use_context or "Not specified",
        "layer_n": layer_n or "N/A",
        "case_statement": case_statement,
        "hypothesis": hypothesis,
        "classification": _merge_choices(classification, classification_other),
        "reasoning_type": _merge_choices(reasoning_type, reasoning_other),
        "bias": _merge_choices(bias, bias_other),
        "cause_of_bias": _merge_choices(cause_of_bias, cause_bias_other),
        "error": error,
        "cause_of_error": _merge_choices(cause_of_error, cause_error_other),
    }

    # MC0 pairs come first; MC1 pairs start right after them.
    process_start_idx = len(_MC0_KEYS) * 2
    top_level = _collect_elements(_MC0_KEYS, remaining_args[:process_start_idx])
    processes = _collect_elements(_MC1_PROCESS_KEYS, remaining_args[process_start_idx:])

    # Generate outputs
    json_output = generate_json_output(metadata, top_level, processes, GENERATOR_VERSION)
    markdown_output = generate_markdown_output(metadata, top_level, processes, GENERATOR_VERSION)

    # Save to files
    json_file = save_to_file(json_output, "model_card.json")
    md_file = save_to_file(markdown_output, "README.md")

    return markdown_output, json_file, md_file
176
+
177
+
178
# Build Single-Form Gradio Interface
# The whole form lives in one Blocks layout; every input component is later
# passed positionally to generate_model_card, so creation order matters.
with gr.Blocks(title="Digital Forensics Model Card Generator", theme=gr.themes.Soft()) as demo:

    gr.Markdown(f"""
    # 🔬 Digital Forensics Model Card Generator

    Create standardized model cards for digital forensics AI/ML systems.

    **Based on:**
    - Di Maio, P. (2024). Towards Open Standards for Systemic Complexity in Digital Forensics
    - Hargreaves, C., Nelson, A., & Casey, E. (2024). An abstract model for digital forensic analysis tools

    **Version:** {GENERATOR_VERSION}

    ---
    """)

    # SECTION 1: IDENTIFICATION & CONTEXT
    gr.Markdown("## 📋 Section 1: Identification & Context")

    with gr.Row():
        mmcid = gr.Textbox(
            label="MMCID - Identifier",
            placeholder="DF-MC-2025-001",
            info="Format: DF-MC-YYYY-NNN"
        )
        version = gr.Textbox(
            label="MCV - Version",
            placeholder="1.0 or N/A"
        )

    with gr.Row():
        owner = gr.Textbox(
            label="DF-MCO - Owner",
            placeholder="Organization or individual name"
        )
        use_context = gr.Dropdown(
            choices=CV_USE_CONTEXT,
            label="DF-MCUse - Usage Context"
        )

    layer_n = gr.Textbox(
        label="DF-MC Ln - Layer/Stage",
        placeholder="Specify layer or stage number if applicable"
    )

    # SECTION 2: CASE CONTEXT
    gr.Markdown("## 📝 Section 2: Case Context")

    case_statement = gr.TextArea(
        label="DF-MC CS - Case Statement",
        placeholder="Describe the case context, investigation scope, and objectives...",
        lines=3
    )

    hypothesis = gr.TextArea(
        label="DF-MC H - Hypothesis",
        placeholder="State the hypothesis being tested or investigated...",
        lines=3
    )

    # SECTION 3: CLASSIFICATION & APPROACH
    gr.Markdown("## 🔍 Section 3: Classification & Approach")
    gr.Markdown("*Select up to 3 items from each controlled vocabulary*")

    with gr.Row():
        with gr.Column():
            classification = gr.CheckboxGroup(
                choices=CV_CLASSIFICATION,
                label="DF-MC C - Classification (max 3)",
                info="Select forensic domain(s)"
            )
        with gr.Column():
            classification_other = gr.Textbox(
                label="Other Classification",
                placeholder="Specify if not listed"
            )

    with gr.Row():
        with gr.Column():
            reasoning_type = gr.CheckboxGroup(
                choices=CV_REASONING,
                label="DF-MC TR - Type of Reasoning (max 3)",
                info="Select reasoning method(s)"
            )
        with gr.Column():
            reasoning_other = gr.Textbox(
                label="Other Reasoning",
                placeholder="Specify if not listed"
            )

    # SECTION 4: QUALITY & LIMITATIONS
    gr.Markdown("## ⚠️ Section 4: Quality & Limitations")

    with gr.Row():
        with gr.Column():
            bias = gr.CheckboxGroup(
                choices=CV_BIAS,
                label="DF-MC B - Bias (max 3)",
                info="Identify bias type(s)"
            )
        with gr.Column():
            bias_other = gr.Textbox(
                label="Other Bias",
                placeholder="Specify if not listed"
            )

    with gr.Row():
        with gr.Column():
            cause_of_bias = gr.CheckboxGroup(
                choices=CV_CAUSE_OF_BIAS,
                label="DF-MC CB - Cause of Bias (max 3)",
                info="Identify root cause(s)"
            )
        with gr.Column():
            cause_bias_other = gr.Textbox(
                label="Other Cause of Bias",
                placeholder="Specify if not listed"
            )

    error = gr.TextArea(
        label="DF-MC E - Error Description",
        placeholder="Describe any errors encountered during analysis...",
        lines=3
    )

    with gr.Row():
        with gr.Column():
            cause_of_error = gr.CheckboxGroup(
                choices=CV_CAUSE_OF_ERROR,
                label="DF-MC CE - Cause of Error (max 3)",
                info="Identify error cause(s)"
            )
        with gr.Column():
            cause_error_other = gr.Textbox(
                label="Other Cause of Error",
                placeholder="Specify if not listed"
            )

    # SECTION 5: TOP LEVEL ELEMENTS (MC0 - Figure 6, deduplicated)
    gr.Markdown("## 🔝 Section 5: Top Level Elements (DF MC 0 - Figure 6)")
    gr.Markdown("*Check applicable elements and provide descriptions*")

    # (id, label) pairs. Only the label is used here; the position in this
    # list must line up with the MC0 key order used by generate_model_card,
    # which reads these inputs positionally.
    mc0_elements = [
        ("algorithm", "Algorithm"),
        ("inference", "Inference"),
        ("confounder", "Confounder"),
        ("evaluation", "Evaluation"),
        ("tool", "Tool"),
        ("evidence_mc1", "Evidence MC1"),
        ("file_type", "File Type"),
        ("data_structure", "Data Structure"),
        ("degree_confidence", "Degree of Confidence")
    ]

    # Flat list of [checkbox, description, checkbox, description, ...].
    mc0_components = []
    for _elem_id, elem_label in mc0_elements:
        with gr.Row():
            check = gr.Checkbox(label=f"✓ {elem_label}", value=False)
            desc = gr.TextArea(
                label="Description",
                placeholder=f"Describe {elem_label.lower()} if applicable...",
                lines=2
            )
        mc0_components.extend([check, desc])

    # SECTION 6: DATA & PROCESSES (MC1 - Figure 7)
    gr.Markdown("## ⚙️ Section 6: Data Types & Analytical Processes (DF MC 1 - Figure 7)")
    gr.Markdown("*Check applicable processes and describe how they were performed*")

    # (id, label) pairs. Only the label is used here; the position in this
    # list must line up with the MC1 process key order used by
    # generate_model_card.
    mc1_processes = [
        ("event_data", "EVENT/DATA"),
        ("parse_raw", "Parse Raw Data Contained Within the Image"),
        ("validate", "Validate the Data Compared"),
        ("identify_partitions", "Identify Partitions"),
        ("process_filesystem", "Process File System"),
        ("identify_content", "Identify Content (Carving)"),
        ("file_type_id", "File Type Identification"),
        ("file_specific", "File-Specific Processing"),
        ("file_hashing", "File Hashing"),
        ("hash_matching", "Hash Matching"),
        ("mismatched_sig", "Mismatched Signature Detection"),
        ("timeline", "Timeline"),
        ("timeline_analysis", "Timeline Analysis"),
        ("geolocation", "Geolocation"),
        ("geolocation_analysis", "Geolocation Analysis"),
        ("keyword_indexing", "Keyword Indexing"),
        ("keyword_searching", "Keyword Searching"),
        ("automated_result", "Automated Result Interpretation"),
        ("ai_content_flag", "AI-Based Content Flagging")
    ]

    # Same flat [checkbox, description, ...] layout as mc0_components.
    mc1_components = []
    for _proc_id, proc_label in mc1_processes:
        with gr.Row():
            check = gr.Checkbox(label=f"✓ {proc_label}", value=False)
            desc = gr.TextArea(
                label="Description",
                placeholder=f"Describe how {proc_label.lower()} was performed...",
                lines=2
            )
        mc1_components.extend([check, desc])

    # GENERATION & OUTPUT
    gr.Markdown("---")
    gr.Markdown("## 🚀 Generate Your Model Card")

    generate_btn = gr.Button("Generate Model Card", variant="primary", size="lg")

    gr.Markdown("### Preview & Download")

    with gr.Row():
        with gr.Column():
            gr.Markdown("**Markdown Preview:**")
            preview_output = gr.Markdown()
        with gr.Column():
            gr.Markdown("**Download Files:**")
            json_download = gr.File(label="JSON File", type="filepath")
            md_download = gr.File(label="README.md", type="filepath")

    # Wire up generation
    # Order: the 18 metadata inputs, then the MC0 pairs, then the MC1 pairs.
    all_inputs = [
        mmcid, version, owner, use_context, layer_n,
        case_statement, hypothesis,
        classification, classification_other,
        reasoning_type, reasoning_other,
        bias, bias_other,
        cause_of_bias, cause_bias_other,
        error, cause_of_error, cause_error_other
    ] + mc0_components + mc1_components

    generate_btn.click(
        fn=generate_model_card,
        inputs=all_inputs,
        outputs=[preview_output, json_download, md_download]
    )

    # The first reference links to papers.cool, with the arXiv id as a path
    # segment ("papers.cool.arxiv" is not a resolvable host).
    gr.Markdown(f"""
    ---
    ### 📚 References & Information

    **References:**
    - Di Maio, P. (2024). Towards Open Standards for Systemic Complexity in Digital Forensics. https://papers.cool/arxiv/2512.12970
    - Hargreaves, C., Nelson, A., & Casey, E. (2024). An abstract model for digital forensic analysis tools—A foundation for systematic error mitigation analysis. *Forensic Science International: Digital Investigation*, 48.

    **Generator Version:** {GENERATOR_VERSION} (Beta)
    **License:** Apache 2.0

    *This is a beta version. All fields are optional. Feedback welcome!*
    """)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()