STARBORN committed on
Commit
a2797d3
·
verified ·
1 Parent(s): 94e1dbe

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -421
app.py DELETED
@@ -1,421 +0,0 @@
1
- """
2
- Digital Forensics Model Card Generator
3
- A tool for creating standardized model cards for digital forensics AI/ML models
4
- Compatible with Gradio 5.x
5
- """
6
-
7
- import gradio as gr
8
- import json
9
- from datetime import datetime
10
- from utils.generator import generate_json_output, generate_markdown_output
11
- from utils.validators import validate_mmcid
12
-
13
- # Version
14
- GENERATOR_VERSION = "1.0.0"
15
-
16
- # Controlled Vocabularies
17
- CV_USE_CONTEXT = ["Standalone", "Integrated", "Hybrid (both standalone and integrated)"]
18
-
19
- CV_CLASSIFICATION = [
20
- "Computer Forensics",
21
- "Network Forensics",
22
- "Mobile Device Forensics",
23
- "Cloud Forensics",
24
- "Database Forensics",
25
- "Memory Forensics",
26
- "Digital Image Forensics",
27
- "Digital Video/Audio Forensics",
28
- "IoT Forensics",
29
- "Multi-domain (covers multiple types)"
30
- ]
31
-
32
- CV_REASONING = [
33
- "Deductive Reasoning (from general to specific)",
34
- "Inductive Reasoning (from specific to general)",
35
- "Abductive Reasoning (inference to best explanation)",
36
- "Retroductive Reasoning (hypothesis refinement)",
37
- "Hybrid/Mixed Reasoning"
38
- ]
39
-
40
- CV_BIAS = [
41
- "Data Bias (historical, sampling, selection)",
42
- "Algorithmic Bias (model architecture, optimization)",
43
- "Human Bias (cognitive, confirmation, implicit)",
44
- "Deployment Bias (context mismatch)",
45
- "Reporting Bias (documentation gaps)",
46
- "Measurement Bias (proxy variables)",
47
- "Stereotyping Bias (reinforcing stereotypes)",
48
- "Automation Bias (over-reliance on automated results)",
49
- "No Identified Bias",
50
- "Multiple Bias Types"
51
- ]
52
-
53
- CV_CAUSE_OF_BIAS = [
54
- "Unrepresentative Training Data",
55
- "Historical Inequities in Data",
56
- "Feature Selection Issues",
57
- "Labeling Inconsistencies",
58
- "Optimization Objective Mismatch",
59
- "Insufficient Diversity in Development Team",
60
- "Lack of Domain Expertise",
61
- "Temporal Drift (data age/staleness)",
62
- "Geographic/Cultural Limitations",
63
- "Tool/Method Limitations",
64
- "Multiple Causes",
65
- "Unknown/Under Investigation"
66
- ]
67
-
68
- CV_CAUSE_OF_ERROR = [
69
- "Training Error (underfitting)",
70
- "Validation Error (model selection issues)",
71
- "Testing Error (generalization failure)",
72
- "Overfitting (high variance)",
73
- "Underfitting (high bias)",
74
- "Data Quality Issues (noise, outliers, mislabeling)",
75
- "Insufficient Training Data",
76
- "Class Imbalance",
77
- "Feature Engineering Issues",
78
- "Hyperparameter Misconfiguration",
79
- "Model Complexity Mismatch",
80
- "Adversarial Attack (poisoning, evasion)",
81
- "Concept Drift",
82
- "Tool Calibration Error",
83
- "Human Error in Analysis",
84
- "Chain of Custody Issues",
85
- "Multiple Error Sources",
86
- "Unknown/Under Investigation"
87
- ]
88
-
89
def save_to_file(content, filename):
    """Write *content* to a file in a fresh temporary directory.

    Args:
        content: Text to write.
        filename: Desired file name. Only its final path component is used,
            so the file always lands inside the temporary directory.

    Returns:
        The path of the written file as a string.
    """
    import os
    import tempfile

    # A fixed "/tmp/<filename>" path is shared by every caller, so two
    # sessions generating at the same time would overwrite each other's
    # output. A per-call directory keeps the requested file name while
    # isolating each result.
    target_dir = tempfile.mkdtemp(prefix="model_card_")
    filepath = os.path.join(target_dir, os.path.basename(filename))

    # Explicit UTF-8: the generated text may contain emoji / non-ASCII
    # characters, which the platform default encoding may not support.
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)
    return filepath
95
-
96
# Number of leading metadata inputs (13 main fields + 5 "other" text fields).
_METADATA_ARG_COUNT = 18

# Keys for the top-level elements, in the order their checkbox/description
# pairs are passed in.
_TOP_LEVEL_KEYS = (
    "type_of_reasoning", "cause_of_error", "algorithm", "inference", "confounder",
    "classification", "evaluation", "hypothesis", "tool", "bias_debiasing",
    "case_statement", "evidence_mc1", "file_type", "data_structure", "degree_of_confidence",
)

# Keys for the process elements, in the order their checkbox/description
# pairs are passed in.
_PROCESS_KEYS = (
    "event_data", "parse_raw_data", "validate", "identify_partitions",
    "process_file_system", "identify_content_carving", "file_type_identification",
    "file_specific_processing", "file_hashing", "hash_matching",
    "mismatched_signature_detection", "timeline", "timeline_analysis",
    "geolocation", "geolocation_analysis", "keyword_indexing",
    "keyword_searching", "automated_result_interpretation", "ai_based_content_flagging",
)


def _merge_other(selected, other):
    """Return the selected choices as a list, with the free-text *other* appended if non-empty."""
    # `selected` may be None when a checkbox group has no value; treat as empty.
    merged = list(selected or [])
    if other:
        merged.append(other)
    return merged


def _pair_elements(keys, flat_values):
    """Fold a flat (checkbox, description, checkbox, description, ...) sequence into a dict by key."""
    elements = {}
    for i, key in enumerate(keys):
        applicable = flat_values[i * 2]
        description = flat_values[i * 2 + 1]
        elements[key] = {
            "applicable": applicable,
            # Descriptions of unchecked elements are discarded.
            "description": description if applicable else "",
        }
    return elements


def generate_model_card(*args):
    """Generate model card outputs from form inputs.

    Args:
        *args: Flat positional form values, in this order:
            18 metadata values (mmcid, version, owner, use_context,
            case_statement, hypothesis, classification, reasoning_type, bias,
            cause_of_bias, error, cause_of_error, layer_n, then the five
            "other" free-text values), followed by one (checkbox, description)
            pair per top-level element and one pair per process element.

    Returns:
        A 3-tuple ``(markdown_output, json_file, md_file)`` where the last two
        are file paths returned by ``save_to_file``. If an MMCID is supplied
        and ``validate_mmcid`` rejects it, returns ``(error_message, None, None)``.
    """
    # Parse arguments (metadata section)
    (mmcid, version, owner, use_context, case_statement, hypothesis,
     classification, reasoning_type, bias, cause_of_bias, error, cause_of_error, layer_n,
     classification_other, reasoning_other, bias_other, cause_bias_other,
     cause_error_other) = args[:_METADATA_ARG_COUNT]

    # Validate MMCID if provided
    if mmcid and not validate_mmcid(mmcid):
        return "❌ Invalid MMCID format. Please use format: DF-MC-YYYY-NNN (e.g., DF-MC-2025-001)", None, None

    # Slice boundaries are derived from the key lists so they cannot drift
    # out of sync with the number of elements (15 and 19 pairs respectively).
    top_level_end = _METADATA_ARG_COUNT + 2 * len(_TOP_LEVEL_KEYS)
    process_end = top_level_end + 2 * len(_PROCESS_KEYS)
    top_level_args = args[_METADATA_ARG_COUNT:top_level_end]
    process_args = args[top_level_end:process_end]

    # Build metadata
    metadata = {
        "mmcid": mmcid or "Not specified",
        "version": version or "N/A",
        "owner": owner or "Not specified",
        "use_context": use_context or "Not specified",
        "case_statement": case_statement,
        "hypothesis": hypothesis,
        "classification": _merge_other(classification, classification_other),
        "reasoning_type": _merge_other(reasoning_type, reasoning_other),
        "bias": _merge_other(bias, bias_other),
        "cause_of_bias": _merge_other(cause_of_bias, cause_bias_other),
        "error": error,
        "cause_of_error": _merge_other(cause_of_error, cause_error_other),
        "layer_n": layer_n or "N/A",
    }

    top_level = _pair_elements(_TOP_LEVEL_KEYS, top_level_args)
    processes = _pair_elements(_PROCESS_KEYS, process_args)

    # Generate outputs
    json_output = generate_json_output(metadata, top_level, processes, GENERATOR_VERSION)
    markdown_output = generate_markdown_output(metadata, top_level, processes, GENERATOR_VERSION)

    # Save to files
    json_file = save_to_file(json_output, "model_card.json")
    md_file = save_to_file(markdown_output, "README.md")

    return markdown_output, json_file, md_file
175
-
176
-
177
- # Build Gradio Interface
178
- with gr.Blocks(title="Digital Forensics Model Card Generator", theme=gr.themes.Soft()) as demo:
179
- gr.Markdown(f"""
180
- # 🔬 Digital Forensics Model Card Generator
181
-
182
- Create standardized model cards for digital forensics AI/ML systems. Based on:
183
- - Di Maio, P. (2024). Towards Open Standards for Systemic Complexity in Digital Forensics
184
- - Hargreaves, C., Nelson, A., & Casey, E. (2024). An abstract model for digital forensic analysis tools
185
-
186
- **Version {GENERATOR_VERSION}**
187
- """)
188
-
189
- with gr.Tabs():
190
- # ===== SECTION 1: METADATA =====
191
- with gr.Tab("📋 Metadata"):
192
- gr.Markdown("### Model Card Metadata\nAll fields are optional unless otherwise specified.")
193
-
194
- with gr.Row():
195
- mmcid = gr.Textbox(
196
- label="MMCID - Identifier",
197
- placeholder="DF-MC-2025-001",
198
- info="Format: DF-MC-YYYY-NNN"
199
- )
200
- version = gr.Textbox(
201
- label="MCV - Version",
202
- placeholder="1.0 or N/A",
203
- info="Version number or N/A"
204
- )
205
-
206
- owner = gr.Textbox(
207
- label="DF-MCO - Owner",
208
- placeholder="Organization or individual name"
209
- )
210
-
211
- use_context = gr.Dropdown(
212
- choices=CV_USE_CONTEXT,
213
- label="DF-MCUse - Usage Context",
214
- info="How is this model card used?"
215
- )
216
-
217
- case_statement = gr.TextArea(
218
- label="DF-MC CS - Case Statement",
219
- placeholder="Describe the case context...",
220
- lines=3
221
- )
222
-
223
- hypothesis = gr.TextArea(
224
- label="DF-MC H - Hypothesis",
225
- placeholder="State the hypothesis being tested...",
226
- lines=3
227
- )
228
-
229
- gr.Markdown("#### Select up to 3 items for each category:")
230
-
231
- with gr.Row():
232
- with gr.Column():
233
- classification = gr.CheckboxGroup(
234
- choices=CV_CLASSIFICATION,
235
- label="DF-MC C - Classification (max 3)",
236
- info="Select up to 3 forensic domains"
237
- )
238
- with gr.Column():
239
- classification_other = gr.Textbox(
240
- label="Other Classification",
241
- placeholder="Specify if not listed above"
242
- )
243
-
244
- with gr.Row():
245
- with gr.Column():
246
- reasoning_type = gr.CheckboxGroup(
247
- choices=CV_REASONING,
248
- label="DF-MC TR - Type of Reasoning (max 3)",
249
- info="Select up to 3 reasoning types"
250
- )
251
- with gr.Column():
252
- reasoning_other = gr.Textbox(
253
- label="Other Reasoning Type",
254
- placeholder="Specify if not listed above"
255
- )
256
-
257
- with gr.Row():
258
- with gr.Column():
259
- bias = gr.CheckboxGroup(
260
- choices=CV_BIAS,
261
- label="DF-MC B - Bias (max 3)",
262
- info="Select up to 3 bias types"
263
- )
264
- with gr.Column():
265
- bias_other = gr.Textbox(
266
- label="Other Bias",
267
- placeholder="Specify if not listed above"
268
- )
269
-
270
- with gr.Row():
271
- with gr.Column():
272
- cause_of_bias = gr.CheckboxGroup(
273
- choices=CV_CAUSE_OF_BIAS,
274
- label="DF-MC CB - Cause of Bias (max 3)",
275
- info="Select up to 3 causes"
276
- )
277
- with gr.Column():
278
- cause_bias_other = gr.Textbox(
279
- label="Other Cause of Bias",
280
- placeholder="Specify if not listed above"
281
- )
282
-
283
- error = gr.TextArea(
284
- label="DF-MC E - Error",
285
- placeholder="Describe errors encountered...",
286
- lines=3
287
- )
288
-
289
- with gr.Row():
290
- with gr.Column():
291
- cause_of_error = gr.CheckboxGroup(
292
- choices=CV_CAUSE_OF_ERROR,
293
- label="DF-MC CE - Cause of Error (max 3)",
294
- info="Select up to 3 error causes"
295
- )
296
- with gr.Column():
297
- cause_error_other = gr.Textbox(
298
- label="Other Cause of Error",
299
- placeholder="Specify if not listed above"
300
- )
301
-
302
- layer_n = gr.Textbox(
303
- label="DF-MC Ln - Layer n",
304
- placeholder="Specify layer/stage number if applicable"
305
- )
306
-
307
- # ===== SECTION 2: TOP LEVEL (FIGURE 6) =====
308
- with gr.Tab("🔝 Top Level Elements (DF MC 0)"):
309
- gr.Markdown("### Figure 6 - Top Level Elements\nCheck applicable items and provide descriptions.")
310
-
311
- # Create checkboxes with text areas for each element
312
- elements = [
313
- ("type_reasoning", "Type of Reasoning"),
314
- ("cause_error", "Cause of Error"),
315
- ("algorithm", "Algorithm"),
316
- ("inference", "Inference"),
317
- ("confounder", "Confounder"),
318
- ("classification", "Classification"),
319
- ("evaluation", "Evaluation"),
320
- ("hypothesis", "Hypothesis"),
321
- ("tool", "Tool"),
322
- ("bias_debiasing", "Bias/Debiasing"),
323
- ("case_statement", "Case Statement"),
324
- ("evidence_mc1", "Evidence MC1"),
325
- ("file_type", "File Type"),
326
- ("data_structure", "Data Structure"),
327
- ("degree_confidence", "Degree of Confidence")
328
- ]
329
-
330
- top_level_components = []
331
- for elem_id, elem_label in elements:
332
- with gr.Row():
333
- check = gr.Checkbox(label=f"✓ {elem_label}", value=False)
334
- desc = gr.TextArea(
335
- label=f"Description for {elem_label}",
336
- placeholder=f"Describe {elem_label.lower()} if applicable...",
337
- lines=2
338
- )
339
- top_level_components.extend([check, desc])
340
-
341
- # ===== SECTION 3: DATA & PROCESSES (FIGURE 7) =====
342
- with gr.Tab("⚙️ Data & Processes (DF MC 1)"):
343
- gr.Markdown("### Figure 7 - Data Types and Analytical Processes\nCheck applicable items and provide descriptions.")
344
-
345
- processes_list = [
346
- ("event_data", "EVENT/DATA"),
347
- ("parse_raw", "Parse Raw Data Contained Within the Image"),
348
- ("validate", "Validate the Data Compared"),
349
- ("identify_partitions", "Identify Partitions"),
350
- ("process_filesystem", "Process File System"),
351
- ("identify_content", "Identify Content (Carving)"),
352
- ("file_type_id", "File Type Identification"),
353
- ("file_specific", "File-Specific Processing"),
354
- ("file_hashing", "File Hashing"),
355
- ("hash_matching", "Hash Matching"),
356
- ("mismatched_sig", "Mismatched Signature Detection"),
357
- ("timeline", "Timeline"),
358
- ("timeline_analysis", "Timeline Analysis"),
359
- ("geolocation", "Geolocation"),
360
- ("geolocation_analysis", "Geolocation Analysis"),
361
- ("keyword_indexing", "Keyword Indexing"),
362
- ("keyword_searching", "Keyword Searching"),
363
- ("automated_result", "Automated Result Interpretation"),
364
- ("ai_content_flag", "AI-Based Content Flagging")
365
- ]
366
-
367
- process_components = []
368
- for proc_id, proc_label in processes_list:
369
- with gr.Row():
370
- check = gr.Checkbox(label=f"✓ {proc_label}", value=False)
371
- desc = gr.TextArea(
372
- label=f"Description for {proc_label}",
373
- placeholder=f"Describe {proc_label.lower()} if applicable...",
374
- lines=2
375
- )
376
- process_components.extend([check, desc])
377
-
378
- # ===== GENERATION & OUTPUT =====
379
- gr.Markdown("---")
380
- gr.Markdown("### Generate Your Model Card")
381
-
382
- generate_btn = gr.Button("🚀 Generate Model Card", variant="primary", size="lg")
383
-
384
- with gr.Tabs():
385
- with gr.Tab("📄 Preview (Markdown)"):
386
- preview_output = gr.Markdown(label="Markdown Preview")
387
-
388
- with gr.Tab("💾 Download Files"):
389
- gr.Markdown("Click the download buttons below to save your generated model card files:")
390
- with gr.Row():
391
- json_download = gr.File(label="Download JSON", type="filepath")
392
- md_download = gr.File(label="Download README.md", type="filepath")
393
-
394
- # Wire up the generation
395
- all_inputs = [
396
- mmcid, version, owner, use_context, case_statement, hypothesis,
397
- classification, reasoning_type, bias, cause_of_bias, error, cause_of_error, layer_n,
398
- classification_other, reasoning_other, bias_other, cause_bias_other, cause_error_other
399
- ] + top_level_components + process_components
400
-
401
- generate_btn.click(
402
- fn=generate_model_card,
403
- inputs=all_inputs,
404
- outputs=[preview_output, json_download, md_download]
405
- )
406
-
407
- gr.Markdown(f"""
408
- ---
409
- ### About This Generator
410
-
411
- **References:**
412
- - Di Maio, P. (2024). Towards Open Standards for Systemic Complexity in Digital Forensics. https://papers.cool/arxiv/2512.12970
413
- - Hargreaves, C., Nelson, A., & Casey, E. (2024). An abstract model for digital forensic analysis tools—A foundation for systematic error mitigation analysis. *Forensic Science International: Digital Investigation*, 48.
414
-
415
- **Generator Version:** {GENERATOR_VERSION}
416
- **License:** Apache 2.0
417
- **Contact:** For questions or feedback, please open an issue on the project repository.
418
- """)
419
-
420
- if __name__ == "__main__":
421
- demo.launch()