Sakib Ahmed committed on
Commit
90b3b3f
·
1 Parent(s): 298eadd

Your commit message

Browse files
Files changed (6) hide show
  1. Dockerfile +19 -0
  2. analyzer.py +372 -0
  3. app.py +23 -0
  4. docker-compose.yml +10 -0
  5. requirements.txt +3 -0
  6. templates/index.html +250 -0
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9-slim

# Create a non-root user so the web server does not run as root.
RUN useradd -m -u 1000 user

WORKDIR /app

# Let the unprivileged user write to the app directory without making it
# world-writable.
RUN chown user:user /app

# Install dependencies first so this layer stays cached until
# requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application, owned by the unprivileged user.
COPY --chown=user:user . .

# Drop privileges for everything that runs from here on (including CMD).
USER user

# gunicorn (see CMD) listens on this port.
EXPOSE 7860

# Run the Flask app under gunicorn when the container launches.
CMD ["gunicorn", "app:app", "--bind", "0.0.0.0:7860"]
analyzer.py ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import json
3
+
4
def extract_entities(log_content):
    """Extract named entities from raw log text with regular expressions.

    Args:
        log_content: Full text of a log file.  The patterns target syslog,
            audit, iptables-style, dnsmasq, OpenVPN and JSON (beats) lines.

    Returns:
        A dict mapping entity type names (e.g. ``"DateTime"``,
        ``"IPAddress"``) to a list of unique matched strings in first-seen
        order.  Entity types without any match are omitted; empty or
        ``None`` input yields ``{}``.
    """
    if not log_content:
        return {}

    entity_types = (
        "DateTime", "System", "Service", "Process", "Action", "IPAddress",
        "DNSName", "Username", "Role", "Metadata", "Status", "Error",
        "Severity", "SessionID", "SessionStatus", "FileName", "Object",
        "ApplicationSpecific", "AuthenticationType", "ResourceType",
        "ResourceUsage", "TimeServer", "Port", "SourcePort",
        "DestinationPort", "Protocol", "Interface", "InterfaceType",
        "Subnet", "Rule", "TTL", "MAC", "Flags", "CPU", "MemoryInfo",
        "Hypervisor", "Device", "FileSystem", "DataBus", "EventID", "CMD",
    )
    entities = {name: [] for name in entity_types}

    # Line-anchored patterns (``^`` / ``$``) are applied per line.
    lines = log_content.split('\n')

    # Reusable pattern fragments.
    ip = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
    months = r'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec'
    clock = r'\d{2}:\d{2}:\d{2}'
    end = r'(?:\s|,|$)'  # terminator used by the key=value patterns

    def collect(entity_type, pattern, flags=0):
        """Append every match of *pattern* in the whole log to *entity_type*."""
        entities[entity_type].extend(re.findall(pattern, log_content, flags))

    def collect_keywords(entity_type, keywords, flags=0):
        """Append each keyword that occurs as a whole word in the log."""
        for keyword in keywords:
            # re.escape keeps keywords such as "PS/2" literal.
            if re.search(r'\b' + re.escape(keyword) + r'\b', log_content, flags):
                entities[entity_type].append(keyword)

    # --- DateTime ---------------------------------------------------------
    collect("DateTime", r'@timestamp":\s*"([^"]+)"')  # JSON @timestamp
    for line in lines:
        # "YYYY-MM-DD HH:MM:SS " at the start of a line.
        stamp = re.search(r'^(\d{4}-\d{2}-\d{2} ' + clock + r')\s', line)
        if stamp:
            entities["DateTime"].append(stamp.group(1))
        # Syslog "Mon DD HH:MM:SS " at the start of a line.
        stamp = re.match(
            r'^(' + months + r')\s+(\d{1,2})\s+(' + clock + r')\s', line)
        if stamp:
            entities["DateTime"].append(" ".join(stamp.groups()))
    collect("DateTime", r'msg=audit\((\d+\.\d+):')  # audit epoch timestamps

    # --- System (host names) and Service ----------------------------------
    collect("System", r'"hostname":\s*"([^"]+)"')
    collect("System", r'"host":\s*\{[^}]*"name":\s*"([^"]+)"')
    for line in lines:
        # Syslog: the token right after the timestamp is the host.
        host = re.match(
            r'^(?:' + months + r')\s+\d{1,2}\s+' + clock + r'\s+([^\s:]+)\s',
            line)
        if host and host.group(1) not in ("?", "-"):
            entities["System"].append(host.group(1))
        # Syslog: the token after the host, up to "[pid]:" or ":".
        service = re.search(
            r'^\w+\s+\d{1,2}\s+' + clock
            + r'\s+[^\s]+\s+([^:\[\s]+)(?:\[\d+\])?:',
            line)
        if service and service.group(1) not in ("?", "-"):
            entities["Service"].append(service.group(1))
    collect("Service", r'"service":\s*\{\s*"type":\s*"([^"]+)"')
    collect("Service", r'"agent":[^}]*"type":\s*"([^"]+)"')

    # --- Process IDs --------------------------------------------------------
    collect("Process",
            r'(?:sshd|dnsmasq|cron|systemd|openvpn|metricbeat)\[(\d+)\]')
    # \b keeps audit "ppid=" fields from being reported as a pid.
    collect("Process", r'\bpid=(\d+)\s')

    # --- Action and DNSName -------------------------------------------------
    collect("Action", r'session\s+(opened|closed)(?:\s+for\s+user|\s)')
    for line in lines:
        if "dnsmasq" not in line:
            continue
        # The action may be followed by "[TYPE]" (e.g. "query[A]") or a space.
        action = re.search(
            r'dnsmasq\[\d+\]:\s+'
            r'(query|forwarded|reply|cached|NODATA-IPv[46])(?:\s|\[)',
            line)
        if action:
            entities["Action"].append(action.group(1))
        # Domain between the action and the following from/to/is keyword.
        domain = re.search(
            r'(?:query\[[A-Z]+\]|forwarded|reply)\s+'
            r'([-a-zA-Z0-9.*_/]+(?:\.[a-zA-Z0-9.*_/-]+)+)'
            r'(?:\s+from|\s+to|\s+is)',
            line)
        if domain:
            entities["DNSName"].append(domain.group(1))
    collect("Action", r'(?:TLS|VERIFY)\s+(OK|soft\s+reset)(?:\s|$)')

    # --- IP addresses -------------------------------------------------------
    for line in lines:
        entities["IPAddress"].extend(
            re.findall(r'from\s+(' + ip + r')(?:\s|$|:)', line))
        entities["IPAddress"].extend(
            re.findall(r'to\s+(' + ip + r')(?:\s|$)', line))
        entities["IPAddress"].extend(
            re.findall(r'is\s+(' + ip + r')(?:\s|$)', line))
    collect("IPAddress", r'[a-zA-Z0-9]+/(' + ip + r'):')  # VPN "user/ip:port"
    collect("IPAddress", r'SRC=(' + ip + r')')
    collect("IPAddress", r'DST=(' + ip + r')')

    # --- Usernames ----------------------------------------------------------
    collect("Username", r'(?:user|acct)="([^"]+)"')
    collect("Username", r'(\w+)/' + ip + r':')  # VPN "user/ip:port"
    collect("Username", r'for\s+user\s+(\w+)(?:\s|$|by)')
    collect("Username", r'Accepted\s+\w+\s+for\s+(\w+)\s+from')

    # --- Network details ----------------------------------------------------
    for server_ip, server_name in re.findall(
            r'(' + ip + r'):123\s+\(([^)]+)\)', log_content):
        entities["TimeServer"].append(f"{server_ip}:123 ({server_name})")

    # The lookbehind skips the seconds field of HH:MM:SS timestamps, which
    # would otherwise be reported as a port on every timestamped line.
    collect("Port", r'(?:port\s+|(?<!\d{2}:\d{2}):)(\d+)(?:\s|$|,|\))')
    collect("SourcePort", r'SPT=(\d+)')
    collect("DestinationPort", r'DPT=(\d+)')

    collect("Protocol", r'(?:PROTO=|protocol\s+)([a-zA-Z]+\d*)')
    # Add well-known protocols mentioned anywhere, unless the same protocol
    # was already captured in a different letter case.
    seen_protocols = {proto.lower() for proto in entities["Protocol"]}
    for proto in ("tcp", "udp", "icmp", "IPv4", "IPv6"):
        if proto.lower() in seen_protocols:
            continue
        if re.search(r'\b' + re.escape(proto) + r'\b', log_content,
                     re.IGNORECASE):
            entities["Protocol"].append(proto)
            seen_protocols.add(proto.lower())

    collect("Interface", r'(?:interface|dev)\s+(ens\d+|eth\d+|wlan\d+|lo)')
    collect("InterfaceType", r'(?:zone|type)\s+(inet|lan|dmz|wan)',
            re.IGNORECASE)
    collect("Subnet", r'(' + ip + r'/\d{1,2})')
    collect("Rule", r'(DNAT|ACCEPT|REJECT|DROP|Policy)\s')
    collect("TTL", r'TTL=(\d+)')
    collect("MAC",
            r'((?:[0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}'
            r'|(?:[0-9a-fA-F]{2}-){5}[0-9a-fA-F]{2})')
    collect("Flags", r'(?:flags|FLAG)\s+(\w+)')
    collect_keywords("Flags", ("RST", "DF", "ACK", "SYN", "FIN", "PSH", "URG"))

    # --- Hardware / kernel boot messages ------------------------------------
    collect("CPU",
            r'(Intel GenuineIntel|AMD AuthenticAMD|Centaur CentaurHauls)')
    collect("MemoryInfo", r'mem\s+0x[0-9a-f]+-0x[0-9a-f]+\s+([a-z]+)')
    collect("Hypervisor", r'Hypervisor detected:\s+(\w+)')
    collect("Device", r'(?:device|component):\s+([a-zA-Z0-9_-]+)')
    collect_keywords("Device", ("PCI-DMA", "ehci_hcd", "usb", "rtc_cmos",
                                "virtio-pci", "i8042", "ata_piix"))
    collect("FileSystem", r'(squashfs|ext4|xfs|btrfs):\s')
    collect_keywords("DataBus", ("PCI", "USB", "i2c", "PS/2", "Serial",
                                 "ATA", "SATA", "TUN/TAP"))
    collect("EventID", r'\[(\d+\.\d+)\]')
    collect("CMD", r'CMD\s+\(([^)]+)\)')

    # --- File names ---------------------------------------------------------
    collect("FileName", r'(/etc/[a-zA-Z0-9_/.-]+)')
    for line in lines:
        # First whitespace-delimited name per line with a known extension.
        file_name = re.search(
            r'(?<!\S)([a-zA-Z0-9_-]+\.'
            r'(?:xlsx|txt|java|log|csv|pdf|docx|cfg|conf))(?:\s|$|\.)',
            line)
        if file_name:
            entities["FileName"].append(file_name.group(1))

    # --- Severity, status, sessions and authentication ----------------------
    collect_keywords("Severity",
                     ("warning", "info", "error", "debug", "notice",
                      "critical", "alert", "emergency"),
                     re.IGNORECASE)
    collect("SessionStatus", r'session\s+(opened|closed)')
    collect("Error", r'(?:error|failure|failed):\s+([^,\n]+)', re.IGNORECASE)
    collect("AuthenticationType", r'Accepted\s+(\w+)')
    collect("Status", r'res=(\w+)[\s\'"]')  # PAM/audit result
    collect("Status", r'VERIFY\s+(OK|FAILED|KU OK|EKU OK)(?:\s|$)')

    collect("ResourceType", r'(?:resource|type):\s+([a-zA-Z0-9_-]+)')
    # Beats-style JSON carrying system CPU metrics.
    if re.search(r'"system":\s*\{\s*"cpu":', log_content):
        entities["ResourceType"].append("CPU")

    collect("SessionID", r'session(?:_id|Id)=([a-zA-Z0-9-]+)' + end)
    collect("SessionID", r'session\s+(\d+)(?:\s|$)')
    collect("SessionID", r'ses=(\d+)' + end)  # audit "ses=" field

    # --- Generic key=value fields -------------------------------------------
    collect("Object", r'object=([a-zA-Z0-9_-]+)' + end)
    collect("Object", r'unit=([a-zA-Z0-9_-]+)' + end)  # systemd unit names
    collect("ApplicationSpecific", r'app=([a-zA-Z0-9_-]+)' + end)
    for application in ("OpenVPN", "metricbeat"):
        if application in log_content:
            entities["ApplicationSpecific"].append(application)
    collect("Role", r'role=([a-zA-Z0-9_-]+)' + end)
    collect("Metadata", r'metadata=\{([^}]+)\}')

    # --- Resource usage -----------------------------------------------------
    entities["ResourceUsage"].extend(
        f"CPU: {usage}%"
        for usage in re.findall(r'"cpu":\s*\{\s*"pct":\s*([0-9.]+)',
                                log_content))

    # Drop empty entity types and de-duplicate while keeping first-seen order
    # (dict.fromkeys), so the output is deterministic across runs.
    return {
        entity_type: list(dict.fromkeys(values))
        for entity_type, values in entities.items()
        if values
    }
app.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, render_template
2
+ from analyzer import extract_entities
3
+
4
app = Flask(__name__)


@app.route('/')
def index():
    """Serve the log upload page."""
    return render_template('index.html')


@app.route('/analyze', methods=['POST'])
def analyze():
    """Extract entities from an uploaded log file and return them as JSON.

    Expects a multipart form upload with the log in the ``file`` field.

    Returns:
        The entity dict produced by ``extract_entities`` as JSON, or a JSON
        ``{"error": ...}`` body with HTTP status 400 when no file was sent.
    """
    uploaded = request.files.get('file')
    if uploaded is None:
        return jsonify({'error': 'No file part'}), 400
    if uploaded.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    # Logs may contain arbitrary bytes; replace undecodable ones instead of
    # failing the whole request.
    log_content = uploaded.read().decode('utf-8', errors='replace')
    return jsonify(extract_entities(log_content))


if __name__ == '__main__':
    # Start the development server only when this file is run directly.
    # Without the guard, importing the module (e.g. ``gunicorn app:app``)
    # would launch the dev server inside the WSGI worker.
    import os

    # The interactive debugger allows arbitrary code execution, so it is
    # opt-in via FLASK_DEBUG=1 rather than always on for a 0.0.0.0 listener.
    app.run(host='0.0.0.0', port=8000,
            debug=os.environ.get('FLASK_DEBUG') == '1')
docker-compose.yml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
version: '3'

services:
  log-analyzer:
    build: .
    ports:
      # The image's CMD binds gunicorn to 0.0.0.0:7860, so publish that
      # container port on host port 8000.
      - "8000:7860"
    volumes:
      # Mount the working tree over /app so code changes are visible
      # without rebuilding the image.
      - ./:/app
    restart: unless-stopped
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Flask==2.2.3
2
+ Werkzeug==2.2.3
3
+ gunicorn==20.1.0
templates/index.html ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Log Analyzer</title>
5
+ <style>
6
+ body {
7
+ font-family: Arial, sans-serif;
8
+ max-width: 800px;
9
+ margin: 0 auto;
10
+ padding: 20px;
11
+ }
12
+ h1 {
13
+ color: #333;
14
+ text-align: center;
15
+ }
16
+ .upload-container {
17
+ border: 2px dashed #ccc;
18
+ padding: 20px;
19
+ text-align: center;
20
+ margin: 20px 0;
21
+ border-radius: 5px;
22
+ }
23
+ #file-input {
24
+ margin: 10px 0;
25
+ }
26
+ button {
27
+ background-color: #4CAF50;
28
+ color: white;
29
+ padding: 10px 15px;
30
+ border: none;
31
+ border-radius: 4px;
32
+ cursor: pointer;
33
+ font-size: 16px;
34
+ }
35
+ button:hover {
36
+ background-color: #45a049;
37
+ }
38
+ #results {
39
+ margin-top: 30px;
40
+ }
41
+ .entity-group {
42
+ margin-bottom: 20px;
43
+ }
44
+ .entity-title {
45
+ font-weight: bold;
46
+ margin-bottom: 5px;
47
+ }
48
+ pre {
49
+ background-color: #f5f5f5;
50
+ padding: 15px;
51
+ border-radius: 5px;
52
+ overflow-x: auto;
53
+ }
54
+ .loading {
55
+ display: none;
56
+ text-align: center;
57
+ margin: 20px 0;
58
+ }
59
+ table {
60
+ width: 100%;
61
+ border-collapse: collapse;
62
+ margin-bottom: 20px;
63
+ }
64
+ th, td {
65
+ border: 1px solid #ddd;
66
+ padding: 8px;
67
+ text-align: left;
68
+ }
69
+ th {
70
+ background-color: #f2f2f2;
71
+ }
72
+ tr:nth-child(even) {
73
+ background-color: #f9f9f9;
74
+ }
75
+ .view-controls {
76
+ display: flex;
77
+ gap: 10px;
78
+ margin-bottom: 15px;
79
+ }
80
+ .view-controls button {
81
+ background-color: #555;
82
+ }
83
+ .view-controls button.active {
84
+ background-color: #4CAF50;
85
+ }
86
+ .view {
87
+ display: none;
88
+ }
89
+ .view.active {
90
+ display: block;
91
+ }
92
+ #json-view {
93
+ white-space: pre-wrap;
94
+ }
95
+ </style>
96
+ </head>
97
+ <body>
98
+ <h1>Log File Analyzer</h1>
99
+
100
+ <div class="upload-container">
101
+ <h2>Upload Log File</h2>
102
+ <form id="upload-form" enctype="multipart/form-data">
103
+ <input type="file" id="file-input" name="file" accept=".log,.txt">
104
+ <div>
105
+ <button type="submit">Analyze Log</button>
106
+ </div>
107
+ </form>
108
+ </div>
109
+
110
+ <div id="loading" class="loading">
111
+ <p>Analyzing log file... Please wait.</p>
112
+ </div>
113
+
114
+ <div id="results"></div>
115
+
116
+ <script>
117
// Escape a value for safe interpolation into HTML markup. Entity values are
// taken verbatim from the uploaded log file, so they must never be parsed as
// HTML when rendered.
function escapeHtml(value) {
    return String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}

// Upload the selected file to /analyze and render the returned entities.
document.getElementById('upload-form').addEventListener('submit', function(e) {
    e.preventDefault();

    const fileInput = document.getElementById('file-input');
    if (!fileInput.files.length) {
        alert('Please select a file to analyze');
        return;
    }

    const formData = new FormData();
    formData.append('file', fileInput.files[0]);

    const loading = document.getElementById('loading');
    const resultsDiv = document.getElementById('results');

    // Show loading indicator
    loading.style.display = 'block';
    resultsDiv.innerHTML = '';

    fetch('/analyze', {
        method: 'POST',
        body: formData
    })
    .then(response => response.json())
    .then(data => {
        // Hide loading indicator
        loading.style.display = 'none';

        // The server reports problems as {"error": "..."}; entity results
        // are always arrays, so a string here is an error message.
        if (data && typeof data.error === 'string') {
            resultsDiv.innerHTML = `<p>${escapeHtml(data.error)}</p>`;
            return;
        }

        // Keep only entity types that carry a non-empty list of values.
        const groups = Object.entries(data).filter(
            ([, values]) => Array.isArray(values) && values.length > 0
        );

        if (groups.length === 0) {
            resultsDiv.innerHTML = '<p>No entities found in the log file.</p>';
            return;
        }

        const titleOf = (entityType, values) =>
            `<div class="entity-title">${escapeHtml(entityType)} (${values.length})</div>`;

        // List view: one <pre> per entity type, one value per line.
        const listView = groups.map(([entityType, values]) => `
            <div class="entity-group">
                ${titleOf(entityType, values)}
                <pre>${escapeHtml(values.join('\n'))}</pre>
            </div>
        `).join('');

        // Table view: one numbered table per entity type.
        const tableView = groups.map(([entityType, values]) => `
            <div class="entity-group">
                ${titleOf(entityType, values)}
                <table>
                    <thead>
                        <tr>
                            <th>#</th>
                            <th>Value</th>
                        </tr>
                    </thead>
                    <tbody>
                        ${values.map((value, index) => `
                        <tr>
                            <td>${index + 1}</td>
                            <td>${escapeHtml(value)}</td>
                        </tr>`).join('')}
                    </tbody>
                </table>
            </div>
        `).join('');

        resultsDiv.innerHTML = `
            <h2>Analysis Results</h2>
            <div class="view-controls">
                <button id="list-view-btn" class="active">List View</button>
                <button id="table-view-btn">Table View</button>
                <button id="json-view-btn">Raw JSON</button>
            </div>
            <div id="list-view" class="view active">${listView}</div>
            <div id="table-view" class="view">${tableView}</div>
            <div id="json-view" class="view">
                <pre>${escapeHtml(JSON.stringify(data, null, 2))}</pre>
            </div>
        `;

        // Wire each view button to its panel.
        ['list-view', 'table-view', 'json-view'].forEach(viewId => {
            document.getElementById(viewId + '-btn').addEventListener('click', function() {
                setActiveView(viewId);
            });
        });
    })
    .catch(error => {
        loading.style.display = 'none';
        console.error('Error:', error);
        resultsDiv.innerHTML = '<p>Error analyzing log file. Please try again.</p>';
    });
});

// Show the view with the given id and highlight its "<id>-btn" button.
function setActiveView(viewId) {
    // Remove active class from all views and buttons
    document.querySelectorAll('.view').forEach(el => el.classList.remove('active'));
    document.querySelectorAll('.view-controls button').forEach(el => el.classList.remove('active'));

    // Add active class to selected view and button
    document.getElementById(viewId).classList.add('active');
    document.getElementById(viewId + '-btn').classList.add('active');
}
248
+ </script>
249
+ </body>
250
+ </html>