Spaces:
Paused
Paused
Update app_udm.py
Browse files- app_udm.py +83 -76
app_udm.py
CHANGED
|
@@ -1,31 +1,58 @@
|
|
| 1 |
-
# You can find this code for Chainlit python streaming here (https://docs.chainlit.io/concepts/streaming/python)
|
| 2 |
-
|
| 3 |
-
# OpenAI Chat completion
|
| 4 |
-
import os
|
| 5 |
-
from openai import AsyncOpenAI # importing openai for API usage
|
| 6 |
-
import chainlit as cl # importing chainlit for our app
|
| 7 |
-
from chainlit.prompt import Prompt, PromptMessage # importing prompt tools
|
| 8 |
-
from chainlit.playground.providers import ChatOpenAI # importing ChatOpenAI tools
|
| 9 |
-
from dotenv import load_dotenv
|
| 10 |
-
|
| 11 |
-
load_dotenv()
|
| 12 |
import pandas as pd
|
| 13 |
import os
|
| 14 |
-
|
| 15 |
import chainlit as cl
|
| 16 |
-
from chainlit.prompt import Prompt, PromptMessage
|
| 17 |
from dotenv import load_dotenv
|
| 18 |
|
| 19 |
# Load environment variables
|
| 20 |
load_dotenv()
|
| 21 |
|
| 22 |
-
|
| 23 |
# Load UDM fields CSV file
|
| 24 |
def load_udm_fields(csv_path):
|
| 25 |
return pd.read_csv(csv_path)
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
# Map log fields to UDM fields
|
| 28 |
-
def map_log_fields_to_udm(log_fields, udm_fields):
|
| 29 |
mapped_fields = []
|
| 30 |
for field in log_fields:
|
| 31 |
# Try to find a matching UDM field
|
|
@@ -36,63 +63,47 @@ def map_log_fields_to_udm(log_fields, udm_fields):
|
|
| 36 |
'UDM_Field': udm_match.iloc[0]['Field Name']
|
| 37 |
})
|
| 38 |
else:
|
| 39 |
-
# If no direct match,
|
|
|
|
| 40 |
mapped_fields.append({
|
| 41 |
'Log_Field': field,
|
| 42 |
-
'UDM_Field': 'custom_fields.' + field
|
| 43 |
})
|
| 44 |
return pd.DataFrame(mapped_fields)
|
| 45 |
|
| 46 |
-
#
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
srcip | principal.ip
|
| 74 |
-
dstip | target.ip
|
| 75 |
-
dstport | target.port
|
| 76 |
-
|
| 77 |
-
### Example 2: Palo Alto Fields to UDM Mapping
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
Log Attribute | UDM Attribute
|
| 82 |
-
--------------|---------------
|
| 83 |
-
src_ip | principal.ip
|
| 84 |
-
dest_ip | target.ip
|
| 85 |
-
dest_port | target.port
|
| 86 |
-
action | security_result.action_details
|
| 87 |
-
severity | security_result.severity_details
|
| 88 |
-
|
| 89 |
-
Now proceed to map the given sample log:
|
| 90 |
"""
|
| 91 |
|
| 92 |
@cl.on_chat_start # Marks a function that will be executed at the start of a user session
|
| 93 |
async def start_chat():
|
| 94 |
settings = {
|
| 95 |
-
"model": "gpt-
|
| 96 |
"temperature": 0,
|
| 97 |
"max_tokens": 500,
|
| 98 |
"top_p": 1,
|
|
@@ -109,25 +120,21 @@ async def main(message: cl.Message):
|
|
| 109 |
udm_fields_csv = "udm_field_list_v2.csv" # Replace with your actual CSV path
|
| 110 |
udm_fields = load_udm_fields(udm_fields_csv)
|
| 111 |
|
| 112 |
-
#
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
# Perform the mapping
|
| 116 |
-
mapped_fields_df = map_log_fields_to_udm(
|
| 117 |
|
| 118 |
# Create a response showing the mapping
|
| 119 |
-
mapped_fields_table =
|
| 120 |
-
|
| 121 |
-
prompt = Prompt(
|
| 122 |
-
provider=ChatOpenAI.id,
|
| 123 |
-
messages=[
|
| 124 |
-
PromptMessage(role="system", template=system_template, formatted=system_template),
|
| 125 |
-
PromptMessage(role="user", template=user_template, formatted=user_template.format(input=message.content)),
|
| 126 |
-
],
|
| 127 |
-
inputs={"input": message.content},
|
| 128 |
-
settings=settings,
|
| 129 |
-
)
|
| 130 |
|
|
|
|
| 131 |
msg = cl.Message(content=f"Here is the mapped log fields to UDM:\n\n{mapped_fields_table}")
|
| 132 |
await msg.send()
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
import os
|
| 3 |
+
import openai
|
| 4 |
import chainlit as cl
|
|
|
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
|
| 7 |
# Load environment variables
|
| 8 |
load_dotenv()
|
| 9 |
|
|
|
|
| 10 |
# Load UDM fields CSV file
|
| 11 |
def load_udm_fields(csv_path):
|
| 12 |
return pd.read_csv(csv_path)
|
| 13 |
|
| 14 |
+
# Use OpenAI to assist in mapping log fields that are not directly mapped
|
| 15 |
+
async def ai_assisted_mapping(log_field, vendor, app, udm_fields):
|
| 16 |
+
# Few-shot learning examples included in the prompt, adaptable for any vendor/app
|
| 17 |
+
prompt = f"""
|
| 18 |
+
You are a cybersecurity expert specialized in {vendor} logs for {app} applications.
|
| 19 |
+
The log field '{log_field}' doesn't have a direct mapping in Google Chronicle UDM.
|
| 20 |
+
|
| 21 |
+
Here are examples of how fields from other vendors are mapped to UDM:
|
| 22 |
+
|
| 23 |
+
### Example 1: Fortinet Fields to UDM Mapping
|
| 24 |
+
Log Attribute | UDM Attribute
|
| 25 |
+
--------------|---------------
|
| 26 |
+
devname | intermediary.hostname
|
| 27 |
+
devid | intermediary.asset.hardware.serial_number
|
| 28 |
+
srcip | principal.ip
|
| 29 |
+
dstip | target.ip
|
| 30 |
+
dstport | target.port
|
| 31 |
+
|
| 32 |
+
### Example 2: Palo Alto Fields to UDM Mapping
|
| 33 |
+
Log Attribute | UDM Attribute
|
| 34 |
+
--------------|---------------
|
| 35 |
+
src_ip | principal.ip
|
| 36 |
+
dest_ip | target.ip
|
| 37 |
+
dest_port | target.port
|
| 38 |
+
action | security_result.action_details
|
| 39 |
+
severity | security_result.severity_details
|
| 40 |
+
|
| 41 |
+
Now, map the log field '{log_field}' from {vendor} {app} to the best possible UDM attribute.
|
| 42 |
+
Reference the provided UDM fields: {', '.join(udm_fields['Field Name'].tolist())}.
|
| 43 |
+
|
| 44 |
+
If no exact match, provide the closest UDM mapping or suggest a custom field (e.g., custom_fields.{log_field}).
|
| 45 |
+
"""
|
| 46 |
+
response = openai.Completion.create(
|
| 47 |
+
engine="gpt-4", # Use GPT-4 model
|
| 48 |
+
prompt=prompt,
|
| 49 |
+
max_tokens=100,
|
| 50 |
+
temperature=0.2
|
| 51 |
+
)
|
| 52 |
+
return response.choices[0].text.strip()
|
| 53 |
+
|
| 54 |
# Map log fields to UDM fields
|
| 55 |
+
async def map_log_fields_to_udm(log_fields, vendor, app, udm_fields):
|
| 56 |
mapped_fields = []
|
| 57 |
for field in log_fields:
|
| 58 |
# Try to find a matching UDM field
|
|
|
|
| 63 |
'UDM_Field': udm_match.iloc[0]['Field Name']
|
| 64 |
})
|
| 65 |
else:
|
| 66 |
+
# If no direct match, call OpenAI to suggest a mapping with few-shot examples
|
| 67 |
+
ai_suggestion = await ai_assisted_mapping(field, vendor, app, udm_fields)
|
| 68 |
mapped_fields.append({
|
| 69 |
'Log_Field': field,
|
| 70 |
+
'UDM_Field': ai_suggestion or 'custom_fields.' + field
|
| 71 |
})
|
| 72 |
return pd.DataFrame(mapped_fields)
|
| 73 |
|
| 74 |
+
# Function to format the DataFrame into a readable string format for display
|
| 75 |
+
def format_mapped_fields_for_display(mapped_fields_df):
|
| 76 |
+
# Convert the DataFrame to a formatted string
|
| 77 |
+
result = "Log Field | UDM Field\n"
|
| 78 |
+
result += "-----------------------\n"
|
| 79 |
+
for _, row in mapped_fields_df.iterrows():
|
| 80 |
+
result += f"{row['Log_Field']} | {row['UDM_Field']}\n"
|
| 81 |
+
return result
|
| 82 |
+
|
| 83 |
+
# Enhanced user prompt to guide the user
|
| 84 |
+
user_template = """To help with the mapping of your log fields to Google Chronicle's Unified Data Model (UDM),
|
| 85 |
+
please provide the following information:
|
| 86 |
+
|
| 87 |
+
1. **Vendor**: What is the vendor for this log (e.g., Palo Alto, Fortinet, etc.)?
|
| 88 |
+
2. **Application**: Which application does the log come from (e.g., Firewall, IDS, etc.)?
|
| 89 |
+
3. **Log Fields**: List the log fields you want to map (e.g., src_ip, dest_ip, action, etc.).
|
| 90 |
+
|
| 91 |
+
Example Input:
|
| 92 |
+
Vendor: Palo Alto
|
| 93 |
+
Application: Firewall
|
| 94 |
+
Log Fields:
|
| 95 |
+
src_ip
|
| 96 |
+
dest_ip
|
| 97 |
+
action
|
| 98 |
+
severity
|
| 99 |
+
|
| 100 |
+
Please provide this information in the format shown above, and I will help map the fields to UDM.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
"""
|
| 102 |
|
| 103 |
@cl.on_chat_start # Marks a function that will be executed at the start of a user session
|
| 104 |
async def start_chat():
|
| 105 |
settings = {
|
| 106 |
+
"model": "gpt-4", # Use GPT-4 here
|
| 107 |
"temperature": 0,
|
| 108 |
"max_tokens": 500,
|
| 109 |
"top_p": 1,
|
|
|
|
| 120 |
udm_fields_csv = "udm_field_list_v2.csv" # Replace with your actual CSV path
|
| 121 |
udm_fields = load_udm_fields(udm_fields_csv)
|
| 122 |
|
| 123 |
+
# Parse user input to dynamically capture log fields, vendor, and app
|
| 124 |
+
user_input = message.content.split("\n")
|
| 125 |
+
|
| 126 |
+
# Example: The first line contains the vendor, second the app, and remaining lines are log fields
|
| 127 |
+
vendor_line = user_input[0].split(":")[1].strip() # Extract Vendor
|
| 128 |
+
app_line = user_input[1].split(":")[1].strip() # Extract Application
|
| 129 |
+
log_lines = [line.strip() for line in user_input[2:] if line.strip()] # Extract log fields
|
| 130 |
|
| 131 |
# Perform the mapping
|
| 132 |
+
mapped_fields_df = await map_log_fields_to_udm(log_lines, vendor_line, app_line, udm_fields)
|
| 133 |
|
| 134 |
# Create a response showing the mapping
|
| 135 |
+
mapped_fields_table = format_mapped_fields_for_display(mapped_fields_df)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
+
# Display the output to the user
|
| 138 |
msg = cl.Message(content=f"Here is the mapped log fields to UDM:\n\n{mapped_fields_table}")
|
| 139 |
await msg.send()
|
| 140 |
|