Spaces:

Galatea007
/

LLMopsDK

Paused

App Files Files Community

Galatea007 commited on Oct 9, 2024

Commit

91ba323

verified ·

1 Parent(s): 0e07bda

Update app_udm.py

Browse files

Files changed (1) hide show

app_udm.py +83 -76

app_udm.py CHANGED Viewed

@@ -1,31 +1,58 @@
-# You can find this code for Chainlit python streaming here (https://docs.chainlit.io/concepts/streaming/python)
-# OpenAI Chat completion
-import os
-from openai import AsyncOpenAI  # importing openai for API usage
-import chainlit as cl  # importing chainlit for our app
-from chainlit.prompt import Prompt, PromptMessage  # importing prompt tools
-from chainlit.playground.providers import ChatOpenAI  # importing ChatOpenAI tools
-from dotenv import load_dotenv
-load_dotenv()
 import pandas as pd
 import os
-from openai import AsyncOpenAI
 import chainlit as cl
-from chainlit.prompt import Prompt, PromptMessage
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
 # Load UDM fields CSV file
 def load_udm_fields(csv_path):
     return pd.read_csv(csv_path)
 # Map log fields to UDM fields
-def map_log_fields_to_udm(log_fields, udm_fields):
     mapped_fields = []
     for field in log_fields:
         # Try to find a matching UDM field
@@ -36,63 +63,47 @@ def map_log_fields_to_udm(log_fields, udm_fields):
                 'UDM_Field': udm_match.iloc[0]['Field Name']
             })
         else:
-            # If no direct match, add to custom fields
             mapped_fields.append({
                 'Log_Field': field,
-                'UDM_Field': 'custom_fields.' + field
             })
     return pd.DataFrame(mapped_fields)
-# Chainlit OpenAI Templates for multi-shot learning
-system_template = """You are a cybersecurity expert specialized in log analysis and data normalization,
-helping security teams map security log fields to Google Chronicle's Unified Data Model (UDM).
-Please follow these steps:
-1. Map each product log field to its corresponding UDM field using the reference UDM CSV provided.
-2. For fields that don't have a direct match in UDM, place them into custom fields.
-3. Ensure each mapped field, including custom fields, is unique and accurate.
-4. Organize the mapping into a structured table format.
-"""
-# Multi-shot learning examples for Fortinet and Palo Alto
-user_template = """Here is a sample log:
-{input}
-Please follow these steps:
-1. Use the provided UDM CSV to map the log fields.
-2. For fields that don't have a direct match, assign them to custom fields.
-3. Organize the mapping into a structured table.
-### Example 1: Fortinet Fields to UDM Mapping
-Log Attribute | UDM Attribute
---------------|---------------
-devname       | intermediary.hostname
-devid         | intermediary.asset.hardware.serial_number
-srcip         | principal.ip
-dstip         | target.ip
-dstport       | target.port
-### Example 2: Palo Alto Fields to UDM Mapping
-Log Attribute | UDM Attribute
---------------|---------------
-src_ip        | principal.ip
-dest_ip       | target.ip
-dest_port     | target.port
-action        | security_result.action_details
-severity      | security_result.severity_details
-Now proceed to map the given sample log:
 """
 @cl.on_chat_start  # Marks a function that will be executed at the start of a user session
 async def start_chat():
     settings = {
-        "model": "gpt-3.5-turbo",
         "temperature": 0,
         "max_tokens": 500,
         "top_p": 1,
@@ -109,25 +120,21 @@ async def main(message: cl.Message):
     udm_fields_csv = "udm_field_list_v2.csv"  # Replace with your actual CSV path
     udm_fields = load_udm_fields(udm_fields_csv)
-    # Simulate log fields from the user's input (in real use case, you'd parse the input log)
-    log_fields = message.content.split()  # Example: Splitting input log into fields for simplicity
     # Perform the mapping
-    mapped_fields_df = map_log_fields_to_udm(log_fields, udm_fields)
     # Create a response showing the mapping
-    mapped_fields_table = mapped_fields_df.to_string(index=False)
-    prompt = Prompt(
-        provider=ChatOpenAI.id,
-        messages=[
-            PromptMessage(role="system", template=system_template, formatted=system_template),
-            PromptMessage(role="user", template=user_template, formatted=user_template.format(input=message.content)),
-        ],
-        inputs={"input": message.content},
-        settings=settings,
-    )
     msg = cl.Message(content=f"Here is the mapped log fields to UDM:\n\n{mapped_fields_table}")
     await msg.send()

 import pandas as pd
 import os
+import openai
 import chainlit as cl
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
 # Load UDM fields CSV file
 def load_udm_fields(csv_path):
     return pd.read_csv(csv_path)
+# Use OpenAI to assist in mapping log fields that are not directly mapped
+async def ai_assisted_mapping(log_field, vendor, app, udm_fields):
+    # Few-shot learning examples included in the prompt, adaptable for any vendor/app
+    prompt = f"""
+    You are a cybersecurity expert specialized in {vendor} logs for {app} applications.
+    The log field '{log_field}' doesn't have a direct mapping in Google Chronicle UDM.
+    Here are examples of how fields from other vendors are mapped to UDM:
+    ### Example 1: Fortinet Fields to UDM Mapping
+    Log Attribute | UDM Attribute
+    --------------|---------------
+    devname       | intermediary.hostname
+    devid         | intermediary.asset.hardware.serial_number
+    srcip         | principal.ip
+    dstip         | target.ip
+    dstport       | target.port
+    ### Example 2: Palo Alto Fields to UDM Mapping
+    Log Attribute | UDM Attribute
+    --------------|---------------
+    src_ip        | principal.ip
+    dest_ip       | target.ip
+    dest_port     | target.port
+    action        | security_result.action_details
+    severity      | security_result.severity_details
+    Now, map the log field '{log_field}' from {vendor} {app} to the best possible UDM attribute.
+    Reference the provided UDM fields: {', '.join(udm_fields['Field Name'].tolist())}.
+    If no exact match, provide the closest UDM mapping or suggest a custom field (e.g., custom_fields.{log_field}).
+    """
+    response = openai.Completion.create(
+        engine="gpt-4",  # Use GPT-4 model
+        prompt=prompt,
+        max_tokens=100,
+        temperature=0.2
+    )
+    return response.choices[0].text.strip()
 # Map log fields to UDM fields
+async def map_log_fields_to_udm(log_fields, vendor, app, udm_fields):
     mapped_fields = []
     for field in log_fields:
         # Try to find a matching UDM field
                 'UDM_Field': udm_match.iloc[0]['Field Name']
             })
         else:
+            # If no direct match, call OpenAI to suggest a mapping with few-shot examples
+            ai_suggestion = await ai_assisted_mapping(field, vendor, app, udm_fields)
             mapped_fields.append({
                 'Log_Field': field,
+                'UDM_Field': ai_suggestion or 'custom_fields.' + field
             })
     return pd.DataFrame(mapped_fields)
+# Function to format the DataFrame into a readable string format for display
+def format_mapped_fields_for_display(mapped_fields_df):
+    # Convert the DataFrame to a formatted string
+    result = "Log Field | UDM Field\n"
+    result += "-----------------------\n"
+    for _, row in mapped_fields_df.iterrows():
+        result += f"{row['Log_Field']} | {row['UDM_Field']}\n"
+    return result
+# Enhanced user prompt to guide the user
+user_template = """To help with the mapping of your log fields to Google Chronicle's Unified Data Model (UDM),
+please provide the following information:
+1. **Vendor**: What is the vendor for this log (e.g., Palo Alto, Fortinet, etc.)?
+2. **Application**: Which application does the log come from (e.g., Firewall, IDS, etc.)?
+3. **Log Fields**: List the log fields you want to map (e.g., src_ip, dest_ip, action, etc.).
+Example Input:
+Vendor: Palo Alto
+Application: Firewall
+Log Fields:
+src_ip
+dest_ip
+action
+severity
+Please provide this information in the format shown above, and I will help map the fields to UDM.
 """
 @cl.on_chat_start  # Marks a function that will be executed at the start of a user session
 async def start_chat():
     settings = {
+        "model": "gpt-4",  # Use GPT-4 here
         "temperature": 0,
         "max_tokens": 500,
         "top_p": 1,
     udm_fields_csv = "udm_field_list_v2.csv"  # Replace with your actual CSV path
     udm_fields = load_udm_fields(udm_fields_csv)
+    # Parse user input to dynamically capture log fields, vendor, and app
+    user_input = message.content.split("\n")
+    # Example: The first line contains the vendor, second the app, and remaining lines are log fields
+    vendor_line = user_input[0].split(":")[1].strip()  # Extract Vendor
+    app_line = user_input[1].split(":")[1].strip()  # Extract Application
+    log_lines = [line.strip() for line in user_input[2:] if line.strip()]  # Extract log fields
     # Perform the mapping
+    mapped_fields_df = await map_log_fields_to_udm(log_lines, vendor_line, app_line, udm_fields)
     # Create a response showing the mapping
+    mapped_fields_table = format_mapped_fields_for_display(mapped_fields_df)
+    # Display the output to the user
     msg = cl.Message(content=f"Here is the mapped log fields to UDM:\n\n{mapped_fields_table}")
     await msg.send()