Spaces:
Sleeping
Sleeping
File size: 5,290 Bytes
1494935 dfc0ed9 0d266d1 6efc73e 0ed0004 4382295 6efc73e 051f76a 81dfe36 051f76a 81dfe36 051f76a 81dfe36 051f76a 81dfe36 051f76a ee4304e 6c8ab88 0d266d1 2b01ff9 0d266d1 086eff6 0d266d1 ca66453 0d266d1 51acc9b 7df3b45 333dd2f dfc0ed9 ee4304e 6efc73e 2b01ff9 051f76a 0d266d1 2b01ff9 051f76a 2b01ff9 ee4304e 2b01ff9 ee4304e 2b01ff9 444d5dc 2b01ff9 0d266d1 2b01ff9 0d266d1 6c8ab88 ce12012 0d266d1 ce12012 ee4304e 0d266d1 d71bb98 2b01ff9 ee4304e 2b01ff9 d71bb98 2b01ff9 0d266d1 2b01ff9 ee4304e 2b01ff9 6c8ab88 2b01ff9 ee4304e 2b01ff9 6c8ab88 2b01ff9 ee4304e ce12012 ee4304e 2b01ff9 ee4304e 2b01ff9 ee4304e a68d833 3362444 13a6f56 b9cd446 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 | # --- Imports ---
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# --- Configuration ---
# Hugging Face Hub repository and the specific GGUF weight file to download.
# Q4_K_M quantization keeps the 1.7B model small enough for a free CPU Space.
REPO_ID = "madox81/SmolLM2-Cyber-Insight-GGUF"
GGUF_FILENAME = "SmolLM2-1.7b-Instruct.Q4_K_M.gguf"
# --- Instructions Setup ---
# System instruction for the "MITRE Mapping" task (selected in generate_resp).
# It pins the model to a closed tactic vocabulary and a JSON-only reply to make
# the output machine-parseable and to reduce hallucinated techniques.
MITRE_INSTRUCT = """You are a cybersecurity threat analysis assistant.
Analyze the following sequence of security events as a single coordinated attack.
Tasks:
1. Identify all relevant MITRE ATT&CK tactics
2. Identify all relevant MITRE ATT&CK techniques (use names, not IDs)
3. Consider the full sequence, not individual events
4. Include only behaviors clearly supported by the events
Strict Rules:
- Only include techniques that are directly observable from the events
- Do NOT assume or infer techniques without clear evidence
- Do NOT include phishing unless an email is explicitly mentioned
- Do NOT include PowerShell unless explicitly stated
- Distinguish carefully between:
• Command and Control (communication)
• Exfiltration (data theft)
- If uncertain, omit the technique rather than guess
Allowed Tactics (use only these):
- Initial Access
- Execution
- Persistence
- Privilege Escalation
- Defense Evasion
- Credential Access
- Discovery
- Lateral Movement
- Collection
- Command and Control
- Exfiltration
- Impact
Return ONLY valid JSON:
{
"tactics": [...],
"techniques": [...]
}
Validation Step:
Before producing the final answer, internally verify that each technique is directly supported by at least one event.
"""
# --- LLM Class ---
class LLM:
    """Thin wrapper around a llama.cpp chat model fetched from the HF Hub."""

    def __init__(self, repo_id, gguf_filename):
        print("Downloading model (if not cached)...")
        # Pull just the single GGUF weight file from the repo.
        model_path = hf_hub_download(repo_id=repo_id, filename=gguf_filename)
        print("Loading model with llama.cpp...")
        # n_ctx: context window — smaller uses less RAM.
        # n_threads: free HF Spaces have 2 CPUs, so 2 threads is optimal.
        self.llm = Llama(
            model_path=model_path,
            n_ctx=1024,
            n_threads=2,
            verbose=True,
        )
        print("Model loaded!")

    def generate_resp(self, user_input, task_type):
        """Build a task-specific prompt around user_input and return the model reply."""
        # Collapse the raw input: strip each line, drop blanks, join on spaces.
        events = " ".join(
            piece.strip() for piece in user_input.split("\n") if piece.strip()
        )
        # Pick the instruction that matches the UI task selector.
        if task_type == "Severity Assessment":
            instruction = "Assess the severity and business risk of the following incident."
        elif task_type == "MITRE Mapping":
            instruction = MITRE_INSTRUCT
        else:
            instruction = "Analyze the following:"
        prompt = f"{instruction}\n\nInput:\n{events}"
        # create_chat_completion applies SmolLM2's chat template automatically,
        # so a single user-role message is sufficient.
        reply = self.llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=256,
            temperature=0.0,
            repeat_penalty=1.15,
        )
        # Pull the assistant text out of the OpenAI-style response dict.
        return reply['choices'][0]['message']['content'].strip()
# --- Initialize ---
# Ensure you update the GGUF_FILENAME variable above to match your file!
# Module-level cache so the model is downloaded/loaded at most once per process.
llm_instance = None

def get_llm():
    """Return the process-wide LLM singleton, creating it on first use."""
    global llm_instance
    if llm_instance is not None:
        return llm_instance
    print("Initializing model...")
    llm_instance = LLM(REPO_ID, GGUF_FILENAME)
    return llm_instance
# --- Gradio Interface ---
def process_input(user_input, task_type):
    """Gradio callback: route the UI inputs through the lazily-loaded model."""
    return get_llm().generate_resp(user_input, task_type)
# NOTE(review): original indentation was lost in extraction — layout nesting
# (output_box as a sibling of the input Column inside the Row) reconstructed;
# confirm against the deployed Space.
with gr.Blocks(title="SmolLM2-Cyber-Insight") as demo:
    gr.Markdown("# 🛡️ SmolLM2-Cyber-Insight (Optimized GGUF)")
    with gr.Row():
        with gr.Column(scale=2):
            # Choices must match the task_type strings checked in generate_resp.
            task_selector = gr.Dropdown(
                label="Select Task Type",
                choices=["MITRE Mapping", "Severity Assessment"],
                value="MITRE Mapping"
            )
            input_box = gr.Textbox(
                label="Input Data",
                placeholder="Paste log, procedure, or incident description here...",
                lines=5
            )
            submit_btn = gr.Button("Analyze")
        output_box = gr.Textbox(label="Model Response (JSON)", lines=5)
    gr.Markdown("### Examples")
    # Clickable examples that prefill the task selector and input box.
    gr.Examples(
        examples=[
            ["MITRE Mapping", "selection: CommandLine contains 'Invoke-Expression'"],
            ["MITRE Mapping", "Incident Type: Ransomware\nTarget: Finance Server"],
            ["Severity Assessment", "Incident: Ransomware affecting Finance Server."]
        ],
        inputs=[task_selector, input_box]
    )
    # Wire the button: (input text, task) -> model response.
    submit_btn.click(fn=process_input, inputs=[input_box, task_selector], outputs=output_box)
if __name__ == "__main__":
    print('App Started')
    # Removed a stray trailing "|" artifact from the original line — it was a
    # syntax error left over from a table/paste extraction.
    # queue() serializes requests so the single CPU-bound llama.cpp instance is
    # never hit concurrently; ssr_mode=False avoids Gradio SSR issues on Spaces.
    demo.queue().launch(ssr_mode=False)