Spaces:
Sleeping
Sleeping
File size: 5,290 Bytes
1494935 dfc0ed9 0d266d1 6efc73e 0ed0004 4382295 6efc73e 051f76a 81dfe36 051f76a 81dfe36 051f76a 81dfe36 051f76a 81dfe36 051f76a ee4304e 6c8ab88 0d266d1 2b01ff9 0d266d1 086eff6 0d266d1 ca66453 0d266d1 51acc9b 7df3b45 333dd2f dfc0ed9 ee4304e 6efc73e 2b01ff9 051f76a 0d266d1 2b01ff9 051f76a 2b01ff9 ee4304e 2b01ff9 ee4304e 2b01ff9 444d5dc 2b01ff9 0d266d1 2b01ff9 0d266d1 6c8ab88 ce12012 0d266d1 ce12012 ee4304e 0d266d1 d71bb98 2b01ff9 ee4304e 2b01ff9 d71bb98 2b01ff9 0d266d1 2b01ff9 ee4304e 2b01ff9 6c8ab88 2b01ff9 ee4304e 2b01ff9 6c8ab88 2b01ff9 ee4304e ce12012 ee4304e 2b01ff9 ee4304e 2b01ff9 ee4304e a68d833 3362444 13a6f56 b9cd446 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 | # --- Imports ---
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# --- Configuration ---
# Hugging Face Hub repository and the specific GGUF weight file to download.
# Q4_K_M quantization keeps the 1.7B model small enough for a free CPU Space.
REPO_ID = "madox81/SmolLM2-Cyber-Insight-GGUF"
GGUF_FILENAME = "SmolLM2-1.7b-Instruct.Q4_K_M.gguf"
# --- Instructions Setup ---
# System instruction for the "MITRE Mapping" task (selected in generate_resp).
# It pins the model to a closed tactic vocabulary and a JSON-only reply to make
# the output machine-parseable and to reduce hallucinated techniques.
MITRE_INSTRUCT = """You are a cybersecurity threat analysis assistant.
Analyze the following sequence of security events as a single coordinated attack.
Tasks:
1. Identify all relevant MITRE ATT&CK tactics
2. Identify all relevant MITRE ATT&CK techniques (use names, not IDs)
3. Consider the full sequence, not individual events
4. Include only behaviors clearly supported by the events
Strict Rules:
- Only include techniques that are directly observable from the events
- Do NOT assume or infer techniques without clear evidence
- Do NOT include phishing unless an email is explicitly mentioned
- Do NOT include PowerShell unless explicitly stated
- Distinguish carefully between:
• Command and Control (communication)
• Exfiltration (data theft)
- If uncertain, omit the technique rather than guess
Allowed Tactics (use only these):
- Initial Access
- Execution
- Persistence
- Privilege Escalation
- Defense Evasion
- Credential Access
- Discovery
- Lateral Movement
- Collection
- Command and Control
- Exfiltration
- Impact
Return ONLY valid JSON:
{
"tactics": [...],
"techniques": [...]
}
Validation Step:
Before producing the final answer, internally verify that each technique is directly supported by at least one event.
"""
# --- LLM Class ---
class LLM:
    """Thin wrapper around a llama.cpp chat model fetched from the HF Hub."""

    def __init__(self, repo_id, gguf_filename):
        print("Downloading model (if not cached)...")
        # Pull just the single GGUF weight file from the repo.
        model_path = hf_hub_download(repo_id=repo_id, filename=gguf_filename)
        print("Loading model with llama.cpp...")
        # n_ctx: context window — smaller uses less RAM.
        # n_threads: free HF Spaces have 2 CPUs, so 2 threads is optimal.
        self.llm = Llama(
            model_path=model_path,
            n_ctx=1024,
            n_threads=2,
            verbose=True,
        )
        print("Model loaded!")

    def generate_resp(self, user_input, task_type):
        """Build a task-specific prompt around user_input and return the model reply."""
        # Collapse the raw input: strip each line, drop blanks, join on spaces.
        events = " ".join(
            piece.strip() for piece in user_input.split("\n") if piece.strip()
        )
        # Pick the instruction that matches the UI task selector.
        if task_type == "Severity Assessment":
            instruction = "Assess the severity and business risk of the following incident."
        elif task_type == "MITRE Mapping":
            instruction = MITRE_INSTRUCT
        else:
            instruction = "Analyze the following:"
        prompt = f"{instruction}\n\nInput:\n{events}"
        # create_chat_completion applies SmolLM2's chat template automatically,
        # so a single user-role message is sufficient.
        reply = self.llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=256,
            temperature=0.0,
            repeat_penalty=1.15,
        )
        # Pull the assistant text out of the OpenAI-style response dict.
        return reply['choices'][0]['message']['content'].strip()
# --- Initialize ---
# Ensure you update the GGUF_FILENAME variable above to match your file!
# Module-level cache so the model is downloaded/loaded at most once per process.
llm_instance = None

def get_llm():
    """Return the process-wide LLM singleton, creating it on first use."""
    global llm_instance
    if llm_instance is not None:
        return llm_instance
    print("Initializing model...")
    llm_instance = LLM(REPO_ID, GGUF_FILENAME)
    return llm_instance
# --- Gradio Interface ---
def process_input(user_input, task_type):
    """Gradio callback: route the UI inputs through the lazily-loaded model."""
    return get_llm().generate_resp(user_input, task_type)
# NOTE(review): original indentation was lost in extraction — layout nesting
# (output_box as a sibling of the input Column inside the Row) reconstructed;
# confirm against the deployed Space.
with gr.Blocks(title="SmolLM2-Cyber-Insight") as demo:
    gr.Markdown("# 🛡️ SmolLM2-Cyber-Insight (Optimized GGUF)")
    with gr.Row():
        with gr.Column(scale=2):
            # Choices must match the task_type strings checked in generate_resp.
            task_selector = gr.Dropdown(
                label="Select Task Type",
                choices=["MITRE Mapping", "Severity Assessment"],
                value="MITRE Mapping"
            )
            input_box = gr.Textbox(
                label="Input Data",
                placeholder="Paste log, procedure, or incident description here...",
                lines=5
            )
            submit_btn = gr.Button("Analyze")
        output_box = gr.Textbox(label="Model Response (JSON)", lines=5)
    gr.Markdown("### Examples")
    # Clickable examples that prefill the task selector and input box.
    gr.Examples(
        examples=[
            ["MITRE Mapping", "selection: CommandLine contains 'Invoke-Expression'"],
            ["MITRE Mapping", "Incident Type: Ransomware\nTarget: Finance Server"],
            ["Severity Assessment", "Incident: Ransomware affecting Finance Server."]
        ],
        inputs=[task_selector, input_box]
    )
    # Wire the button: (input text, task) -> model response.
    submit_btn.click(fn=process_input, inputs=[input_box, task_selector], outputs=output_box)
if __name__ == "__main__":
    print('App Started')
    # Removed a stray trailing "|" artifact from the original line — it was a
    # syntax error left over from a table/paste extraction.
    # queue() serializes requests so the single CPU-bound llama.cpp instance is
    # never hit concurrently; ssr_mode=False avoids Gradio SSR issues on Spaces.
    demo.queue().launch(ssr_mode=False)