"""Local LLM wrapper that produces malware-analysis answers for user input.

Importing this module loads the tokenizer and the causal language model from
the ``model/final`` directory located one level above this file's directory.
"""

import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

from rag.search import search_context

# Resolve against the absolute location of this file so the model directory
# is found regardless of the current working directory.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
MODEL_PATH = os.path.join(BASE_DIR, "model", "final")

# NOTE(review): trust_remote_code=True allows Python code shipped alongside
# the checkpoint to be executed. local_files_only=True restricts loading to
# MODEL_PATH, so only keep this enabled while that directory is trusted.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_PATH,
    local_files_only=True,
    trust_remote_code=True,
    fix_mistral_regex=True,
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    local_files_only=True,
    trust_remote_code=True,
    device_map="auto",
    dtype=torch.float16,
)


def analyze(user_input: str) -> str:
    """Generate the model's analysis of *user_input*.

    Context retrieved by ``search_context(user_input)`` is interpolated into
    an instruction prompt together with the input, and up to 256 new tokens
    are sampled from the model (temperature 0.2, top-p 0.9).

    Args:
        user_input: Text to analyze; also used as the retrieval query.

    Returns:
        The decoded text of the newly generated tokens only. The prompt is
        not included, so the result is the model's reply on its own. The
        prompt requests JSON, but the reply is not validated here.
    """
    context = search_context(user_input)

    # NOTE(review): line breaks inside this prompt were reconstructed from the
    # bullet/section structure -- confirm they match the format the model in
    # MODEL_PATH expects.
    prompt = f"""
You are a cybersecurity malware analysis assistant.

Respond ONLY in valid JSON.
Use these fields exactly once:
- reasoning (array of strings)
- indicators (array)
- confidence (float 0-1)
- recommendation (string)
- mitre_attack (array)

Context:
{context}

Input:
{user_input}

Response:
"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.2,
            top_p=0.9,
        )

    # generate() on a causal LM returns the prompt tokens followed by the
    # continuation; drop the prompt so the instructions are not echoed back.
    generated_tokens = output[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)