import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import json
import logging
import os

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class LogAnalyzer:
    """Analyzes queue management logs with a local Hugging Face causal LM."""

    def __init__(self, model_id="Qwen/Qwen2.5-1.5B-Instruct", hf_token=None):
        self.model_id = model_id
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None
        self.tokenizer = None
        # Fall back to the HF_TOKEN environment variable if no token is passed.
        if hf_token is None:
            hf_token = os.getenv("HF_TOKEN")
        self.hf_token = hf_token
        self._load_model()

    def _load_model(self):
        try:
            logger.info(f"Loading model {self.model_id} on device: {self.device}")
            tokenizer_kwargs = {"trust_remote_code": True}
            model_kwargs = {
                # Half precision on GPU to save memory; full precision on CPU.
                "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
                "device_map": "auto" if torch.cuda.is_available() else None,
                "trust_remote_code": True,
                "low_cpu_mem_usage": True,
            }
            if self.hf_token:
                tokenizer_kwargs["token"] = self.hf_token
                model_kwargs["token"] = self.hf_token
                logger.info("Using Hugging Face token for authentication")

            self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, **tokenizer_kwargs)
            self.model = AutoModelForCausalLM.from_pretrained(self.model_id, **model_kwargs)

            # device_map="auto" already places the model on the GPU, so only
            # move it manually when running on CPU.
            if not torch.cuda.is_available():
                self.model = self.model.to(self.device)
            logger.info("Model loaded successfully")
        except Exception as e:
            logger.error(f"Error loading model: {e}")
            self.model = None
            self.tokenizer = None
            raise

    def analyze_logs(self, log_data: dict) -> str:
        try:
            if self.model is None or self.tokenizer is None:
                raise RuntimeError("Model not loaded. Please check model initialization.")
            if not isinstance(log_data, dict):
                raise ValueError("log_data must be a dictionary")

            prompt = f"""Analyze the following Queue Management Log and provide actionable insights and recommendations.

Log Data:
{json.dumps(log_data, indent=2)}

Please provide:
1. A summary of the branch performance.
2. Identification of any bottlenecks or issues.
3. Specific recommendations to improve efficiency.
4. Predicted impact of the recommendations."""

            messages = [
                {"role": "system", "content": "You are an expert AI Queue Management Consultant."},
                {"role": "user", "content": prompt},
            ]
            text = self.tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
            model_inputs = self.tokenizer([text], return_tensors="pt").to(self.device)

            with torch.no_grad():
                generated_ids = self.model.generate(
                    **model_inputs,
                    max_new_tokens=512,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id,
                )
            # Strip the prompt tokens so only the newly generated text is decoded.
            generated_ids = [
                output_ids[len(input_ids):]
                for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
            ]
            response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
            return response.strip()
        except Exception as e:
            logger.error(f"Error analyzing logs: {e}")
            return f"Error during analysis: {str(e)}"


if __name__ == "__main__":
    # Test with provided logs
    sample_log = {
        "date": "2026-01-24",
        "branch": "SBI Jabalpur",
        "avg_wait_time_sec": 420,
        "max_wait_time_sec": 980,
        "customers_served": 134,
        "counter_1_avg_service": 180,
        "counter_2_avg_service": 310,
        "peak_hour": "12:00-13:00",
        "queue_overflow_events": 5,
    }
    # Note: loading the model takes time and memory, so instead of running a
    # full analysis this block only previews the log data the analyzer would
    # embed in its prompt.
    print("Prompt preview for sample log:")
    print(json.dumps(sample_log, indent=2))
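    # A minimal end-to-end sketch, left commented out because it downloads the
    # default Qwen/Qwen2.5-1.5B-Instruct weights and runs generation; set
    # HF_TOKEN or pass hf_token=... if the model requires authentication.
    #
    #     analyzer = LogAnalyzer()
    #     report = analyzer.analyze_logs(sample_log)
    #     print(report)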