# Hugging Face Space status: Running
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import json
import logging
import os

# Module-level logging at INFO so model-loading progress (see
# LogAnalyzer._load_model) is visible in the application logs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class LogAnalyzer:
    """Analyze queue-management logs with a causal language model.

    Loads a Hugging Face chat model (default: Qwen/Qwen2.5-1.5B-Instruct)
    at construction time and turns a log dictionary into natural-language
    insights via `analyze_logs`.
    """

    def __init__(self, model_id="Qwen/Qwen2.5-1.5B-Instruct", hf_token=None):
        """Initialize the analyzer and eagerly load the model.

        Args:
            model_id: Hugging Face model repository id to load.
            hf_token: Optional access token; falls back to the HF_TOKEN
                environment variable when not provided.

        Raises:
            Exception: whatever `_load_model` re-raises on load failure.
        """
        self.model_id = model_id
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None
        self.tokenizer = None
        # Fall back to the environment so deployments can supply the token
        # as a secret instead of passing it explicitly.
        if hf_token is None:
            hf_token = os.getenv("HF_TOKEN")
        self.hf_token = hf_token
        self._load_model()

    def _load_model(self):
        """Load tokenizer and model; on failure, reset state and re-raise."""
        try:
            # Lazy %-style logging args: no string formatting unless emitted.
            logger.info("Loading model %s on device: %s", self.model_id, self.device)
            tokenizer_kwargs = {"trust_remote_code": True}
            model_kwargs = {
                # fp16 only when a GPU is present; CPU stays fp32.
                "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
                "device_map": "auto" if torch.cuda.is_available() else None,
                "trust_remote_code": True,
                "low_cpu_mem_usage": True,
            }
            if self.hf_token:
                tokenizer_kwargs["token"] = self.hf_token
                model_kwargs["token"] = self.hf_token
                logger.info("Using Hugging Face token for authentication")
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, **tokenizer_kwargs)
            self.model = AutoModelForCausalLM.from_pretrained(self.model_id, **model_kwargs)
            if not torch.cuda.is_available():
                # device_map is None on CPU, so place the model explicitly.
                self.model = self.model.to(self.device)
            # Inference-only usage: switch off dropout / training behavior.
            self.model.eval()
            logger.info("Model loaded successfully")
        except Exception as e:
            logger.error("Error loading model: %s", e)
            # Leave the instance in a well-defined "not loaded" state so
            # analyze_logs can detect it, then propagate the failure.
            self.model = None
            self.tokenizer = None
            raise

    def analyze_logs(self, log_data: dict, max_new_tokens: int = 512, temperature: float = 0.7) -> str:
        """Generate insights and recommendations for one queue-management log.

        Args:
            log_data: Log record as a JSON-serializable dictionary.
            max_new_tokens: Generation budget (default preserves prior behavior).
            temperature: Sampling temperature (default preserves prior behavior).

        Returns:
            The model's analysis as a stripped string, or an
            "Error during analysis: ..." string on any failure — this
            method never raises.
        """
        try:
            if self.model is None or self.tokenizer is None:
                raise RuntimeError("Model not loaded. Please check model initialization.")
            if not isinstance(log_data, dict):
                raise ValueError("log_data must be a dictionary")
            prompt = f"""Analyze the following Queue Management Log and provide actionable insights and recommendations.
Log Data:
{json.dumps(log_data, indent=2)}
Please provide:
1. A summary of the branch performance.
2. Identification of any bottlenecks or issues.
3. Specific recommendations to improve efficiency.
4. Predicted impact of the recommendations."""
            messages = [
                {"role": "system", "content": "You are an expert AI Queue Management Consultant."},
                {"role": "user", "content": prompt},
            ]
            # Render the chat turns with the model's own template and leave
            # the assistant turn open for generation.
            text = self.tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
            )
            model_inputs = self.tokenizer([text], return_tensors="pt").to(self.device)
            with torch.no_grad():
                generated_ids = self.model.generate(
                    **model_inputs,
                    max_new_tokens=max_new_tokens,
                    temperature=temperature,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id,
                )
            # Drop the echoed prompt tokens, keeping only the completion.
            generated_ids = [
                output_ids[len(input_ids):]
                for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
            ]
            response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
            return response.strip()
        except Exception as e:
            logger.error("Error analyzing logs: %s", e)
            # Contract: callers receive an error string rather than an exception.
            return f"Error during analysis: {str(e)}"
if __name__ == "__main__":
    # Example record exercised when this module is run directly.
    sample_log = dict(
        date="2026-01-24",
        branch="SBI Jabalpur",
        avg_wait_time_sec=420,
        max_wait_time_sec=980,
        customers_served=134,
        counter_1_avg_service=180,
        counter_2_avg_service=310,
        peak_hour="12:00-13:00",
        queue_overflow_events=5,
    )
    # Instantiating LogAnalyzer would download and load the model, which is
    # slow and memory-hungry; this entry point only announces readiness.
    print("LLM Analyzer initialized. Ready to process logs.")