File size: 4,578 Bytes
ad1bda5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import json
import logging
import os

# Module-wide logging: INFO level so model-loading progress messages are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class LogAnalyzer:
    """Generate queue-management insights from branch log data using a causal LLM.

    Loads a Hugging Face chat model eagerly at construction time and exposes
    :meth:`analyze_logs` to turn a log dictionary into a textual analysis.
    """

    def __init__(self, model_id="Qwen/Qwen2.5-1.5B-Instruct", hf_token=None):
        """Initialize the analyzer and load the model immediately.

        Args:
            model_id: Hugging Face model repository id to load.
            hf_token: Optional Hugging Face access token. Falls back to the
                ``HF_TOKEN`` environment variable when not provided.

        Raises:
            Exception: Any failure from tokenizer/model loading is re-raised.
        """
        self.model_id = model_id
        # Prefer GPU when available; dtype and device_map below key off this too.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None
        self.tokenizer = None

        if hf_token is None:
            hf_token = os.getenv("HF_TOKEN")

        self.hf_token = hf_token
        self._load_model()

    def _load_model(self):
        """Load tokenizer and model, authenticating with the HF token if set.

        On failure, resets ``self.model``/``self.tokenizer`` to ``None`` so the
        instance is in an unambiguous "not loaded" state, then re-raises.
        """
        try:
            logger.info(f"Loading model {self.model_id} on device: {self.device}")

            tokenizer_kwargs = {
                "trust_remote_code": True
            }
            model_kwargs = {
                # Half precision only makes sense on GPU; CPU stays at fp32.
                "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
                # Let accelerate place layers automatically on GPU setups.
                "device_map": "auto" if torch.cuda.is_available() else None,
                "trust_remote_code": True,
                "low_cpu_mem_usage": True
            }

            if self.hf_token:
                tokenizer_kwargs["token"] = self.hf_token
                model_kwargs["token"] = self.hf_token
                logger.info("Using Hugging Face token for authentication")

            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_id,
                **tokenizer_kwargs
            )

            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_id,
                **model_kwargs
            )

            # device_map is None on the CPU path, so move the model manually.
            if not torch.cuda.is_available():
                self.model = self.model.to(self.device)

            logger.info("Model loaded successfully")
        except Exception:
            # logger.exception preserves the traceback that logger.error dropped.
            logger.exception("Error loading model")
            self.model = None
            self.tokenizer = None
            raise

    def analyze_logs(self, log_data: dict, max_new_tokens: int = 512,
                     temperature: float = 0.7) -> str:
        """Return an LLM-generated analysis of a queue-management log.

        Args:
            log_data: JSON-serializable log record for a branch.
            max_new_tokens: Generation budget (default preserves prior behavior).
            temperature: Sampling temperature (default preserves prior behavior).

        Returns:
            The model's analysis text, or an ``"Error during analysis: ..."``
            string if anything fails (errors are logged, not raised — this
            best-effort contract is deliberate for callers).
        """
        try:
            if self.model is None or self.tokenizer is None:
                raise RuntimeError("Model not loaded. Please check model initialization.")

            if not isinstance(log_data, dict):
                raise ValueError("log_data must be a dictionary")

            prompt = f"""Analyze the following Queue Management Log and provide actionable insights and recommendations.

Log Data:
{json.dumps(log_data, indent=2)}

Please provide:
1. A summary of the branch performance.
2. Identification of any bottlenecks or issues.
3. Specific recommendations to improve efficiency.
4. Predicted impact of the recommendations."""

            messages = [
                {"role": "system", "content": "You are an expert AI Queue Management Consultant."},
                {"role": "user", "content": prompt}
            ]

            text = self.tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True
            )

            model_inputs = self.tokenizer([text], return_tensors="pt").to(self.device)

            # Inference only: no gradient tracking needed.
            with torch.no_grad():
                generated_ids = self.model.generate(
                    **model_inputs,
                    max_new_tokens=max_new_tokens,
                    temperature=temperature,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            # Strip the echoed prompt tokens so only the completion is decoded.
            generated_ids = [
                output_ids[len(input_ids):]
                for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
            ]

            response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
            return response.strip()
        except Exception as e:
            logger.error(f"Error analyzing logs: {e}")
            return f"Error during analysis: {str(e)}"

def build_analysis_prompt(log_data: dict) -> str:
    """Return the exact prompt LogAnalyzer.analyze_logs sends to the model.

    Kept in sync with LogAnalyzer.analyze_logs so the demo below can show the
    prompt without loading the (large) model.
    """
    return f"""Analyze the following Queue Management Log and provide actionable insights and recommendations.

Log Data:
{json.dumps(log_data, indent=2)}

Please provide:
1. A summary of the branch performance.
2. Identification of any bottlenecks or issues.
3. Specific recommendations to improve efficiency.
4. Predicted impact of the recommendations."""


if __name__ == "__main__":
    # Demo log record for a single branch/day.
    sample_log = {
      "date": "2026-01-24",
      "branch": "SBI Jabalpur",
      "avg_wait_time_sec": 420,
      "max_wait_time_sec": 980,
      "customers_served": 134,
      "counter_1_avg_service": 180,
      "counter_2_avg_service": 310,
      "peak_hour": "12:00-13:00",
      "queue_overflow_events": 5
    }

    # Loading the model takes significant time and memory, so the demo only
    # prints the prompt that would be used — without instantiating LogAnalyzer.
    print("Prompt that would be sent to the model:\n")
    print(build_analysis_prompt(sample_log))