# ai-queue-management/llm_analyzer.py
import json
import logging
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class LogAnalyzer:
    """Analyzes queue management logs with a local instruction-tuned LLM."""

    def __init__(self, model_id="Qwen/Qwen2.5-1.5B-Instruct", hf_token=None):
        self.model_id = model_id
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None
        self.tokenizer = None
        # Fall back to the HF_TOKEN environment variable if no token is passed.
        if hf_token is None:
            hf_token = os.getenv("HF_TOKEN")
        self.hf_token = hf_token
        self._load_model()

    def _load_model(self):
        try:
            logger.info(f"Loading model {self.model_id} on device: {self.device}")
            tokenizer_kwargs = {
                "trust_remote_code": True
            }
            model_kwargs = {
                # Half precision on GPU; full precision on CPU, where float16
                # matmuls are poorly supported.
                "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
                # device_map="auto" lets accelerate place the weights on the GPU(s).
                "device_map": "auto" if torch.cuda.is_available() else None,
                "trust_remote_code": True,
                "low_cpu_mem_usage": True
            }
            if self.hf_token:
                tokenizer_kwargs["token"] = self.hf_token
                model_kwargs["token"] = self.hf_token
                logger.info("Using Hugging Face token for authentication")
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_id,
                **tokenizer_kwargs
            )
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_id,
                **model_kwargs
            )
            # Without device_map, the model loads on CPU; move it explicitly.
            if not torch.cuda.is_available():
                self.model = self.model.to(self.device)
            logger.info("Model loaded successfully")
        except Exception as e:
            logger.error(f"Error loading model: {e}")
            self.model = None
            self.tokenizer = None
            raise

    def analyze_logs(self, log_data: dict) -> str:
        try:
            if self.model is None or self.tokenizer is None:
                raise RuntimeError("Model not loaded. Please check model initialization.")
            if not isinstance(log_data, dict):
                raise ValueError("log_data must be a dictionary")
            prompt = f"""Analyze the following queue management log and provide actionable insights and recommendations.

Log Data:
{json.dumps(log_data, indent=2)}

Please provide:
1. A summary of the branch performance.
2. Identification of any bottlenecks or issues.
3. Specific recommendations to improve efficiency.
4. Predicted impact of the recommendations."""
            messages = [
                {"role": "system", "content": "You are an expert AI Queue Management Consultant."},
                {"role": "user", "content": prompt}
            ]
            # Render the chat messages into the model's expected prompt format.
            text = self.tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True
            )
            model_inputs = self.tokenizer([text], return_tensors="pt").to(self.device)
            with torch.no_grad():
                generated_ids = self.model.generate(
                    **model_inputs,
                    max_new_tokens=512,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )
            # Strip the prompt tokens so only the newly generated reply remains.
            generated_ids = [
                output_ids[len(input_ids):]
                for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
            ]
            response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
            return response.strip()
        except Exception as e:
            logger.error(f"Error analyzing logs: {e}")
            return f"Error during analysis: {str(e)}"

if __name__ == "__main__":
    # Sample branch log for a quick smoke test.
    sample_log = {
        "date": "2026-01-24",
        "branch": "SBI Jabalpur",
        "avg_wait_time_sec": 420,
        "max_wait_time_sec": 980,
        "customers_served": 134,
        "counter_1_avg_service": 180,
        "counter_2_avg_service": 310,
        "peak_hour": "12:00-13:00",
        "queue_overflow_events": 5
    }
    # Loading the model takes significant time and memory, so by default this
    # script only prints the payload that analyze_logs() would receive.
    print("LLM Analyzer ready. Sample log payload:")
    print(json.dumps(sample_log, indent=2))