# NOTE: source captured from a Hugging Face Spaces file viewer; header chrome removed.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import json
import logging
import os
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class LogAnalyzer:
    """Analyze queue-management logs with a causal LLM.

    The tokenizer and model are loaded eagerly at construction time;
    :meth:`analyze_logs` turns a log dictionary into a natural-language
    report generated by the model.
    """

    def __init__(self, model_id="Qwen/Qwen2.5-1.5B-Instruct", hf_token=None):
        """Initialize the analyzer and load the model.

        Args:
            model_id: Hugging Face model identifier to load.
            hf_token: Optional Hugging Face access token. Falls back to the
                ``HF_TOKEN`` environment variable when not provided.

        Raises:
            Exception: Whatever ``from_pretrained`` raises is propagated
                after being logged by ``_load_model``.
        """
        self.model_id = model_id
        # Device is chosen once here; the rest of the class derives CUDA
        # availability from this attribute instead of re-querying torch.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None
        self.tokenizer = None
        if hf_token is None:
            hf_token = os.getenv("HF_TOKEN")
        self.hf_token = hf_token
        self._load_model()

    def _load_model(self):
        """Load the tokenizer and model onto the selected device.

        On failure, resets ``self.model`` and ``self.tokenizer`` to ``None``
        and re-raises so the caller sees the original error.
        """
        try:
            # Single source of truth for CUDA use (see __init__) rather than
            # calling torch.cuda.is_available() three separate times.
            use_cuda = self.device == "cuda"
            logger.info("Loading model %s on device: %s", self.model_id, self.device)
            tokenizer_kwargs = {
                "trust_remote_code": True
            }
            model_kwargs = {
                # fp16 only pays off on GPU; stay in fp32 on CPU.
                "torch_dtype": torch.float16 if use_cuda else torch.float32,
                # Let accelerate place shards on GPU; on CPU we move manually.
                "device_map": "auto" if use_cuda else None,
                "trust_remote_code": True,
                "low_cpu_mem_usage": True
            }
            if self.hf_token:
                tokenizer_kwargs["token"] = self.hf_token
                model_kwargs["token"] = self.hf_token
                logger.info("Using Hugging Face token for authentication")
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_id,
                **tokenizer_kwargs
            )
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_id,
                **model_kwargs
            )
            if not use_cuda:
                # device_map is None on CPU, so place the weights explicitly.
                self.model = self.model.to(self.device)
            # Inference-only usage: disable dropout and other train-mode layers.
            self.model.eval()
            logger.info("Model loaded successfully")
        except Exception as e:
            # logger.exception records the traceback, not just the message.
            logger.exception("Error loading model: %s", e)
            self.model = None
            self.tokenizer = None
            raise

    def analyze_logs(self, log_data: dict) -> str:
        """Generate insights and recommendations for one day's queue log.

        Args:
            log_data: Dictionary of log fields (branch, wait times, etc.);
                serialized as JSON into the prompt.

        Returns:
            The model's analysis text, or an ``"Error during analysis: ..."``
            string if anything fails (errors are not raised to the caller).
        """
        try:
            if self.model is None or self.tokenizer is None:
                raise RuntimeError("Model not loaded. Please check model initialization.")
            if not isinstance(log_data, dict):
                raise ValueError("log_data must be a dictionary")
            prompt = f"""Analyze the following Queue Management Log and provide actionable insights and recommendations.
Log Data:
{json.dumps(log_data, indent=2)}
Please provide:
1. A summary of the branch performance.
2. Identification of any bottlenecks or issues.
3. Specific recommendations to improve efficiency.
4. Predicted impact of the recommendations."""
            messages = [
                {"role": "system", "content": "You are an expert AI Queue Management Consultant."},
                {"role": "user", "content": prompt}
            ]
            # Render the chat into the model's expected prompt format.
            text = self.tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True
            )
            model_inputs = self.tokenizer([text], return_tensors="pt").to(self.device)
            # No gradients needed for generation; saves memory.
            with torch.no_grad():
                generated_ids = self.model.generate(
                    **model_inputs,
                    max_new_tokens=512,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )
            # Strip the prompt tokens so only the newly generated text remains.
            generated_ids = [
                output_ids[len(input_ids):]
                for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
            ]
            response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
            return response.strip()
        except Exception as e:
            logger.exception("Error analyzing logs: %s", e)
            return f"Error during analysis: {str(e)}"
if __name__ == "__main__":
# Test with provided logs
sample_log = {
"date": "2026-01-24",
"branch": "SBI Jabalpur",
"avg_wait_time_sec": 420,
"max_wait_time_sec": 980,
"customers_served": 134,
"counter_1_avg_service": 180,
"counter_2_avg_service": 310,
"peak_hour": "12:00-13:00",
"queue_overflow_events": 5
}
# Note: Loading the model might take time and memory.
# For the sake of this script, we'll just print the prompt it would use.
print("LLM Analyzer initialized. Ready to process logs.")