import gradio as gr
import json
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM

# --- Model Loading ---

# Summarization model (BART). Use the PyTorch model class so it is consistent
# with the torch-based device selection passed to pipeline() below.
def load_summarizer():
    model_name = "VidhuMathur/bart-log-summarization"
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    summarizer = pipeline(
        "summarization",
        model=model,
        tokenizer=tokenizer,
        device=0 if torch.cuda.is_available() else -1,
    )
    return summarizer
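
# Usage sketch (illustrative, not from the original app; the log line is
# made up):
#   summarizer = load_summarizer()
#   out = summarizer("ERROR: connection to db-host:5432 refused, retrying...",
#                    max_length=350, min_length=40, do_sample=False)
#   out[0]["summary_text"]  # -> short natural-language summary of the log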

# Causal LM for analysis (Qwen)
def load_qwen():
    model_name = "Qwen/Qwen3-0.6B"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    )
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return model, tokenizer
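
# Note: some causal-LM tokenizers ship without a pad token, so load_qwen()
# aliases it to the EOS token; generate() can then pad without erroring.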

# --- Core Pipeline Functions ---

def extract_json_simple(text):
    start = text.find('{')
    if start == -1:
        return None
    brace_count = 0
    end = start
    for i, char in enumerate(text[start:], start):
        if char == '{':
            brace_count += 1
        elif char == '}':
            brace_count -= 1
            if brace_count == 0:
                end = i + 1
                break
    if brace_count == 0:
        return text[start:end]
    return None
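
# Worked example: extract_json_simple('Sure! {"a": {"b": 1}} done') scans from
# the first '{', tracks nesting depth, and returns '{"a": {"b": 1}}'. Caveat:
# braces inside JSON string values are counted too, so a literal "{" in a
# value can skew the result; acceptable here since the prompt asks the model
# for bare JSON.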

def ensure_required_keys(analysis, summary):
    # Backfill any missing or empty fields with generic defaults.
    required_keys = {
        "root_cause": f"Issue identified from log analysis: {summary[:100]}...",
        "debugging_steps": [
            "Check system logs for error patterns",
            "Verify service status and configuration",
            "Test connectivity and permissions"
        ],
        "debug_commands": [
            "systemctl status service-name",
            "journalctl -u service-name -n 50",
            "netstat -tlnp | grep port"
        ],
        "useful_links": [
            "https://docs.system-docs.com/troubleshooting",
            "https://stackoverflow.com/questions/tagged/debugging"
        ]
    }
    for key, default_value in required_keys.items():
        # An empty list is falsy, so this single check also covers [].
        if key not in analysis or not analysis[key]:
            analysis[key] = default_value
    return analysis
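
# Example: if the model returns only {"root_cause": "disk full"}, the three
# missing list fields are backfilled with the generic defaults above while
# the model-supplied root_cause is kept.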

def create_fallback_analysis(summary):
    summary_lower = summary.lower()
    if any(word in summary_lower for word in ['database', 'connection', 'sql']):
        return {
            "root_cause": "Database connection issue detected in the logs",
            "debugging_steps": [
                "Check if database service is running",
                "Verify database connection parameters",
                "Test network connectivity to database server",
                "Check database user permissions"
            ],
            "debug_commands": [
                "sudo systemctl status postgresql",
                "netstat -an | grep 5432",
                "psql -U username -h host -d database",
                "ping database-host"
            ],
            "useful_links": [
                "https://www.postgresql.org/docs/current/runtime.html",
                "https://dev.mysql.com/doc/refman/8.0/en/troubleshooting.html"
            ]
        }
    elif any(word in summary_lower for word in ['memory', 'heap', 'oom']):
        return {
            "root_cause": "Memory exhaustion or memory leak detected",
            "debugging_steps": [
                "Monitor current memory usage",
                "Check for memory leaks in application",
                "Review JVM heap settings if Java application",
                "Analyze memory dump if available"
            ],
            "debug_commands": [
                "free -h",
                "top -o %MEM",
                "jstat -gc PID",
                "ps aux --sort=-%mem | head"
            ],
            "useful_links": [
                "https://docs.oracle.com/javase/8/docs/technotes/guides/troubleshoot/memleaks.html",
                "https://linux.die.net/man/1/free"
            ]
        }
    elif any(word in summary_lower for word in ['disk', 'space', 'full']):
        return {
            "root_cause": "Disk space exhaustion causing system issues",
            "debugging_steps": [
                "Check disk usage across all filesystems",
                "Identify largest files and directories",
                "Clean up temporary files and logs",
                "Check for deleted files held by processes"
            ],
            "debug_commands": [
                "df -h",
                "du -sh /* | sort -hr",
                "find /var/log -type f -size +100M",
                "lsof +L1"
            ],
            "useful_links": [
                "https://linux.die.net/man/1/df",
                "https://www.cyberciti.biz/faq/linux-check-disk-space-command/"
            ]
        }
    else:
        return {
            "root_cause": f"System issue detected: {summary[:100]}...",
            "debugging_steps": [
                "Review complete error logs",
                "Check system resource usage",
                "Verify service configurations",
                "Test system connectivity"
            ],
            "debug_commands": [
                "systemctl --failed",
                "journalctl -p err -n 50",
                "htop",
                "netstat -tlnp"
            ],
            "useful_links": [
                "https://linux.die.net/man/1/systemctl",
                "https://www.freedesktop.org/software/systemd/man/journalctl.html"
            ]
        }
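
# The fallback is keyword-routed on the summary text: database/connection/sql,
# memory/heap/oom, and disk/space/full each get a targeted playbook; anything
# else falls through to the generic one above.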

def log_processing_pipeline(raw_log, summarizer, model, tokenizer):
    results = {
        'raw_log': raw_log,
        'summary': None,
        'analysis': None,
        'success': False,
        'errors': []
    }
    # Step 1: Summarization (truncation=True keeps very long logs from
    # overflowing the model's input window).
    try:
        summary_result = summarizer(raw_log, max_length=350, min_length=40, do_sample=False, truncation=True)
        summary_text = summary_result[0]['summary_text']
        results['summary'] = summary_text
    except Exception as e:
        results['errors'].append(f"Summarization failed: {e}")
        return results
    # Step 2: Analysis
    success = False
    attempts = 0
    max_attempts = 2
    while not success and attempts < max_attempts:
        attempts += 1
        prompt = f"""Analyze this log summary and respond with ONLY a JSON object:

Log: {summary_text}

Required JSON format:
{{
 "root_cause": "explain the main problem",
 "debugging_steps": ["step 1", "step 2", "step 3"],
 "debug_commands": ["command1", "command2", "command3"],
 "useful_links": ["link1", "link2"]
}}

JSON:"""
        try:
            inputs = tokenizer(prompt, return_tensors="pt", max_length=800, truncation=True)
            device = next(model.parameters()).device
            inputs = {k: v.to(device) for k, v in inputs.items()}
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=300,
                    temperature=0.2,
                    do_sample=True,
                    pad_token_id=tokenizer.eos_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                    repetition_penalty=1.1
                )
            response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
            json_str = extract_json_simple(response)
            if json_str:
                try:
                    parsed = json.loads(json_str)
                    fixed_analysis = ensure_required_keys(parsed, summary_text)
                    results['analysis'] = fixed_analysis
                    results['success'] = True
                    success = True
                except json.JSONDecodeError:
                    if attempts == max_attempts:
                        results['errors'].append(f"JSON parsing failed after {attempts} attempts")
            else:
                if attempts == max_attempts:
                    results['errors'].append("No valid JSON found in response")
        except Exception as e:
            if attempts == max_attempts:
                results['errors'].append(f"Generation failed: {e}")
    if not results['success']:
        results['analysis'] = create_fallback_analysis(summary_text)
        results['success'] = True
        results['errors'].append("Used fallback analysis due to model issues")
    return results
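
# Shape of the returned dict (illustrative values):
#   {"raw_log": "...", "summary": "...", "success": True, "errors": [],
#    "analysis": {"root_cause": "...", "debugging_steps": [...],
#                 "debug_commands": [...], "useful_links": [...]}}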

# --- Gradio Interface ---

def process_log_file(file_obj, summarizer, model, tokenizer):
    # Every return tuple must have 7 fields to match the 7 output components
    # wired up in main().
    empty = ("", "", "", "", "", "")
    if file_obj is None:
        return ("No file uploaded",) + empty
    try:
        # gr.File(type="filepath") passes a plain path string; older Gradio
        # versions pass a file-like object with a .name attribute.
        file_path = file_obj if isinstance(file_obj, str) else file_obj.name
        encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']
        log_content = None
        for encoding in encodings:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    log_content = f.read()
                break
            except UnicodeDecodeError:
                continue
        if log_content is None:
            return ("Encoding error",) + empty
        if not log_content.strip():
            return ("Empty file",) + empty
        if len(log_content) > 100000:
            log_content = log_content[:100000] + "\n... (file truncated)"
        results = log_processing_pipeline(log_content, summarizer, model, tokenizer)
        if results['success']:
            analysis = results['analysis']
            return (
                "Analysis complete",
                results['summary'],
                analysis.get('root_cause', ''),
                '\n'.join(analysis.get('debugging_steps', [])),
                '\n'.join(analysis.get('debug_commands', [])),
                '\n'.join(analysis.get('useful_links', [])),
                json.dumps(results, indent=2)
            )
        else:
            return ("Analysis failed",) + empty
    except Exception as e:
        return (f"Processing error: {str(e)}",) + empty

def main():
    summarizer = load_summarizer()
    model, tokenizer = load_qwen()
    with gr.Blocks(title="Minimal LogLens") as app:
        gr.Markdown("# Minimal LogLens Log Analyzer")
        file_input = gr.File(label="Upload Log File", file_types=[".txt", ".log", ".out", ".err"], type="filepath")
        analyze_btn = gr.Button("Analyze Log")
        status = gr.Textbox(label="Status", interactive=False)
        summary = gr.Textbox(label="Summary", lines=3, interactive=False)
        root_cause = gr.Textbox(label="Root Cause", lines=2, interactive=False)
        debug_steps = gr.Textbox(label="Debugging Steps", lines=4, interactive=False)
        debug_commands = gr.Textbox(label="Debug Commands", lines=4, interactive=False)
        useful_links = gr.Textbox(label="Useful Links", lines=2, interactive=False)
        json_output = gr.Code(label="Full JSON Output", language="json", interactive=False)
        analyze_btn.click(
            fn=lambda f: process_log_file(f, summarizer, model, tokenizer),
            inputs=file_input,
            outputs=[status, summary, root_cause, debug_steps, debug_commands, useful_links, json_output]
        )
    app.launch()

if __name__ == "__main__":
    main()
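
# Assumed runtime dependencies (not pinned anywhere in this file): gradio,
# torch, and transformers.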