# app.py
"""DevOps Fortune Teller -- AI-powered predictive log analysis.

A Gradio app: paste deployment/application logs, get pattern detection,
a health score, and heuristic predictions about issues before they escalate.
"""

import gradio as gr
from transformers import pipeline
import re
from datetime import datetime, timedelta
import torch

# Initialize the sentiment-analysis pipeline once at import time.
# Fall back to None so the app still runs (without sentiment scoring)
# when the model cannot be downloaded/loaded.
try:
    sentiment_analyzer = pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
    )
except Exception:  # narrowed from bare `except:` -- don't swallow SystemExit/KeyboardInterrupt
    sentiment_analyzer = None


def extract_log_level(line):
    """Return the log level of *line*: ERROR, WARN, INFO, DEBUG or UNKNOWN.

    Matching is case-insensitive and checked in severity order, so a line
    containing both ERROR and WARN is classified as ERROR.
    """
    upper = line.upper()  # hoisted: computed once instead of per branch
    if 'ERROR' in upper:
        return 'ERROR'
    if 'WARN' in upper or 'WARNING' in upper:
        return 'WARN'
    if 'INFO' in upper:
        return 'INFO'
    if 'DEBUG' in upper:
        return 'DEBUG'
    return 'UNKNOWN'


def analyze_patterns(lines):
    """Detect trouble patterns in log lines via keyword matching.

    Args:
        lines: iterable of log line strings.

    Returns:
        dict mapping pattern name -> bool (True if any line matched one of
        that pattern's keywords, case-insensitively).
    """
    patterns = {
        'memory_issues': False,
        'connection_issues': False,
        'performance_issues': False,
        'lock_issues': False,
        'disk_issues': False,
        'timeout_issues': False,
    }
    keywords = {
        'memory_issues': ['memory', 'oom', 'heap', 'ram'],
        'connection_issues': ['connection', 'timeout', 'refused', 'unreachable'],
        'performance_issues': ['slow', 'cpu', 'performance', 'latency'],
        'lock_issues': ['lock', 'deadlock', 'blocked'],
        'disk_issues': ['disk', 'storage', 'space', 'inode'],
        'timeout_issues': ['timeout', 'timed out', 'deadline exceeded'],
    }

    for line in lines:
        line_lower = line.lower()
        for pattern_type, pattern_keywords in keywords.items():
            if any(keyword in line_lower for keyword in pattern_keywords):
                patterns[pattern_type] = True

    return patterns


def generate_predictions(error_count, warn_count, patterns, sentiment_score):
    """Generate heuristic predictions from counts and detected patterns.

    Args:
        error_count: number of ERROR-level lines.
        warn_count: number of WARN-level lines.
        patterns: dict from analyze_patterns().
        sentiment_score: optional 0-100 sentiment score (currently unused
            here; kept for interface stability).

    Returns:
        list of prediction dicts with keys: icon, type, title, message,
        confidence (int, capped), action.
    """
    predictions = []

    # Memory issues prediction
    if patterns['memory_issues'] and warn_count > 0:
        confidence = min(95, 70 + (warn_count * 5))
        predictions.append({
            'icon': '⚠️',
            'type': 'warning',
            'title': 'Memory Pressure Detected',
            'message': ('Based on memory warnings, pod restart likely within '
                        '2-4 hours if load increases. Consider scaling '
                        'horizontally or increasing memory limits.'),
            'confidence': confidence,
            'action': 'Review memory usage metrics and consider pod autoscaling',
        })

    # Connection issues prediction
    if patterns['connection_issues']:
        confidence = min(95, 75 + (error_count * 3))
        predictions.append({
            'icon': '🔴',
            'type': 'critical',
            'title': 'Connection Instability',
            'message': ('Database/service connection issues may cascade to '
                        'dependent services. Network or connection pool '
                        'exhaustion detected.'),
            'confidence': confidence,
            'action': 'Check connection pool settings and network stability',
        })

    # Performance degradation
    if patterns['performance_issues']:
        confidence = min(90, 65 + (warn_count * 4))
        predictions.append({
            'icon': '⚠️',
            'type': 'warning',
            'title': 'Performance Degradation',
            'message': ('Slow queries or high CPU detected. Performance will '
                        'likely degrade further under increased load.'),
            'confidence': confidence,
            'action': 'Optimize queries and review resource allocation',
        })

    # Lock/Deadlock issues
    if patterns['lock_issues']:
        confidence = min(85, 60 + (error_count * 5))
        predictions.append({
            'icon': '🔴',
            'type': 'critical',
            'title': 'Resource Contention',
            'message': ('Lock acquisition failures suggest possible deadlock '
                        'scenario. Transaction conflicts detected.'),
            'confidence': confidence,
            'action': 'Review transaction isolation levels and locking strategy',
        })

    # Disk issues
    if patterns['disk_issues']:
        confidence = min(90, 70 + (error_count * 4))
        predictions.append({
            'icon': '🔴',
            'type': 'critical',
            'title': 'Disk Space Warning',
            'message': ('Disk space or inode exhaustion detected. Service '
                        'interruption imminent if not addressed.'),
            'confidence': confidence,
            'action': 'Clean up logs and temporary files, expand storage',
        })

    # Timeout cascade prediction (only when multiple errors corroborate it)
    if patterns['timeout_issues'] and error_count > 2:
        confidence = min(88, 68 + (error_count * 3))
        predictions.append({
            'icon': '⚠️',
            'type': 'warning',
            'title': 'Timeout Cascade Risk',
            'message': ('Multiple timeout events detected. This pattern often '
                        'leads to cascading failures across microservices.'),
            'confidence': confidence,
            'action': 'Increase timeout thresholds or implement circuit breakers',
        })

    # All clear: no predictions and zero errors
    if not predictions and error_count == 0:
        predictions.append({
            'icon': '✅',
            'type': 'success',
            'title': 'All Systems Nominal',
            'message': ('No concerning patterns detected. Your deployment '
                        'looks healthy! Keep monitoring.'),
            'confidence': 95,
            'action': 'Continue normal operations',
        })

    return predictions


def calculate_health_score(error_count, warn_count, info_count, sentiment_score):
    """Calculate an overall 0-100 health score.

    Starts at 100, deducts 15 per error and 5 per warning, then blends in
    the sentiment score (70/30 weighting) when one is available.
    info_count is accepted for interface compatibility but not weighted.
    """
    base_score = 100

    # Deduct points for errors and warnings
    base_score -= error_count * 15
    base_score -= warn_count * 5

    # Factor in sentiment if available
    if sentiment_score is not None:
        base_score = base_score * 0.7 + sentiment_score * 0.3

    return max(0, min(100, base_score))


def analyze_sentiment(lines):
    """Analyze sentiment of log messages; return a 0-100 score or None.

    Strips common timestamp/level prefixes, truncates each message to 512
    chars, analyzes at most 20 messages, and returns the percentage that
    the model labels POSITIVE. Returns None when the model is unavailable,
    no usable messages exist, or inference fails.
    """
    if not sentiment_analyzer:
        return None

    try:
        # Extract message content (remove timestamps and log levels)
        messages = []
        for line in lines:
            # Remove common log prefixes
            cleaned = re.sub(r'^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}', '', line)
            cleaned = re.sub(r'^(ERROR|WARN|WARNING|INFO|DEBUG)', '', cleaned)
            cleaned = cleaned.strip()
            if cleaned and len(cleaned) > 10:
                messages.append(cleaned[:512])  # Limit length

        if not messages:
            return None

        # Analyze sentiment (take average); limit batch to avoid timeout
        results = sentiment_analyzer(messages[:20])
        positive_count = sum(1 for r in results if r['label'] == 'POSITIVE')
        sentiment_score = (positive_count / len(results)) * 100

        return sentiment_score
    except Exception:  # narrowed from bare `except:`; inference errors degrade gracefully
        return None


def format_prediction_html(predictions):
    """Render a list of prediction dicts as styled HTML cards."""
    html = ""
    for pred in predictions:
        # Border/accent color keyed on severity type
        color = {
            'critical': '#ef4444',
            'warning': '#f59e0b',
            'success': '#10b981',
        }.get(pred['type'], '#6b7280')

        html += f"""
        <div style="border-left: 4px solid {color}; background: #f9fafb;
                    padding: 12px 16px; margin: 8px 0; border-radius: 6px;">
            <div style="font-weight: bold; color: {color};">
                {pred['icon']} {pred['title']}
            </div>
            <div style="margin: 6px 0; color: #374151;">{pred['message']}</div>
            <div style="font-size: 0.85em; color: #6b7280;">
                Confidence: {pred['confidence']}% | Action: {pred['action']}
            </div>
        </div>
        """
    return html


def analyze_logs(log_text):
    """Main analysis entry point wired to the Gradio button.

    Args:
        log_text: raw pasted log text.

    Returns:
        (summary_html, patterns_html, predictions_html) tuple of strings.
    """
    if not log_text.strip():
        return "⚠️ Please paste some logs to analyze", "", ""

    lines = [line.strip() for line in log_text.split('\n') if line.strip()]

    # Count log levels
    error_count = sum(1 for line in lines if extract_log_level(line) == 'ERROR')
    warn_count = sum(1 for line in lines if extract_log_level(line) == 'WARN')
    info_count = sum(1 for line in lines if extract_log_level(line) == 'INFO')

    # Analyze patterns
    patterns = analyze_patterns(lines)

    # Sentiment analysis (None when the model is unavailable)
    sentiment_score = analyze_sentiment(lines)

    # Calculate health score
    health_score = calculate_health_score(
        error_count, warn_count, info_count, sentiment_score
    )

    # Generate predictions
    predictions = generate_predictions(
        error_count, warn_count, patterns, sentiment_score
    )

    # Format summary. Health score may be a float after the sentiment blend,
    # so render it with :.0f instead of raw interpolation.
    health_color = ('#10b981' if health_score > 75
                    else '#f59e0b' if health_score > 50
                    else '#ef4444')
    summary = f"""
    <div style="padding: 16px; border-radius: 8px; background: #f9fafb;">
        <h2 style="margin: 0 0 4px 0;">🔮 DevOps Fortune Teller Analysis</h2>
        <p style="margin: 0 0 12px 0; color: #6b7280;">AI-Powered Predictive Log Analysis</p>
        <div style="display: flex; gap: 24px; text-align: center;">
            <div>
                <div style="font-size: 1.6em; font-weight: bold; color: #ef4444;">{error_count}</div>
                <div style="color: #6b7280;">Errors</div>
            </div>
            <div>
                <div style="font-size: 1.6em; font-weight: bold; color: #f59e0b;">{warn_count}</div>
                <div style="color: #6b7280;">Warnings</div>
            </div>
            <div>
                <div style="font-size: 1.6em; font-weight: bold; color: #3b82f6;">{info_count}</div>
                <div style="color: #6b7280;">Info</div>
            </div>
            <div>
                <div style="font-size: 1.6em; font-weight: bold; color: {health_color};">{health_score:.0f}%</div>
                <div style="color: #6b7280;">Health Score</div>
            </div>
        </div>
    </div>
    """

    # Format patterns detected -- previously computed but never rendered;
    # now list each triggered pattern in the panel.
    detected = [name.replace('_', ' ').title()
                for name, hit in patterns.items() if hit]
    pattern_items = (
        "".join(f"<li>{name}</li>" for name in detected)
        if detected else "<li>No known trouble patterns detected</li>"
    )
    patterns_html = f"""
    <div style="padding: 12px 16px;">
        <h3 style="margin: 0 0 8px 0;">🔍 Patterns Detected:</h3>
        <ul style="margin: 0; color: #374151;">{pattern_items}</ul>
    </div>
    """

    # Format predictions
    predictions_html = """
    <div style="padding: 12px 16px;">
        <h3 style="margin: 0 0 8px 0;">🎯 Predictions & Recommendations:</h3>
    </div>
    """ + format_prediction_html(predictions)

    return summary, patterns_html, predictions_html


# Sample logs for demo
sample_logs = """2026-01-10 14:23:45 INFO Deployment started for service-auth v2.1.0
2026-01-10 14:23:47 WARN Memory usage at 78% on pod-auth-3
2026-01-10 14:23:50 INFO Health check passed for 3/3 pods
2026-01-10 14:24:01 ERROR Connection timeout to database cluster db-primary
2026-01-10 14:24:02 INFO Retrying connection (attempt 1/3)
2026-01-10 14:24:05 WARN Slow query detected: SELECT * FROM users WHERE status='active' (2.3s)
2026-01-10 14:24:08 ERROR Connection timeout to database cluster db-primary
2026-01-10 14:24:10 INFO Connection restored to db-primary
2026-01-10 14:24:15 ERROR Failed to acquire lock on resource user_session_123
2026-01-10 14:24:18 WARN High CPU usage detected: 89% on pod-auth-2
2026-01-10 14:24:20 INFO Processing queue: 1247 items pending
2026-01-10 14:24:25 ERROR Disk space warning: /var/log at 92% capacity
2026-01-10 14:24:30 WARN Response time degradation: p95 latency 1.8s (threshold: 1.0s)"""

# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), title="DevOps Fortune Teller") as demo:
    gr.Markdown("""
    # 🔮 DevOps Fortune Teller
    ### AI-Powered Predictive Log Analysis for DevOps

    Paste your deployment, application, or error logs below and get AI-powered
    predictions about potential issues before they escalate.
    """)

    with gr.Row():
        with gr.Column(scale=1):
            log_input = gr.Textbox(
                label="📋 Paste Your Logs Here",
                placeholder="Paste your logs here (supports standard formats with ERROR, WARN, INFO levels)...",
                lines=15,
                max_lines=20,
            )
            with gr.Row():
                analyze_btn = gr.Button("🔮 Predict Issues", variant="primary", size="lg")
                sample_btn = gr.Button("📝 Load Sample Logs", size="lg")

        with gr.Column(scale=1):
            summary_output = gr.HTML(label="Summary")
            patterns_output = gr.HTML(label="Patterns")
            predictions_output = gr.HTML(label="Predictions")

    gr.Markdown("""
    ---
    ### 🎯 How It Works
    This tool uses transformer-based sentiment analysis combined with pattern recognition to:
    - Detect concerning patterns in your logs
    - Predict potential issues before they become critical
    - Provide actionable recommendations
    - Calculate a health score for your deployment

    **Supported Log Levels:** ERROR, WARN/WARNING, INFO, DEBUG
    """)

    # Button actions
    analyze_btn.click(
        fn=analyze_logs,
        inputs=[log_input],
        outputs=[summary_output, patterns_output, predictions_output],
    )
    sample_btn.click(
        fn=lambda: sample_logs,
        outputs=[log_input],
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()