# Spaces: Sleeping  (hosting-page status text captured with the file; not part of the program)
# app.py
import re
from datetime import datetime, timedelta

import gradio as gr
import torch
from transformers import pipeline
# Initialize the sentiment-analysis pipeline once, at import time.
# Loading is best-effort: if constructing the pipeline raises, the name is
# bound to None and analyze_sentiment() returns None instead of a score.
try:
    sentiment_analyzer = pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
    )
except Exception:
    # Deliberately not a bare ``except:`` so that KeyboardInterrupt and
    # SystemExit still propagate while the model is loading.
    sentiment_analyzer = None
# Level markers are matched as whole words, case-insensitively, so ordinary
# message text such as "information", "errors" or "debugger" is not mistaken
# for a level token.
_LOG_LEVEL_RE = re.compile(r"\b(ERROR|WARNING|WARN|INFO|DEBUG)\b", re.IGNORECASE)


def extract_log_level(line: str) -> str:
    """Return the log level named in a single log line.

    The first whole-word level token found in the line decides the result,
    so a level marker near the start of the line wins over level-like words
    that appear later in the message text.

    Args:
        line: One line of log text.

    Returns:
        'ERROR', 'WARN', 'INFO' or 'DEBUG' ('WARNING' is reported as
        'WARN'), or 'UNKNOWN' when the line contains no level token.
    """
    match = _LOG_LEVEL_RE.search(line)
    if match is None:
        return 'UNKNOWN'
    level = match.group(1).upper()
    # Both spellings of the warning level share one bucket.
    return 'WARN' if level == 'WARNING' else level
# Keywords that flag each problem category. Matching is done on the
# lower-cased line, so every keyword here is lower-case.
_PATTERN_KEYWORDS = {
    'memory_issues': ['memory', 'oom', 'heap', 'ram'],
    'connection_issues': ['connection', 'timeout', 'refused', 'unreachable'],
    'performance_issues': ['slow', 'cpu', 'performance', 'latency'],
    'lock_issues': ['lock', 'deadlock', 'blocked'],
    'disk_issues': ['disk', 'storage', 'space', 'inode'],
    'timeout_issues': ['timeout', 'timed out', 'deadline exceeded'],
}

# One compiled matcher per category. Each keyword must start at a word
# boundary, which keeps suffixed forms ("oomkilled", "locks", "timeouts")
# matching while rejecting accidental substrings ("namespace" for "space",
# "program" for "ram", "clock" for "lock").
_PATTERN_MATCHERS = {
    pattern_type: re.compile(
        r"\b(?:" + "|".join(re.escape(word) for word in words) + r")"
    )
    for pattern_type, words in _PATTERN_KEYWORDS.items()
}


def analyze_patterns(lines):
    """Detect which problem categories are mentioned anywhere in the logs.

    Args:
        lines: Iterable of log lines (strings).

    Returns:
        A dict with the keys 'memory_issues', 'connection_issues',
        'performance_issues', 'lock_issues', 'disk_issues' and
        'timeout_issues'. A value is True when at least one line contains
        one of that category's keywords. Every key is always present, so
        an empty input yields all False.
    """
    patterns = dict.fromkeys(_PATTERN_KEYWORDS, False)
    for line in lines:
        line_lower = line.lower()
        for pattern_type, matcher in _PATTERN_MATCHERS.items():
            # Skip categories already found; a flag never goes back to False.
            if not patterns[pattern_type] and matcher.search(line_lower):
                patterns[pattern_type] = True
        if all(patterns.values()):
            break  # nothing left to discover
    return patterns
def generate_predictions(error_count, warn_count, patterns, sentiment_score):
    """Turn log counts and detected patterns into a list of predictions.

    Args:
        error_count: Number of ERROR lines in the analysed logs.
        warn_count: Number of WARN lines in the analysed logs.
        patterns: Dict of boolean flags with the keys 'memory_issues',
            'connection_issues', 'performance_issues', 'lock_issues',
            'disk_issues' and 'timeout_issues'.
        sentiment_score: Accepted for interface compatibility; it is not
            used by any rule.

    Returns:
        A list of dicts, each with the keys 'icon', 'type', 'title',
        'message', 'confidence' and 'action', in the order the rules are
        evaluated. When no rule fires, a single "all clear" entry is
        returned if there are no errors; with errors but no recognised
        pattern the list is empty.
    """
    # Icons are written as escapes so the glyphs survive re-encoding.
    warning_icon = '\u26a0\ufe0f'   # warning sign
    critical_icon = '\U0001f534'    # red circle
    success_icon = '\u2705'         # check mark

    predictions = []

    def add(icon, kind, title, message, confidence, action):
        """Append one prediction record."""
        predictions.append({
            'icon': icon,
            'type': kind,
            'title': title,
            'message': message,
            'confidence': confidence,
            'action': action,
        })

    # Memory issues prediction (only meaningful alongside warnings)
    if patterns['memory_issues'] and warn_count > 0:
        add(
            warning_icon,
            'warning',
            'Memory Pressure Detected',
            'Based on memory warnings, pod restart likely within 2-4 hours if load increases. Consider scaling horizontally or increasing memory limits.',
            min(95, 70 + (warn_count * 5)),
            'Review memory usage metrics and consider pod autoscaling',
        )

    # Connection issues prediction
    if patterns['connection_issues']:
        add(
            critical_icon,
            'critical',
            'Connection Instability',
            'Database/service connection issues may cascade to dependent services. Network or connection pool exhaustion detected.',
            min(95, 75 + (error_count * 3)),
            'Check connection pool settings and network stability',
        )

    # Performance degradation
    if patterns['performance_issues']:
        add(
            warning_icon,
            'warning',
            'Performance Degradation',
            'Slow queries or high CPU detected. Performance will likely degrade further under increased load.',
            min(90, 65 + (warn_count * 4)),
            'Optimize queries and review resource allocation',
        )

    # Lock/Deadlock issues
    if patterns['lock_issues']:
        add(
            critical_icon,
            'critical',
            'Resource Contention',
            'Lock acquisition failures suggest possible deadlock scenario. Transaction conflicts detected.',
            min(85, 60 + (error_count * 5)),
            'Review transaction isolation levels and locking strategy',
        )

    # Disk issues
    if patterns['disk_issues']:
        add(
            critical_icon,
            'critical',
            'Disk Space Warning',
            'Disk space or inode exhaustion detected. Service interruption imminent if not addressed.',
            min(90, 70 + (error_count * 4)),
            'Clean up logs and temporary files, expand storage',
        )

    # Timeout cascade prediction (requires more than two errors)
    if patterns['timeout_issues'] and error_count > 2:
        add(
            warning_icon,
            'warning',
            'Timeout Cascade Risk',
            'Multiple timeout events detected. This pattern often leads to cascading failures across microservices.',
            min(88, 68 + (error_count * 3)),
            'Increase timeout thresholds or implement circuit breakers',
        )

    # All clear: nothing fired and there are no errors at all
    if not predictions and error_count == 0:
        add(
            success_icon,
            'success',
            'All Systems Nominal',
            'No concerning patterns detected. Your deployment looks healthy! Keep monitoring.',
            95,
            'Continue normal operations',
        )

    return predictions
def calculate_health_score(error_count, warn_count, info_count, sentiment_score):
    """Compute a 0-100 health score for the analysed logs.

    Starts from 100, subtracts 15 per error and 5 per warning, then (when a
    sentiment score is supplied) blends the result 70/30 with that score.
    The outcome is clamped to the range 0..100. ``info_count`` is accepted
    but does not influence the result.
    """
    score = 100 - error_count * 15
    score = score - warn_count * 5

    # Blend with sentiment only when a score is actually available.
    if sentiment_score is not None:
        score = score * 0.7 + sentiment_score * 0.3

    # Clamp to the valid range.
    if score >= 100:
        return 100
    if score <= 0:
        return 0
    return score
# Leading "YYYY-MM-DD HH:MM:SS" timestamp, optionally with fractional seconds.
_TIMESTAMP_PREFIX_RE = re.compile(
    r'^\s*\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}(?:[.,]\d+)?'
)
# Leading level token. Whitespace left behind by the timestamp removal is
# skipped, and WARNING is listed before WARN so no "ING" remnant is left.
_LEVEL_PREFIX_RE = re.compile(r'^\s*(?:ERROR|WARNING|WARN|INFO|DEBUG)\b')


def analyze_sentiment(lines):
    """Score the sentiment of the log messages.

    Each line has its leading timestamp and log level removed. Messages
    longer than 10 characters are kept, truncated to 512 characters, and at
    most the first 20 of them are passed to the module-level
    ``sentiment_analyzer``.

    Args:
        lines: Iterable of log lines (strings).

    Returns:
        The percentage (0-100, float) of analysed messages labelled
        'POSITIVE', or None when no analyzer is loaded, no message
        qualifies, or the analysis raises.
    """
    if not sentiment_analyzer:
        return None

    max_messages = 20  # limit the batch to avoid timeouts
    try:
        messages = []
        for line in lines:
            cleaned = _TIMESTAMP_PREFIX_RE.sub('', line)
            cleaned = _LEVEL_PREFIX_RE.sub('', cleaned).strip()
            if len(cleaned) > 10:
                messages.append(cleaned[:512])  # limit length
                if len(messages) == max_messages:
                    break  # later lines would be discarded anyway

        if not messages:
            return None

        results = sentiment_analyzer(messages)
        positive_count = sum(1 for r in results if r['label'] == 'POSITIVE')
        return (positive_count / len(results)) * 100
    except Exception:
        # Sentiment is an optional signal: any failure degrades to "no
        # score" instead of aborting the whole analysis.
        return None
def format_prediction_html(predictions):
    """Render prediction records as a string of HTML cards.

    Each record must provide 'type', 'icon', 'title', 'message',
    'confidence' and 'action'. The card's left border colour is chosen by
    'type' ('critical', 'warning', 'success'), with grey for anything else.
    An empty list renders as an empty string.
    """
    accent_by_type = {
        'critical': '#ef4444',
        'warning': '#f59e0b',
        'success': '#10b981',
    }
    fallback_accent = '#6b7280'

    cards = []
    for entry in predictions:
        accent = accent_by_type.get(entry['type'], fallback_accent)
        cards.append(f"""
        <div style="border-left: 4px solid {accent}; padding: 12px; margin: 10px 0; background: #f9fafb; border-radius: 4px;">
            <div style="font-size: 18px; margin-bottom: 4px;">{entry['icon']} <strong>{entry['title']}</strong></div>
            <div style="color: #4b5563; margin-bottom: 8px;">{entry['message']}</div>
            <div style="font-size: 12px; color: #6b7280;">
                <strong>Confidence:</strong> {entry['confidence']}% |
                <strong>Action:</strong> {entry['action']}
            </div>
        </div>
        """)
    return "".join(cards)
def analyze_logs(log_text):
    """Analyse pasted log text and return three HTML fragments.

    Args:
        log_text: Raw log text, one entry per line. ``None`` and
            blank/whitespace-only text are treated as "nothing to analyse".

    Returns:
        A 3-tuple ``(summary_html, patterns_html, predictions_html)``. For
        empty input the first element is a short prompt and the other two
        are empty strings.
    """
    if not log_text or not log_text.strip():
        return "\u26a0\ufe0f Please paste some logs to analyze", "", ""

    lines = [line.strip() for line in log_text.split('\n') if line.strip()]

    # Classify each line once, then count per level.
    levels = [extract_log_level(line) for line in lines]
    error_count = levels.count('ERROR')
    warn_count = levels.count('WARN')
    info_count = levels.count('INFO')

    # Analyze patterns
    patterns = analyze_patterns(lines)

    # Sentiment analysis (None when unavailable)
    sentiment_score = analyze_sentiment(lines)

    # Calculate health score
    health_score = calculate_health_score(error_count, warn_count, info_count, sentiment_score)

    # Generate predictions
    predictions = generate_predictions(error_count, warn_count, patterns, sentiment_score)

    # Format summary
    if health_score > 75:
        health_color = '#10b981'
    elif health_score > 50:
        health_color = '#f59e0b'
    else:
        health_color = '#ef4444'
    # The score becomes a float once sentiment is blended in; show a whole
    # percentage rather than the raw float representation.
    health_display = f"{health_score:.0f}"

    summary = f"""
    <div style="padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 8px; color: white;">
        <h2 style="margin: 0 0 10px 0;">\U0001f52e DevOps Fortune Teller Analysis</h2>
        <div style="font-size: 14px; opacity: 0.9;">AI-Powered Predictive Log Analysis</div>
    </div>
    <div style="display: grid; grid-template-columns: repeat(4, 1fr); gap: 10px; margin: 20px 0;">
        <div style="background: #fee2e2; padding: 15px; border-radius: 8px; text-align: center;">
            <div style="font-size: 24px; font-weight: bold; color: #dc2626;">{error_count}</div>
            <div style="color: #991b1b; font-size: 12px;">Errors</div>
        </div>
        <div style="background: #fef3c7; padding: 15px; border-radius: 8px; text-align: center;">
            <div style="font-size: 24px; font-weight: bold; color: #d97706;">{warn_count}</div>
            <div style="color: #92400e; font-size: 12px;">Warnings</div>
        </div>
        <div style="background: #dbeafe; padding: 15px; border-radius: 8px; text-align: center;">
            <div style="font-size: 24px; font-weight: bold; color: #2563eb;">{info_count}</div>
            <div style="color: #1e40af; font-size: 12px;">Info</div>
        </div>
        <div style="background: {health_color}20; padding: 15px; border-radius: 8px; text-align: center;">
            <div style="font-size: 24px; font-weight: bold; color: {health_color};">{health_display}%</div>
            <div style="color: #374151; font-size: 12px;">Health Score</div>
        </div>
    </div>
    """

    # Format patterns detected
    pattern_names = {
        'memory_issues': 'Memory Pressure',
        'connection_issues': 'Connection Problems',
        'performance_issues': 'Performance Issues',
        'lock_issues': 'Lock Contention',
        'disk_issues': 'Disk Space Issues',
        'timeout_issues': 'Timeout Events',
    }
    detected = [pattern_names[key] for key, found in patterns.items() if found]
    if detected:
        items = "".join(f"<li>{name}</li>" for name in detected)
    else:
        items = "<li>No critical patterns detected</li>"
    # NOTE(review): the heading glyph was unreadable in the source; a
    # magnifying glass is assumed here - confirm against the original file.
    patterns_html = (
        "<h3>\U0001f50d Patterns Detected:</h3><ul style='color: #4b5563;'>"
        + items
        + "</ul>"
    )

    # Format predictions
    predictions_html = (
        "<h3>\U0001f3af Predictions & Recommendations:</h3>"
        + format_prediction_html(predictions)
    )

    return summary, patterns_html, predictions_html
# Demo input loaded by the "Load Sample Logs" button: thirteen log lines
# joined with newlines (no trailing newline).
sample_logs = "\n".join((
    "2026-01-10 14:23:45 INFO Deployment started for service-auth v2.1.0",
    "2026-01-10 14:23:47 WARN Memory usage at 78% on pod-auth-3",
    "2026-01-10 14:23:50 INFO Health check passed for 3/3 pods",
    "2026-01-10 14:24:01 ERROR Connection timeout to database cluster db-primary",
    "2026-01-10 14:24:02 INFO Retrying connection (attempt 1/3)",
    "2026-01-10 14:24:05 WARN Slow query detected: SELECT * FROM users WHERE status='active' (2.3s)",
    "2026-01-10 14:24:08 ERROR Connection timeout to database cluster db-primary",
    "2026-01-10 14:24:10 INFO Connection restored to db-primary",
    "2026-01-10 14:24:15 ERROR Failed to acquire lock on resource user_session_123",
    "2026-01-10 14:24:18 WARN High CPU usage detected: 89% on pod-auth-2",
    "2026-01-10 14:24:20 INFO Processing queue: 1247 items pending",
    "2026-01-10 14:24:25 ERROR Disk space warning: /var/log at 92% capacity",
    "2026-01-10 14:24:30 WARN Response time degradation: p95 latency 1.8s (threshold: 1.0s)",
))
# Create Gradio interface: log input and buttons on the left, the three
# HTML result panes on the right. Emoji are written as escapes so the
# labels survive re-encoding.
with gr.Blocks(theme=gr.themes.Soft(), title="DevOps Fortune Teller") as demo:
    gr.Markdown("""
    # \U0001f52e DevOps Fortune Teller
    ### AI-Powered Predictive Log Analysis for DevOps

    Paste your deployment, application, or error logs below and get AI-powered predictions about potential issues before they escalate.
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # NOTE(review): the glyphs on the textbox label and on the
            # sample button were unreadable in the source; clipboard and
            # memo emoji are assumed - confirm against the original file.
            log_input = gr.Textbox(
                label="\U0001f4cb Paste Your Logs Here",
                placeholder="Paste your logs here (supports standard formats with ERROR, WARN, INFO levels)...",
                lines=15,
                max_lines=20,
            )
            with gr.Row():
                analyze_btn = gr.Button("\U0001f52e Predict Issues", variant="primary", size="lg")
                sample_btn = gr.Button("\U0001f4dd Load Sample Logs", size="lg")

        with gr.Column(scale=1):
            summary_output = gr.HTML(label="Summary")
            patterns_output = gr.HTML(label="Patterns")
            predictions_output = gr.HTML(label="Predictions")

    # Blank lines keep the bullet list and the closing paragraph as separate
    # Markdown blocks.
    gr.Markdown("""
    ---
    ### \U0001f3af How It Works

    This tool uses transformer-based sentiment analysis combined with pattern recognition to:

    - Detect concerning patterns in your logs
    - Predict potential issues before they become critical
    - Provide actionable recommendations
    - Calculate a health score for your deployment

    **Supported Log Levels:** ERROR, WARN/WARNING, INFO, DEBUG
    """)

    # Button actions
    analyze_btn.click(
        fn=analyze_logs,
        inputs=[log_input],
        outputs=[summary_output, patterns_output, predictions_output],
    )
    # Fill the textbox with the bundled demo logs.
    sample_btn.click(
        fn=lambda: sample_logs,
        outputs=[log_input],
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()