# Source: Hugging Face Space file view (Space status: Sleeping; file size: 14,090 bytes; revision 76ebfdf)
# app.py
import logging
import re
from datetime import datetime, timedelta

import gradio as gr
import torch
from transformers import pipeline
# Initialize the sentiment analysis pipeline once, at import time.
# Loading is best-effort: on failure ``sentiment_analyzer`` is set to None and
# the failure is logged instead of being silently discarded.
try:
    sentiment_analyzer = pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
    )
except Exception:  # not a bare except: KeyboardInterrupt/SystemExit must propagate
    logging.getLogger(__name__).warning(
        "Sentiment model could not be loaded; continuing without sentiment analysis",
        exc_info=True,
    )
    sentiment_analyzer = None
# Whole-word, case-insensitive match for the supported level names.
# Word boundaries keep "errors", "information", "debugger" etc. from being
# mistaken for a level marker.
_LOG_LEVEL_RE = re.compile(r'\b(ERROR|WARN(?:ING)?|INFO|DEBUG)\b', re.IGNORECASE)


def extract_log_level(line: str) -> str:
    """Return the log level named in ``line``.

    The first whole-word occurrence of ERROR, WARN/WARNING, INFO or DEBUG
    (any letter case) decides the result, so a line such as
    ``"INFO No errors found"`` is reported as ``'INFO'`` rather than
    ``'ERROR'``.

    Args:
        line: A single log line.

    Returns:
        One of ``'ERROR'``, ``'WARN'``, ``'INFO'``, ``'DEBUG'`` or
        ``'UNKNOWN'`` when no level word is present. ``WARNING`` is
        normalised to ``'WARN'``.
    """
    match = _LOG_LEVEL_RE.search(line)
    if match is None:
        return 'UNKNOWN'
    level = match.group(1).upper()
    return 'WARN' if level == 'WARNING' else level
def analyze_patterns(lines):
    """Report which problem categories are mentioned anywhere in ``lines``.

    Matching is a case-insensitive substring test of each category's keywords
    against every line.

    Args:
        lines: Iterable of log lines (strings).

    Returns:
        dict mapping each category name ('memory_issues', 'connection_issues',
        'performance_issues', 'lock_issues', 'disk_issues', 'timeout_issues')
        to True when at least one of its keywords occurs in at least one line.
    """
    category_terms = (
        ('memory_issues', ('memory', 'oom', 'heap', 'ram')),
        ('connection_issues', ('connection', 'timeout', 'refused', 'unreachable')),
        ('performance_issues', ('slow', 'cpu', 'performance', 'latency')),
        ('lock_issues', ('lock', 'deadlock', 'blocked')),
        ('disk_issues', ('disk', 'storage', 'space', 'inode')),
        ('timeout_issues', ('timeout', 'timed out', 'deadline exceeded')),
    )
    # Lower-case each line once instead of once per category.
    lowered = [entry.lower() for entry in lines]
    return {
        category: any(term in text for text in lowered for term in terms)
        for category, terms in category_terms
    }
def generate_predictions(error_count, warn_count, patterns, sentiment_score):
    """Build the list of prediction cards for the detected patterns.

    Args:
        error_count: Number of ERROR lines in the analysed log.
        warn_count: Number of WARN lines in the analysed log.
        patterns: Mapping with boolean flags for 'memory_issues',
            'connection_issues', 'performance_issues', 'lock_issues',
            'disk_issues' and 'timeout_issues'.
        sentiment_score: Accepted for interface compatibility; not used by
            any rule in this function.

    Returns:
        A list of dicts, each with the keys 'icon', 'type', 'title',
        'message', 'confidence' and 'action'. Rules are evaluated in a fixed
        order (memory, connection, performance, lock, disk, timeout). If no
        rule fires and ``error_count`` is 0, a single "all clear" entry is
        returned; if no rule fires but errors exist, the list is empty.
    """
    icons = {'warning': '⚠️', 'critical': '🔴', 'success': '✅'}
    predictions = []

    def add(kind, title, message, confidence, action):
        # Keep the key order stable: consumers format these dicts into HTML.
        predictions.append({
            'icon': icons[kind],
            'type': kind,
            'title': title,
            'message': message,
            'confidence': confidence,
            'action': action,
        })

    # Memory pressure only counts when at least one warning was logged.
    if patterns['memory_issues'] and warn_count > 0:
        add(
            'warning',
            'Memory Pressure Detected',
            'Based on memory warnings, pod restart likely within 2-4 hours if load increases. Consider scaling horizontally or increasing memory limits.',
            min(95, 70 + (warn_count * 5)),
            'Review memory usage metrics and consider pod autoscaling',
        )

    if patterns['connection_issues']:
        add(
            'critical',
            'Connection Instability',
            'Database/service connection issues may cascade to dependent services. Network or connection pool exhaustion detected.',
            min(95, 75 + (error_count * 3)),
            'Check connection pool settings and network stability',
        )

    if patterns['performance_issues']:
        add(
            'warning',
            'Performance Degradation',
            'Slow queries or high CPU detected. Performance will likely degrade further under increased load.',
            min(90, 65 + (warn_count * 4)),
            'Optimize queries and review resource allocation',
        )

    if patterns['lock_issues']:
        add(
            'critical',
            'Resource Contention',
            'Lock acquisition failures suggest possible deadlock scenario. Transaction conflicts detected.',
            min(85, 60 + (error_count * 5)),
            'Review transaction isolation levels and locking strategy',
        )

    if patterns['disk_issues']:
        add(
            'critical',
            'Disk Space Warning',
            'Disk space or inode exhaustion detected. Service interruption imminent if not addressed.',
            min(90, 70 + (error_count * 4)),
            'Clean up logs and temporary files, expand storage',
        )

    # A timeout cascade is only predicted once more than two errors were seen.
    if patterns['timeout_issues'] and error_count > 2:
        add(
            'warning',
            'Timeout Cascade Risk',
            'Multiple timeout events detected. This pattern often leads to cascading failures across microservices.',
            min(88, 68 + (error_count * 3)),
            'Increase timeout thresholds or implement circuit breakers',
        )

    # All clear: nothing flagged and no errors at all.
    if not predictions and error_count == 0:
        add(
            'success',
            'All Systems Nominal',
            'No concerning patterns detected. Your deployment looks healthy! Keep monitoring.',
            95,
            'Continue normal operations',
        )

    return predictions
def calculate_health_score(error_count, warn_count, info_count, sentiment_score):
    """Compute a 0-100 health score from log-level counts and sentiment.

    Starts at 100, subtracts 15 per error and 5 per warning, then (when a
    sentiment score is supplied) blends 70% of that value with 30% of the
    sentiment score. The result is clamped to the range 0..100.

    Args:
        error_count: Number of ERROR lines.
        warn_count: Number of WARN lines.
        info_count: Number of INFO lines; does not influence the score.
        sentiment_score: Sentiment percentage, or None to skip blending.

    Returns:
        The clamped score (int when no sentiment is blended in).
    """
    lowest, highest = 0, 100
    score = highest - error_count * 15 - warn_count * 5
    if sentiment_score is not None:
        # Weighted blend: log-level penalties dominate, sentiment nudges.
        score = score * 0.7 + sentiment_score * 0.3
    return max(lowest, min(highest, score))
# Leading "YYYY-MM-DD HH:MM:SS" timestamp plus the whitespace that follows it,
# so the level word that comes next sits at the start of the remainder.
_TIMESTAMP_PREFIX_RE = re.compile(r'^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\s*')
# Leading level word. WARNING is listed before WARN so the longer word is
# consumed whole; \b keeps words like "INFORMATION" intact.
_LEVEL_PREFIX_RE = re.compile(r'^(?:ERROR|WARNING|WARN|INFO|DEBUG)\b\s*')


def analyze_sentiment(lines):
    """Estimate how "positive" the log messages read, as a percentage.

    Each line has its leading timestamp and log level removed; messages of
    10 characters or fewer are skipped and the rest are capped at 512
    characters. At most the first 20 messages are passed to the module-level
    ``sentiment_analyzer``.

    Args:
        lines: Iterable of log lines (strings).

    Returns:
        Percentage (0-100, float) of analysed messages labelled 'POSITIVE',
        or None when no analyzer is available, no usable message remains,
        or the analyzer fails (the failure is logged).
    """
    if not sentiment_analyzer:
        return None

    messages = []
    for line in lines:
        cleaned = _TIMESTAMP_PREFIX_RE.sub('', line)
        cleaned = _LEVEL_PREFIX_RE.sub('', cleaned).strip()
        if len(cleaned) > 10:
            messages.append(cleaned[:512])  # Limit length

    if not messages:
        return None

    try:
        # Limit the batch to avoid timeouts on large pastes.
        results = sentiment_analyzer(messages[:20])
        positive_count = sum(1 for r in results if r['label'] == 'POSITIVE')
        return (positive_count / len(results)) * 100
    except Exception:
        # Sentiment is an optional signal: report the problem and carry on.
        logging.getLogger(__name__).warning(
            "Sentiment analysis failed; skipping sentiment score",
            exc_info=True,
        )
        return None
def format_prediction_html(predictions):
    """Render prediction dicts as a string of HTML cards.

    Args:
        predictions: Iterable of dicts with the keys 'type', 'icon', 'title',
            'message', 'confidence' and 'action'.

    Returns:
        One ``<div>`` card per prediction, concatenated in input order; an
        empty string when there are no predictions. The card's left border
        colour depends on 'type' (critical/warning/success), with grey as the
        fallback for any other value.
    """
    border_colors = {
        'critical': '#ef4444',
        'warning': '#f59e0b',
        'success': '#10b981',
    }

    def render_card(item):
        accent = border_colors.get(item['type'], '#6b7280')
        return (
            "\n"
            f"<div style=\"border-left: 4px solid {accent}; padding: 12px; margin: 10px 0; background: #f9fafb; border-radius: 4px;\">\n"
            f"<div style=\"font-size: 18px; margin-bottom: 4px;\">{item['icon']} <strong>{item['title']}</strong></div>\n"
            f"<div style=\"color: #4b5563; margin-bottom: 8px;\">{item['message']}</div>\n"
            "<div style=\"font-size: 12px; color: #6b7280;\">\n"
            f"<strong>Confidence:</strong> {item['confidence']}% |\n"
            f"<strong>Action:</strong> {item['action']}\n"
            "</div>\n"
            "</div>\n"
        )

    return "".join(render_card(item) for item in predictions)
def analyze_logs(log_text):
    """Analyse pasted log text and return three HTML fragments.

    Non-blank lines are classified with ``extract_log_level``, scanned with
    ``analyze_patterns``, scored with ``analyze_sentiment`` and
    ``calculate_health_score``, and turned into recommendations with
    ``generate_predictions``.

    Args:
        log_text: Raw log text, one entry per line. ``None`` or
            whitespace-only text is treated as "nothing to analyse".

    Returns:
        Tuple ``(summary_html, patterns_html, predictions_html)``. For empty
        input the first element is a prompt message and the other two are
        empty strings.
    """
    if not log_text or not log_text.strip():
        return "⚠️ Please paste some logs to analyze", "", ""

    lines = [line.strip() for line in log_text.split('\n') if line.strip()]

    # Classify every line once, then count per level.
    levels = [extract_log_level(line) for line in lines]
    error_count = levels.count('ERROR')
    warn_count = levels.count('WARN')
    info_count = levels.count('INFO')

    patterns = analyze_patterns(lines)
    sentiment_score = analyze_sentiment(lines)
    health_score = calculate_health_score(error_count, warn_count, info_count, sentiment_score)
    predictions = generate_predictions(error_count, warn_count, patterns, sentiment_score)

    # Green above 75, amber above 50, red otherwise.
    health_color = '#10b981' if health_score > 75 else '#f59e0b' if health_score > 50 else '#ef4444'
    # The score is a float once sentiment is blended in; show it as a whole
    # percentage instead of raw floating-point digits.
    summary = f"""
    <div style="padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 8px; color: white;">
        <h2 style="margin: 0 0 10px 0;">🔮 DevOps Fortune Teller Analysis</h2>
        <div style="font-size: 14px; opacity: 0.9;">AI-Powered Predictive Log Analysis</div>
    </div>
    <div style="display: grid; grid-template-columns: repeat(4, 1fr); gap: 10px; margin: 20px 0;">
        <div style="background: #fee2e2; padding: 15px; border-radius: 8px; text-align: center;">
            <div style="font-size: 24px; font-weight: bold; color: #dc2626;">{error_count}</div>
            <div style="color: #991b1b; font-size: 12px;">Errors</div>
        </div>
        <div style="background: #fef3c7; padding: 15px; border-radius: 8px; text-align: center;">
            <div style="font-size: 24px; font-weight: bold; color: #d97706;">{warn_count}</div>
            <div style="color: #92400e; font-size: 12px;">Warnings</div>
        </div>
        <div style="background: #dbeafe; padding: 15px; border-radius: 8px; text-align: center;">
            <div style="font-size: 24px; font-weight: bold; color: #2563eb;">{info_count}</div>
            <div style="color: #1e40af; font-size: 12px;">Info</div>
        </div>
        <div style="background: {health_color}20; padding: 15px; border-radius: 8px; text-align: center;">
            <div style="font-size: 24px; font-weight: bold; color: {health_color};">{health_score:.0f}%</div>
            <div style="color: #374151; font-size: 12px;">Health Score</div>
        </div>
    </div>
    """

    # Human-readable names for the pattern flags, listed in detection order.
    pattern_names = {
        'memory_issues': 'Memory Pressure',
        'connection_issues': 'Connection Problems',
        'performance_issues': 'Performance Issues',
        'lock_issues': 'Lock Contention',
        'disk_issues': 'Disk Space Issues',
        'timeout_issues': 'Timeout Events',
    }
    detected = [pattern_names[k] for k, v in patterns.items() if v]
    pattern_items = (
        "".join(f"<li>{pattern}</li>" for pattern in detected)
        or "<li>No critical patterns detected</li>"
    )
    patterns_html = (
        "<h3>🔍 Patterns Detected:</h3><ul style='color: #4b5563;'>"
        + pattern_items
        + "</ul>"
    )

    predictions_html = "<h3>🎯 Predictions & Recommendations:</h3>" + format_prediction_html(predictions)

    return summary, patterns_html, predictions_html
# Demo input: a short deployment log mixing INFO, WARN and ERROR entries.
sample_logs = "\n".join((
    "2026-01-10 14:23:45 INFO Deployment started for service-auth v2.1.0",
    "2026-01-10 14:23:47 WARN Memory usage at 78% on pod-auth-3",
    "2026-01-10 14:23:50 INFO Health check passed for 3/3 pods",
    "2026-01-10 14:24:01 ERROR Connection timeout to database cluster db-primary",
    "2026-01-10 14:24:02 INFO Retrying connection (attempt 1/3)",
    "2026-01-10 14:24:05 WARN Slow query detected: SELECT * FROM users WHERE status='active' (2.3s)",
    "2026-01-10 14:24:08 ERROR Connection timeout to database cluster db-primary",
    "2026-01-10 14:24:10 INFO Connection restored to db-primary",
    "2026-01-10 14:24:15 ERROR Failed to acquire lock on resource user_session_123",
    "2026-01-10 14:24:18 WARN High CPU usage detected: 89% on pod-auth-2",
    "2026-01-10 14:24:20 INFO Processing queue: 1247 items pending",
    "2026-01-10 14:24:25 ERROR Disk space warning: /var/log at 92% capacity",
    "2026-01-10 14:24:30 WARN Response time degradation: p95 latency 1.8s (threshold: 1.0s)",
))
# Create the Gradio interface: log input and buttons on the left, the three
# HTML result panes on the right, followed by a short explanation.
with gr.Blocks(theme=gr.themes.Soft(), title="DevOps Fortune Teller") as demo:
    gr.Markdown("""
    # 🔮 DevOps Fortune Teller
    ### AI-Powered Predictive Log Analysis for DevOps

    Paste your deployment, application, or error logs below and get AI-powered predictions about potential issues before they escalate.
    """)

    with gr.Row():
        with gr.Column(scale=1):
            log_input = gr.Textbox(
                label="📋 Paste Your Logs Here",
                placeholder="Paste your logs here (supports standard formats with ERROR, WARN, INFO levels)...",
                lines=15,
                max_lines=20,
            )
            with gr.Row():
                analyze_btn = gr.Button("🔮 Predict Issues", variant="primary", size="lg")
                sample_btn = gr.Button("📄 Load Sample Logs", size="lg")

        with gr.Column(scale=1):
            summary_output = gr.HTML(label="Summary")
            patterns_output = gr.HTML(label="Patterns")
            predictions_output = gr.HTML(label="Predictions")

    gr.Markdown("""
    ---
    ### 🎯 How It Works

    This tool uses transformer-based sentiment analysis combined with pattern recognition to:

    - Detect concerning patterns in your logs
    - Predict potential issues before they become critical
    - Provide actionable recommendations
    - Calculate a health score for your deployment

    **Supported Log Levels:** ERROR, WARN/WARNING, INFO, DEBUG
    """)

    # Button actions. Event listeners are registered inside the Blocks context.
    analyze_btn.click(
        fn=analyze_logs,
        inputs=[log_input],
        outputs=[summary_output, patterns_output, predictions_output],
    )
    # Loading the sample only fills the textbox; analysis still needs a click.
    sample_btn.click(
        fn=lambda: sample_logs,
        outputs=[log_input],
    )
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()