Spaces:

Snaseem2026
/

devops-fortune-teller

Sleeping

App Files Files Community

devops-fortune-teller / app.py

Snaseem2026

Initial Commit

76ebfdf verified 26 days ago

raw

history blame contribute delete

14.1 kB

	# app.py
	import gradio as gr
	from transformers import pipeline
	import re
	from datetime import datetime, timedelta
	import torch

	# Initialize the sentiment-analysis pipeline once, at import time.
	# Loading is best-effort: if it fails for any ordinary reason the app keeps
	# working without sentiment scoring (sentiment_analyzer is None).
	try:
	    sentiment_analyzer = pipeline(
	        "sentiment-analysis",
	        model="distilbert-base-uncased-finetuned-sst-2-english",
	    )
	except Exception:
	    # Catch Exception rather than using a bare except so that
	    # KeyboardInterrupt / SystemExit raised while loading still propagate.
	    sentiment_analyzer = None

	# First standalone log-level token in a line; WARNING is folded into WARN.
	_LOG_LEVEL_RE = re.compile(r'\b(ERROR|WARN(?:ING)?|INFO|DEBUG)\b', re.IGNORECASE)


	def extract_log_level(line):
	    """Return the log level named in *line*.

	    The level is the first standalone (word-bounded, case-insensitive)
	    occurrence of ERROR, WARN/WARNING, INFO or DEBUG. Using the first
	    whole-word token, instead of a substring test over the whole line,
	    prevents message text from overriding the real level, e.g.
	    "INFO No errors found" is INFO, not ERROR.

	    Args:
	        line: A single log line (str).

	    Returns:
	        One of 'ERROR', 'WARN', 'INFO', 'DEBUG', or 'UNKNOWN' when no
	        level token is present.
	    """
	    match = _LOG_LEVEL_RE.search(line)
	    if match is None:
	        return 'UNKNOWN'
	    level = match.group(1).upper()
	    # Both spellings of the warning level are reported as 'WARN'.
	    return 'WARN' if level == 'WARNING' else level

	def analyze_patterns(lines):
	    """Report which issue categories are mentioned anywhere in the logs.

	    Every line is lower-cased and checked, by plain substring search,
	    against the keyword list of each category.

	    Args:
	        lines: Iterable of log line strings.

	    Returns:
	        dict mapping each category name ('memory_issues',
	        'connection_issues', 'performance_issues', 'lock_issues',
	        'disk_issues', 'timeout_issues') to True if at least one line
	        contains one of that category's keywords, otherwise False.
	    """
	    category_keywords = (
	        ('memory_issues', ('memory', 'oom', 'heap', 'ram')),
	        ('connection_issues', ('connection', 'timeout', 'refused', 'unreachable')),
	        ('performance_issues', ('slow', 'cpu', 'performance', 'latency')),
	        ('lock_issues', ('lock', 'deadlock', 'blocked')),
	        ('disk_issues', ('disk', 'storage', 'space', 'inode')),
	        ('timeout_issues', ('timeout', 'timed out', 'deadline exceeded')),
	    )

	    # Lower-case once up front so each category scans the same text.
	    lowered = [entry.lower() for entry in lines]

	    return {
	        category: any(word in text for text in lowered for word in words)
	        for category, words in category_keywords
	    }

	def generate_predictions(error_count, warn_count, patterns, sentiment_score):
	    """Turn log counts and detected patterns into a list of predictions.

	    Args:
	        error_count: Number of ERROR lines.
	        warn_count: Number of WARN lines.
	        patterns: dict of category name -> bool, with the keys
	            'memory_issues', 'connection_issues', 'performance_issues',
	            'lock_issues', 'disk_issues' and 'timeout_issues'.
	        sentiment_score: Accepted for interface compatibility; not used
	            by any rule here.

	    Returns:
	        List of dicts, each with the keys 'icon', 'type', 'title',
	        'message', 'confidence' and 'action'. The list is empty when no
	        rule fires but errors are present.
	    """
	    findings = []

	    def report(icon, severity, title, message, confidence, action):
	        # Append one prediction record in the shape the HTML formatter reads.
	        findings.append({
	            'icon': icon,
	            'type': severity,
	            'title': title,
	            'message': message,
	            'confidence': confidence,
	            'action': action,
	        })

	    # Memory pressure only counts when at least one warning was logged.
	    if patterns['memory_issues'] and warn_count > 0:
	        report(
	            '⚠️', 'warning', 'Memory Pressure Detected',
	            'Based on memory warnings, pod restart likely within 2-4 hours if load increases. Consider scaling horizontally or increasing memory limits.',
	            min(95, 70 + (warn_count * 5)),
	            'Review memory usage metrics and consider pod autoscaling',
	        )

	    if patterns['connection_issues']:
	        report(
	            '🔴', 'critical', 'Connection Instability',
	            'Database/service connection issues may cascade to dependent services. Network or connection pool exhaustion detected.',
	            min(95, 75 + (error_count * 3)),
	            'Check connection pool settings and network stability',
	        )

	    if patterns['performance_issues']:
	        report(
	            '⚠️', 'warning', 'Performance Degradation',
	            'Slow queries or high CPU detected. Performance will likely degrade further under increased load.',
	            min(90, 65 + (warn_count * 4)),
	            'Optimize queries and review resource allocation',
	        )

	    if patterns['lock_issues']:
	        report(
	            '🔴', 'critical', 'Resource Contention',
	            'Lock acquisition failures suggest possible deadlock scenario. Transaction conflicts detected.',
	            min(85, 60 + (error_count * 5)),
	            'Review transaction isolation levels and locking strategy',
	        )

	    if patterns['disk_issues']:
	        report(
	            '🔴', 'critical', 'Disk Space Warning',
	            'Disk space or inode exhaustion detected. Service interruption imminent if not addressed.',
	            min(90, 70 + (error_count * 4)),
	            'Clean up logs and temporary files, expand storage',
	        )

	    # A timeout cascade is only predicted once more than two errors were seen.
	    if patterns['timeout_issues'] and error_count > 2:
	        report(
	            '⚠️', 'warning', 'Timeout Cascade Risk',
	            'Multiple timeout events detected. This pattern often leads to cascading failures across microservices.',
	            min(88, 68 + (error_count * 3)),
	            'Increase timeout thresholds or implement circuit breakers',
	        )

	    # "All clear" requires both no findings and no errors.
	    if not findings and error_count == 0:
	        report(
	            '✅', 'success', 'All Systems Nominal',
	            'No concerning patterns detected. Your deployment looks healthy! Keep monitoring.',
	            95,
	            'Continue normal operations',
	        )

	    return findings

	def calculate_health_score(error_count, warn_count, info_count, sentiment_score):
	    """Compute an overall 0-100 health score for the analyzed logs.

	    Starts from 100, subtracts 15 per error and 5 per warning, and, when a
	    sentiment score is available, blends it in with a 70/30 weighting.

	    Args:
	        error_count: Number of ERROR lines.
	        warn_count: Number of WARN lines.
	        info_count: Accepted for interface compatibility; not used in the
	            score.
	        sentiment_score: Sentiment percentage (0-100), or None to skip the
	            sentiment blend.

	    Returns:
	        The score clamped to [0, 100] and rounded to one decimal place.
	        It is an int when no sentiment is blended in, otherwise a float.
	    """
	    score = 100 - error_count * 15 - warn_count * 5

	    if sentiment_score is not None:
	        score = score * 0.7 + sentiment_score * 0.3

	    # Round so floating-point noise from the blend (e.g. 79.99999999999999)
	    # never reaches the UI; round() keeps ints as ints.
	    return round(max(0, min(100, score)), 1)

	def analyze_sentiment(lines):
	    """Score how positive the log messages read, as a percentage.

	    Strips a leading "YYYY-MM-DD HH:MM:SS" timestamp and log-level token
	    from each line, keeps messages longer than 10 characters (truncated to
	    512), and runs at most the first 20 through the module-level
	    ``sentiment_analyzer``.

	    Args:
	        lines: Iterable of log line strings.

	    Returns:
	        Percentage (0-100, float) of analyzed messages labelled POSITIVE,
	        or None when the analyzer is unavailable, no usable message is
	        found, or the analysis raises.
	    """
	    if not sentiment_analyzer:
	        return None

	    try:
	        messages = []
	        for line in lines:
	            # Drop the timestamp prefix, if any.
	            cleaned = re.sub(r'^\s*\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}', '', line)
	            # Drop the level token. Leading whitespace is allowed because
	            # removing the timestamp leaves a space in front of the level.
	            cleaned = re.sub(r'^\s*(?:ERROR|WARN(?:ING)?|INFO|DEBUG)\b', '', cleaned)
	            cleaned = cleaned.strip()
	            if len(cleaned) > 10:
	                messages.append(cleaned[:512])  # Limit length

	        if not messages:
	            return None

	        # Cap the batch size to keep the request fast.
	        results = sentiment_analyzer(messages[:20])

	        positive_count = sum(1 for r in results if r['label'] == 'POSITIVE')
	        return (positive_count / len(results)) * 100
	    except Exception:
	        # Sentiment is optional: any failure simply disables it for this run.
	        return None

	def format_prediction_html(predictions):
	    """Render predictions as a sequence of HTML cards.

	    Args:
	        predictions: Iterable of dicts with the keys 'icon', 'type',
	            'title', 'message', 'confidence' and 'action'.

	    Returns:
	        One HTML string with a card per prediction; "" for no predictions.
	        The card's border color follows 'type' ('critical', 'warning',
	        'success'); any other type gets a neutral gray.
	    """
	    type_colors = {
	        'critical': '#ef4444',
	        'warning': '#f59e0b',
	        'success': '#10b981',
	    }

	    cards = []
	    for pred in predictions:
	        color = type_colors.get(pred['type'], '#6b7280')
	        # The separator is a plain "|"; a backslash before it would be an
	        # invalid escape sequence and would be printed literally.
	        cards.append(f"""
	<div style="border-left: 4px solid {color}; padding: 12px; margin: 10px 0; background: #f9fafb; border-radius: 4px;">
	<div style="font-size: 18px; margin-bottom: 4px;">{pred['icon']} <strong>{pred['title']}</strong></div>
	<div style="color: #4b5563; margin-bottom: 8px;">{pred['message']}</div>
	<div style="font-size: 12px; color: #6b7280;">
	<strong>Confidence:</strong> {pred['confidence']}% |
	<strong>Action:</strong> {pred['action']}
	</div>
	</div>
	""")
	    return "".join(cards)

	def analyze_logs(log_text):
	    """Analyze pasted log text and build the three HTML panels for the UI.

	    Args:
	        log_text: Raw log text, one entry per line. None or blank text
	            yields a prompt message instead of an analysis.

	    Returns:
	        Tuple ``(summary_html, patterns_html, predictions_html)``. For
	        missing/blank input the first element is a prompt message and the
	        other two are empty strings.
	    """
	    # Guard against None as well as blank text.
	    if not log_text or not log_text.strip():
	        return "⚠️ Please paste some logs to analyze", "", ""

	    lines = [line.strip() for line in log_text.split('\n') if line.strip()]

	    # Classify each line once, then count per level.
	    levels = [extract_log_level(line) for line in lines]
	    error_count = levels.count('ERROR')
	    warn_count = levels.count('WARN')
	    info_count = levels.count('INFO')

	    # Analyze patterns
	    patterns = analyze_patterns(lines)

	    # Sentiment analysis (None when unavailable)
	    sentiment_score = analyze_sentiment(lines)

	    # Calculate health score
	    health_score = calculate_health_score(error_count, warn_count, info_count, sentiment_score)

	    # Generate predictions
	    predictions = generate_predictions(error_count, warn_count, patterns, sentiment_score)

	    # Green above 75, amber above 50, red otherwise.
	    health_color = '#10b981' if health_score > 75 else '#f59e0b' if health_score > 50 else '#ef4444'

	    # The score is printed with ":g" so a blended float never shows as
	    # something like 79.99999999999999 and whole numbers stay "80".
	    summary = f"""
	<div style="padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 8px; color: white;">
	<h2 style="margin: 0 0 10px 0;">🔮 DevOps Fortune Teller Analysis</h2>
	<div style="font-size: 14px; opacity: 0.9;">AI-Powered Predictive Log Analysis</div>
	</div>

	<div style="display: grid; grid-template-columns: repeat(4, 1fr); gap: 10px; margin: 20px 0;">
	<div style="background: #fee2e2; padding: 15px; border-radius: 8px; text-align: center;">
	<div style="font-size: 24px; font-weight: bold; color: #dc2626;">{error_count}</div>
	<div style="color: #991b1b; font-size: 12px;">Errors</div>
	</div>
	<div style="background: #fef3c7; padding: 15px; border-radius: 8px; text-align: center;">
	<div style="font-size: 24px; font-weight: bold; color: #d97706;">{warn_count}</div>
	<div style="color: #92400e; font-size: 12px;">Warnings</div>
	</div>
	<div style="background: #dbeafe; padding: 15px; border-radius: 8px; text-align: center;">
	<div style="font-size: 24px; font-weight: bold; color: #2563eb;">{info_count}</div>
	<div style="color: #1e40af; font-size: 12px;">Info</div>
	</div>
	<div style="background: {health_color}20; padding: 15px; border-radius: 8px; text-align: center;">
	<div style="font-size: 24px; font-weight: bold; color: {health_color};">{health_score:g}%</div>
	<div style="color: #374151; font-size: 12px;">Health Score</div>
	</div>
	</div>
	"""

	    # Format patterns detected
	    pattern_names = {
	        'memory_issues': 'Memory Pressure',
	        'connection_issues': 'Connection Problems',
	        'performance_issues': 'Performance Issues',
	        'lock_issues': 'Lock Contention',
	        'disk_issues': 'Disk Space Issues',
	        'timeout_issues': 'Timeout Events'
	    }
	    detected = [pattern_names[k] for k, v in patterns.items() if v]
	    if detected:
	        items_html = "".join(f"<li>{pattern}</li>" for pattern in detected)
	    else:
	        items_html = "<li>No critical patterns detected</li>"
	    patterns_html = (
	        "<h3>🔍 Patterns Detected:</h3><ul style='color: #4b5563;'>"
	        + items_html
	        + "</ul>"
	    )

	    # Format predictions
	    predictions_html = "<h3>🎯 Predictions & Recommendations:</h3>" + format_prediction_html(predictions)

	    return summary, patterns_html, predictions_html

	# Sample logs for the demo: a short deployment log mixing INFO, WARN and
	# ERROR lines. Loaded into the input textbox by the "Load Sample Logs" button.
	sample_logs = """2026-01-10 14:23:45 INFO Deployment started for service-auth v2.1.0
	2026-01-10 14:23:47 WARN Memory usage at 78% on pod-auth-3
	2026-01-10 14:23:50 INFO Health check passed for 3/3 pods
	2026-01-10 14:24:01 ERROR Connection timeout to database cluster db-primary
	2026-01-10 14:24:02 INFO Retrying connection (attempt 1/3)
	2026-01-10 14:24:05 WARN Slow query detected: SELECT * FROM users WHERE status='active' (2.3s)
	2026-01-10 14:24:08 ERROR Connection timeout to database cluster db-primary
	2026-01-10 14:24:10 INFO Connection restored to db-primary
	2026-01-10 14:24:15 ERROR Failed to acquire lock on resource user_session_123
	2026-01-10 14:24:18 WARN High CPU usage detected: 89% on pod-auth-2
	2026-01-10 14:24:20 INFO Processing queue: 1247 items pending
	2026-01-10 14:24:25 ERROR Disk space warning: /var/log at 92% capacity
	2026-01-10 14:24:30 WARN Response time degradation: p95 latency 1.8s (threshold: 1.0s)"""

	# Create Gradio interface
	# Builds the `demo` Blocks app: a log input textbox with two buttons, and
	# three HTML output panels (summary, patterns, predictions).
	# NOTE(review): nesting indentation is not visible in this view; confirm which
	# Row/Column context each component below belongs to.
	with gr.Blocks(theme=gr.themes.Soft(), title="DevOps Fortune Teller") as demo:
	gr.Markdown("""
	# 🔮 DevOps Fortune Teller
	### AI-Powered Predictive Log Analysis for DevOps
	Paste your deployment, application, or error logs below and get AI-powered predictions about potential issues before they escalate.
	""")

	with gr.Row():
	with gr.Column(scale=1):
	# Input side: the raw log text to analyze.
	log_input = gr.Textbox(
	label="📋 Paste Your Logs Here",
	placeholder="Paste your logs here (supports standard formats with ERROR, WARN, INFO levels)...",
	lines=15,
	max_lines=20
	)

	with gr.Row():
	analyze_btn = gr.Button("🔮 Predict Issues", variant="primary", size="lg")
	sample_btn = gr.Button("📝 Load Sample Logs", size="lg")

	with gr.Column(scale=1):
	# Output side: one HTML panel per value returned by analyze_logs.
	summary_output = gr.HTML(label="Summary")
	patterns_output = gr.HTML(label="Patterns")
	predictions_output = gr.HTML(label="Predictions")

	gr.Markdown("""
	---
	### 🎯 How It Works
	This tool uses transformer-based sentiment analysis combined with pattern recognition to:
	- Detect concerning patterns in your logs
	- Predict potential issues before they become critical
	- Provide actionable recommendations
	- Calculate a health score for your deployment

	Supported Log Levels: ERROR, WARN/WARNING, INFO, DEBUG
	""")

	# Button actions
	# Run the analysis on the textbox content and fill the three output panels.
	analyze_btn.click(
	fn=analyze_logs,
	inputs=[log_input],
	outputs=[summary_output, patterns_output, predictions_output]
	)

	# Replace the textbox content with the bundled sample logs.
	sample_btn.click(
	fn=lambda: sample_logs,
	outputs=[log_input]
	)

	# Launch the app
	# Only start the Gradio server when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	demo.launch()