""" IIS Log Analyzer - Streamlit Application High-performance log analysis tool for large IIS log files (200MB-1GB+) """ import streamlit as st import plotly.graph_objects as go import plotly.express as px from plotly.subplots import make_subplots import pandas as pd from pathlib import Path import tempfile from typing import List import time from log_parser import IISLogParser, LogAnalyzer, analyze_multiple_logs # Page configuration st.set_page_config( page_title="IIS Log Analyzer", page_icon="📊", layout="wide", initial_sidebar_state="expanded" ) # Custom CSS st.markdown(""" """, unsafe_allow_html=True) def format_number(num: int) -> str: """Format large numbers with thousand separators.""" return f"{num:,}" def create_summary_table(stats: dict) -> pd.DataFrame: """Create summary statistics table.""" # Get threshold in seconds for display threshold_ms = stats.get("slow_threshold", 3000) threshold_display = f">{threshold_ms}ms" if threshold_ms >= 1000 else f">{threshold_ms}ms" data = { "Metric": [ "Total Requests (before filtering)", "Excluded Requests (HEAD+Zabbix + 401)", "Processed Requests", "Errors (≠200, ≠401)", f"Slow Requests ({threshold_display})", "Peak RPS", "Peak Timestamp", "Avg Response Time (ms)", "Max Response Time (ms)", "Min Response Time (ms)", ], "Value": [ format_number(stats["total_requests_before"]), format_number(stats["excluded_requests"]), format_number(stats["total_requests_after"]), format_number(stats["errors"]), format_number(stats["slow_requests"]), format_number(stats["peak_rps"]), stats["peak_timestamp"] or "N/A", format_number(stats["avg_time_ms"]), format_number(stats["max_time_ms"]), format_number(stats["min_time_ms"]), ] } return pd.DataFrame(data) def create_response_time_chart(dist: dict, title: str) -> go.Figure: """Create response time distribution chart.""" labels = list(dist.keys()) values = list(dist.values()) fig = go.Figure(data=[ go.Bar( x=labels, y=values, marker_color='lightblue', text=values, textposition='auto', ) ]) fig.update_layout( title=title, xaxis_title="Response Time Range", yaxis_title="Request Count", height=400, showlegend=False ) return fig def create_top_methods_chart(methods: List[dict], title: str) -> go.Figure: """Create top methods bar chart.""" if not methods: return go.Figure() df = pd.DataFrame(methods) fig = make_subplots( rows=1, cols=2, subplot_titles=("Request Count", "Avg Response Time (ms)") ) # Request count fig.add_trace( go.Bar( x=df["method_name"], y=df["count"], name="Count", marker_color='steelblue', text=df["count"], textposition='auto', ), row=1, col=1 ) # Average time fig.add_trace( go.Bar( x=df["method_name"], y=df["avg_time"].round(1), name="Avg Time", marker_color='coral', text=df["avg_time"].round(1), textposition='auto', ), row=1, col=2 ) fig.update_layout( title_text=title, height=400, showlegend=False ) return fig def create_metrics_comparison(individual_stats: List[dict]) -> go.Figure: """Create comparison chart for multiple services.""" services = [s["summary"]["service_name"] for s in individual_stats] requests = [s["summary"]["total_requests_after"] for s in individual_stats] errors = [s["summary"]["errors"] for s in individual_stats] avg_times = [s["summary"]["avg_time_ms"] for s in individual_stats] fig = make_subplots( rows=1, cols=3, subplot_titles=("Processed Requests", "Errors", "Avg Response Time (ms)"), specs=[[{"type": "bar"}, {"type": "bar"}, {"type": "bar"}]] ) fig.add_trace( go.Bar(x=services, y=requests, marker_color='lightblue', text=requests, textposition='auto'), row=1, col=1 ) fig.add_trace( go.Bar(x=services, y=errors, marker_color='salmon', text=errors, textposition='auto'), row=1, col=2 ) fig.add_trace( go.Bar(x=services, y=avg_times, marker_color='lightgreen', text=avg_times, textposition='auto'), row=1, col=3 ) fig.update_layout( title_text="Service Comparison", height=400, showlegend=False ) return fig def process_log_file(file_path: str, service_name: str = None, slow_threshold: int = 3000) -> dict: """Process a single log file and return statistics.""" parser = IISLogParser(file_path) if service_name: parser.service_name = service_name with st.spinner(f"Parsing {Path(file_path).name}..."): df = parser.parse() if df.height == 0: st.error(f"No valid log entries found in {Path(file_path).name}") return None with st.spinner(f"Analyzing {parser.service_name}..."): analyzer = LogAnalyzer(df, parser.service_name, slow_threshold) stats = { "summary": analyzer.get_summary_stats(), "top_methods": analyzer.get_top_methods(), "error_breakdown": analyzer.get_error_breakdown(), "errors_by_method": analyzer.get_errors_by_method(n=10), "response_time_dist": analyzer.get_response_time_distribution(), "analyzer": analyzer, # Keep reference for detailed error queries } return stats def main(): st.title("📊 IIS Log Performance Analyzer") st.markdown("High-performance analysis tool for large IIS log files (up to 1GB+)") # Sidebar st.sidebar.header("Configuration") # File upload mode upload_mode = st.sidebar.radio( "Upload Mode", ["Single File", "Multiple Files"], help="Analyze one or multiple log files" ) # File uploader if upload_mode == "Single File": uploaded_files = st.sidebar.file_uploader( "Upload IIS Log File", type=["log", "txt"], help="Upload IIS W3C Extended format log file" ) uploaded_files = [uploaded_files] if uploaded_files else [] else: uploaded_files = st.sidebar.file_uploader( "Upload IIS Log Files", type=["log", "txt"], accept_multiple_files=True, help="Upload multiple IIS log files for comparison" ) # Analysis options st.sidebar.header("Analysis Options") show_top_n = st.sidebar.slider("Top N Methods", 3, 20, 5) slow_threshold = st.sidebar.number_input( "Slow Request Threshold (ms)", min_value=100, max_value=10000, value=3000, step=100 ) # Process files if uploaded_files: st.info(f"Processing {len(uploaded_files)} file(s)...") # Save uploaded files to temp directory temp_files = [] for uploaded_file in uploaded_files: with tempfile.NamedTemporaryFile(delete=False, suffix=".log") as tmp: tmp.write(uploaded_file.getvalue()) temp_files.append(tmp.name) start_time = time.time() # Process each file all_stats = [] for i, temp_file in enumerate(temp_files): file_name = uploaded_files[i].name st.subheader(f"📄 {file_name}") stats = process_log_file(temp_file, None, slow_threshold) if stats: all_stats.append(stats) # Display summary metrics col1, col2, col3, col4 = st.columns(4) with col1: st.metric( "Total Requests", format_number(stats["summary"]["total_requests_after"]) ) with col2: st.metric( "Errors", format_number(stats["summary"]["errors"]), delta=None, delta_color="inverse" ) with col3: st.metric( "Avg Time (ms)", format_number(stats["summary"]["avg_time_ms"]) ) with col4: st.metric( "Peak RPS", format_number(stats["summary"]["peak_rps"]) ) # Tabs for detailed analysis tab1, tab2, tab3, tab4, tab5 = st.tabs([ "Summary", "Top Methods", "Response Time", "Error Breakdown", "Errors by Method" ]) with tab1: st.dataframe( create_summary_table(stats["summary"]), hide_index=True, use_container_width=True ) with tab2: if stats["top_methods"]: st.plotly_chart( create_top_methods_chart( stats["top_methods"][:show_top_n], f"Top {show_top_n} Methods - {stats['summary']['service_name']}" ), use_container_width=True ) # Show table methods_df = pd.DataFrame(stats["top_methods"][:show_top_n]) methods_df["avg_time"] = methods_df["avg_time"].round(1) st.dataframe(methods_df, hide_index=True, use_container_width=True) else: st.info("No method data available") with tab3: if stats["response_time_dist"]: st.plotly_chart( create_response_time_chart( stats["response_time_dist"], f"Response Time Distribution - {stats['summary']['service_name']}" ), use_container_width=True ) else: st.info("No response time distribution data") with tab4: if stats["error_breakdown"]: error_df = pd.DataFrame(stats["error_breakdown"]) error_df.columns = ["Status Code", "Count"] st.dataframe(error_df, hide_index=True, use_container_width=True) # Pie chart fig = px.pie( error_df, values="Count", names="Status Code", title=f"Error Distribution - {stats['summary']['service_name']}" ) st.plotly_chart(fig, use_container_width=True) else: st.success("No errors found! ✓") with tab5: st.markdown("### 🔍 Errors by Method") st.markdown("This view shows which specific methods are causing errors, with full context for debugging.") if stats["errors_by_method"]: # Display summary table errors_method_df = pd.DataFrame(stats["errors_by_method"]) errors_method_df["error_rate_percent"] = errors_method_df["error_rate_percent"].round(2) errors_method_df["avg_response_time_ms"] = errors_method_df["avg_response_time_ms"].round(1) # Rename columns for better display errors_method_df.columns = [ "Method Path", "Total Calls", "Error Count", "Most Common Error", "Avg Response Time (ms)", "Error Rate (%)" ] st.dataframe(errors_method_df, hide_index=True, use_container_width=True) # Bar chart of top error-prone methods fig = go.Figure() fig.add_trace(go.Bar( x=errors_method_df["Method Path"], y=errors_method_df["Error Count"], marker_color='red', text=errors_method_df["Error Count"], textposition='auto', name="Error Count" )) fig.update_layout( title=f"Top Error-Prone Methods - {stats['summary']['service_name']}", xaxis_title="Method Path", yaxis_title="Error Count", height=400, showlegend=False ) st.plotly_chart(fig, use_container_width=True) # Allow users to drill down into specific methods st.markdown("#### 🔎 Detailed Error Logs") selected_method = st.selectbox( "Select a method to view detailed error logs:", options=["All"] + errors_method_df["Method Path"].tolist(), key=f"method_select_{file_name}" ) if selected_method and selected_method != "All": error_details = stats["analyzer"].get_error_details( method_path=selected_method, limit=50 ) if error_details: details_df = pd.DataFrame(error_details) st.dataframe(details_df, hide_index=True, use_container_width=True) st.info(f"Showing up to 50 most recent errors for {selected_method}") else: st.info(f"No error details found for {selected_method}") elif selected_method == "All": error_details = stats["analyzer"].get_error_details(limit=50) if error_details: details_df = pd.DataFrame(error_details) st.dataframe(details_df, hide_index=True, use_container_width=True) st.info("Showing up to 50 most recent errors across all methods") else: st.success("No errors found in any methods! ✓") st.divider() # Multi-file comparison if len(all_stats) > 1: st.header("📊 Service Comparison") st.plotly_chart( create_metrics_comparison(all_stats), use_container_width=True ) # Combined summary st.subheader("Combined Statistics") combined = { "total_requests_before": sum(s["summary"]["total_requests_before"] for s in all_stats), "excluded_requests": sum(s["summary"]["excluded_requests"] for s in all_stats), "total_requests_after": sum(s["summary"]["total_requests_after"] for s in all_stats), "errors": sum(s["summary"]["errors"] for s in all_stats), "slow_requests": sum(s["summary"]["slow_requests"] for s in all_stats), } col1, col2, col3 = st.columns(3) with col1: st.metric("Total Requests (All Services)", format_number(combined["total_requests_after"])) with col2: st.metric("Total Errors (All Services)", format_number(combined["errors"])) with col3: st.metric("Total Slow Requests (All Services)", format_number(combined["slow_requests"])) processing_time = time.time() - start_time st.success(f"✓ Analysis completed in {processing_time:.2f} seconds") # Clean up temp files for temp_file in temp_files: Path(temp_file).unlink(missing_ok=True) else: # Welcome screen st.info("👆 Upload one or more IIS log files to begin analysis") st.markdown(""" ### Features - ⚡ **Fast processing** of large files (200MB-1GB+) using Polars - 📊 **Comprehensive metrics**: RPS, response times, error rates - 🔍 **Detailed analysis**: Top methods, error breakdown, time distribution - 📈 **Visual reports**: Interactive charts with Plotly - 🔄 **Multi-file support**: Compare multiple services side-by-side ### Log Format This tool supports **IIS W3C Extended Log Format** with the following fields: ``` date time s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs(User-Agent) cs(Referer) sc-status sc-substatus sc-win32-status time-taken ``` ### Filtering Rules - Excludes lines with both `HEAD` method and `Zabbix` in User-Agent - 401 Unauthorized responses are excluded from error counts - Errors are defined as status codes ≠ 200 and ≠ 401 - Slow requests are those with response time > 3000ms (configurable) """) if __name__ == "__main__": main()