Spaces:
Sleeping
Sleeping
| """ | |
| IIS Log Analyzer - Streamlit Application | |
| High-performance log analysis tool for large IIS log files (200MB-1GB+) | |
| """ | |
| import streamlit as st | |
| import plotly.graph_objects as go | |
| import plotly.express as px | |
| from plotly.subplots import make_subplots | |
| import pandas as pd | |
| from pathlib import Path | |
| import tempfile | |
| from typing import List | |
| import time | |
| from log_parser import IISLogParser, LogAnalyzer, analyze_multiple_logs | |
# Page configuration
st.set_page_config(
    page_title="IIS Log Analyzer",
    page_icon="π",  # NOTE(review): glyph looks mojibake'd — confirm the intended emoji
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS: card styles for metric blocks.
# NOTE(review): these classes are not referenced elsewhere in this file —
# they may be used by markup outside this view; verify before removing.
st.markdown("""
<style>
    .metric-card {
        background-color: #f0f2f6;
        padding: 20px;
        border-radius: 10px;
        margin: 10px 0;
    }
    .error-metric {
        background-color: #ffebee;
    }
    .success-metric {
        background-color: #e8f5e9;
    }
    .warning-metric {
        background-color: #fff3e0;
    }
</style>
""", unsafe_allow_html=True)
def format_number(num: int) -> str:
    """Format large numbers with thousand separators (e.g. 1234567 -> '1,234,567')."""
    return f"{num:,}"


def create_summary_table(stats: dict) -> pd.DataFrame:
    """Create the summary statistics table shown in the Summary tab.

    Args:
        stats: Summary dict (as produced by the analyzer) containing the
            request/error/latency keys referenced below, plus an optional
            "slow_threshold" in milliseconds (defaults to 3000).

    Returns:
        Two-column DataFrame ("Metric", "Value") ready for st.dataframe.
    """
    threshold_ms = stats.get("slow_threshold", 3000)
    # Bug fix: both branches of the original conditional produced the identical
    # f">{threshold_ms}ms" string, making the >= 1000 branch dead. Display the
    # threshold in seconds once it reaches 1000 ms, otherwise in milliseconds.
    if threshold_ms >= 1000:
        threshold_display = f">{threshold_ms / 1000:g}s"
    else:
        threshold_display = f">{threshold_ms}ms"
    data = {
        "Metric": [
            "Total Requests (before filtering)",
            "Excluded Requests (HEAD+Zabbix + 401)",
            "Processed Requests",
            "Errors (β 200, β 401)",
            f"Slow Requests ({threshold_display})",
            "Peak RPS",
            "Peak Timestamp",
            "Avg Response Time (ms)",
            "Max Response Time (ms)",
            "Min Response Time (ms)",
        ],
        "Value": [
            format_number(stats["total_requests_before"]),
            format_number(stats["excluded_requests"]),
            format_number(stats["total_requests_after"]),
            format_number(stats["errors"]),
            format_number(stats["slow_requests"]),
            format_number(stats["peak_rps"]),
            # Peak timestamp may be absent (e.g. empty log) — show a placeholder.
            stats["peak_timestamp"] or "N/A",
            format_number(stats["avg_time_ms"]),
            format_number(stats["max_time_ms"]),
            format_number(stats["min_time_ms"]),
        ]
    }
    return pd.DataFrame(data)
def create_response_time_chart(dist: dict, title: str) -> go.Figure:
    """Render the response-time distribution as a labelled bar chart.

    Args:
        dist: Mapping of time-range label -> request count.
        title: Chart title.
    """
    buckets = list(dist.keys())
    counts = list(dist.values())
    bar = go.Bar(
        x=buckets,
        y=counts,
        marker_color='lightblue',
        text=counts,
        textposition='auto',
    )
    fig = go.Figure(data=[bar])
    fig.update_layout(
        title=title,
        xaxis_title="Response Time Range",
        yaxis_title="Request Count",
        height=400,
        showlegend=False,
    )
    return fig
def create_top_methods_chart(methods: List[dict], title: str) -> go.Figure:
    """Build a two-panel bar chart: per-method request counts and average latency.

    Returns an empty figure when no method data is supplied.
    """
    if not methods:
        return go.Figure()
    frame = pd.DataFrame(methods)
    names = frame["method_name"]
    rounded_avg = frame["avg_time"].round(1)
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=("Request Count", "Avg Response Time (ms)")
    )
    # (series, trace name, color, subplot column)
    panels = (
        (frame["count"], "Count", 'steelblue', 1),
        (rounded_avg, "Avg Time", 'coral', 2),
    )
    for series, label, color, col in panels:
        fig.add_trace(
            go.Bar(
                x=names,
                y=series,
                name=label,
                marker_color=color,
                text=series,
                textposition='auto',
            ),
            row=1, col=col,
        )
    fig.update_layout(
        title_text=title,
        height=400,
        showlegend=False,
    )
    return fig
def create_metrics_comparison(individual_stats: List[dict]) -> go.Figure:
    """Compare services side-by-side on volume, error count, and average latency."""
    summaries = [entry["summary"] for entry in individual_stats]
    services = [summary["service_name"] for summary in summaries]
    fig = make_subplots(
        rows=1, cols=3,
        subplot_titles=("Processed Requests", "Errors", "Avg Response Time (ms)"),
        specs=[[{"type": "bar"}, {"type": "bar"}, {"type": "bar"}]]
    )
    # (summary key, bar color, subplot column)
    panels = (
        ("total_requests_after", 'lightblue', 1),
        ("errors", 'salmon', 2),
        ("avg_time_ms", 'lightgreen', 3),
    )
    for key, color, col in panels:
        values = [summary[key] for summary in summaries]
        fig.add_trace(
            go.Bar(x=services, y=values, marker_color=color, text=values, textposition='auto'),
            row=1, col=col,
        )
    fig.update_layout(
        title_text="Service Comparison",
        height=400,
        showlegend=False,
    )
    return fig
def process_log_file(file_path: str, service_name: str = None, slow_threshold: int = 3000) -> dict:
    """Process a single log file and return statistics.

    Args:
        file_path: Path to the IIS log file on disk.
        service_name: Optional override for the parser-derived service name.
        slow_threshold: Slow-request cutoff in milliseconds (default 3000).

    Returns:
        dict with keys "summary", "top_methods", "error_breakdown",
        "errors_by_method", "response_time_dist", and "analyzer" (the live
        LogAnalyzer instance, kept for drill-down error queries), or None
        when the file yields no valid entries.
    """
    parser = IISLogParser(file_path)
    if service_name:
        parser.service_name = service_name
    # Parsing can take a while on multi-hundred-MB files; show a spinner.
    with st.spinner(f"Parsing {Path(file_path).name}..."):
        df = parser.parse()
    # NOTE(review): `.height` suggests df is a Polars DataFrame (row count) — confirm.
    if df.height == 0:
        st.error(f"No valid log entries found in {Path(file_path).name}")
        return None
    with st.spinner(f"Analyzing {parser.service_name}..."):
        analyzer = LogAnalyzer(df, parser.service_name, slow_threshold)
        stats = {
            "summary": analyzer.get_summary_stats(),
            "top_methods": analyzer.get_top_methods(),
            "error_breakdown": analyzer.get_error_breakdown(),
            "errors_by_method": analyzer.get_errors_by_method(n=10),
            "response_time_dist": analyzer.get_response_time_distribution(),
            "analyzer": analyzer,  # Keep reference for detailed error queries
        }
    return stats
def main():
    """Streamlit entry point: sidebar configuration, upload handling, and per-file reports."""
    st.title("π IIS Log Performance Analyzer")
    st.markdown("High-performance analysis tool for large IIS log files (up to 1GB+)")

    # Sidebar
    st.sidebar.header("Configuration")

    # File upload mode
    upload_mode = st.sidebar.radio(
        "Upload Mode",
        ["Single File", "Multiple Files"],
        help="Analyze one or multiple log files"
    )

    # File uploader
    if upload_mode == "Single File":
        uploaded_files = st.sidebar.file_uploader(
            "Upload IIS Log File",
            type=["log", "txt"],
            help="Upload IIS W3C Extended format log file"
        )
        # Normalize to a list so both modes share the processing loop below.
        uploaded_files = [uploaded_files] if uploaded_files else []
    else:
        uploaded_files = st.sidebar.file_uploader(
            "Upload IIS Log Files",
            type=["log", "txt"],
            accept_multiple_files=True,
            help="Upload multiple IIS log files for comparison"
        )

    # Analysis options
    st.sidebar.header("Analysis Options")
    show_top_n = st.sidebar.slider("Top N Methods", 3, 20, 5)
    slow_threshold = st.sidebar.number_input(
        "Slow Request Threshold (ms)",
        min_value=100,
        max_value=10000,
        value=3000,
        step=100
    )

    # Process files
    if uploaded_files:
        st.info(f"Processing {len(uploaded_files)} file(s)...")

        # Save uploaded files to temp directory: the parser wants a real path,
        # and Streamlit uploads are in-memory buffers.
        temp_files = []
        for uploaded_file in uploaded_files:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".log") as tmp:
                tmp.write(uploaded_file.getvalue())
                temp_files.append(tmp.name)

        start_time = time.time()

        # Process each file
        all_stats = []
        for i, temp_file in enumerate(temp_files):
            file_name = uploaded_files[i].name
            st.subheader(f"π {file_name}")
            stats = process_log_file(temp_file, None, slow_threshold)
            if stats:
                all_stats.append(stats)

                # Display summary metrics
                col1, col2, col3, col4 = st.columns(4)
                with col1:
                    st.metric(
                        "Total Requests",
                        format_number(stats["summary"]["total_requests_after"])
                    )
                with col2:
                    st.metric(
                        "Errors",
                        format_number(stats["summary"]["errors"]),
                        delta=None,
                        delta_color="inverse"
                    )
                with col3:
                    st.metric(
                        "Avg Time (ms)",
                        format_number(stats["summary"]["avg_time_ms"])
                    )
                with col4:
                    st.metric(
                        "Peak RPS",
                        format_number(stats["summary"]["peak_rps"])
                    )

                # Tabs for detailed analysis
                tab1, tab2, tab3, tab4, tab5 = st.tabs([
                    "Summary", "Top Methods", "Response Time", "Error Breakdown", "Errors by Method"
                ])

                with tab1:
                    st.dataframe(
                        create_summary_table(stats["summary"]),
                        hide_index=True,
                        use_container_width=True
                    )

                with tab2:
                    if stats["top_methods"]:
                        st.plotly_chart(
                            create_top_methods_chart(
                                stats["top_methods"][:show_top_n],
                                f"Top {show_top_n} Methods - {stats['summary']['service_name']}"
                            ),
                            use_container_width=True
                        )
                        # Show table
                        methods_df = pd.DataFrame(stats["top_methods"][:show_top_n])
                        methods_df["avg_time"] = methods_df["avg_time"].round(1)
                        st.dataframe(methods_df, hide_index=True, use_container_width=True)
                    else:
                        st.info("No method data available")

                with tab3:
                    if stats["response_time_dist"]:
                        st.plotly_chart(
                            create_response_time_chart(
                                stats["response_time_dist"],
                                f"Response Time Distribution - {stats['summary']['service_name']}"
                            ),
                            use_container_width=True
                        )
                    else:
                        st.info("No response time distribution data")

                with tab4:
                    if stats["error_breakdown"]:
                        error_df = pd.DataFrame(stats["error_breakdown"])
                        error_df.columns = ["Status Code", "Count"]
                        st.dataframe(error_df, hide_index=True, use_container_width=True)
                        # Pie chart
                        fig = px.pie(
                            error_df,
                            values="Count",
                            names="Status Code",
                            title=f"Error Distribution - {stats['summary']['service_name']}"
                        )
                        st.plotly_chart(fig, use_container_width=True)
                    else:
                        st.success("No errors found! β")

                with tab5:
                    st.markdown("### π Errors by Method")
                    st.markdown("This view shows which specific methods are causing errors, with full context for debugging.")
                    if stats["errors_by_method"]:
                        # Display summary table
                        errors_method_df = pd.DataFrame(stats["errors_by_method"])
                        errors_method_df["error_rate_percent"] = errors_method_df["error_rate_percent"].round(2)
                        errors_method_df["avg_response_time_ms"] = errors_method_df["avg_response_time_ms"].round(1)
                        # Rename columns for better display
                        errors_method_df.columns = [
                            "Method Path", "Total Calls", "Error Count",
                            "Most Common Error", "Avg Response Time (ms)", "Error Rate (%)"
                        ]
                        st.dataframe(errors_method_df, hide_index=True, use_container_width=True)

                        # Bar chart of top error-prone methods
                        fig = go.Figure()
                        fig.add_trace(go.Bar(
                            x=errors_method_df["Method Path"],
                            y=errors_method_df["Error Count"],
                            marker_color='red',
                            text=errors_method_df["Error Count"],
                            textposition='auto',
                            name="Error Count"
                        ))
                        fig.update_layout(
                            title=f"Top Error-Prone Methods - {stats['summary']['service_name']}",
                            xaxis_title="Method Path",
                            yaxis_title="Error Count",
                            height=400,
                            showlegend=False
                        )
                        st.plotly_chart(fig, use_container_width=True)

                        # Allow users to drill down into specific methods.
                        # Keyed per file so the widget state survives reruns
                        # when multiple files are shown on the same page.
                        st.markdown("#### π Detailed Error Logs")
                        selected_method = st.selectbox(
                            "Select a method to view detailed error logs:",
                            options=["All"] + errors_method_df["Method Path"].tolist(),
                            key=f"method_select_{file_name}"
                        )
                        if selected_method and selected_method != "All":
                            error_details = stats["analyzer"].get_error_details(
                                method_path=selected_method,
                                limit=50
                            )
                            if error_details:
                                details_df = pd.DataFrame(error_details)
                                st.dataframe(details_df, hide_index=True, use_container_width=True)
                                st.info(f"Showing up to 50 most recent errors for {selected_method}")
                            else:
                                st.info(f"No error details found for {selected_method}")
                        elif selected_method == "All":
                            error_details = stats["analyzer"].get_error_details(limit=50)
                            if error_details:
                                details_df = pd.DataFrame(error_details)
                                st.dataframe(details_df, hide_index=True, use_container_width=True)
                                st.info("Showing up to 50 most recent errors across all methods")
                    else:
                        st.success("No errors found in any methods! β")

                st.divider()

        # Multi-file comparison
        if len(all_stats) > 1:
            st.header("π Service Comparison")
            st.plotly_chart(
                create_metrics_comparison(all_stats),
                use_container_width=True
            )
            # Combined summary: simple sums across all successfully parsed files.
            st.subheader("Combined Statistics")
            combined = {
                "total_requests_before": sum(s["summary"]["total_requests_before"] for s in all_stats),
                "excluded_requests": sum(s["summary"]["excluded_requests"] for s in all_stats),
                "total_requests_after": sum(s["summary"]["total_requests_after"] for s in all_stats),
                "errors": sum(s["summary"]["errors"] for s in all_stats),
                "slow_requests": sum(s["summary"]["slow_requests"] for s in all_stats),
            }
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Total Requests (All Services)", format_number(combined["total_requests_after"]))
            with col2:
                st.metric("Total Errors (All Services)", format_number(combined["errors"]))
            with col3:
                st.metric("Total Slow Requests (All Services)", format_number(combined["slow_requests"]))

        processing_time = time.time() - start_time
        st.success(f"β Analysis completed in {processing_time:.2f} seconds")

        # Clean up temp files.
        # NOTE(review): if processing raises, this cleanup is skipped and the
        # temp files leak — consider a try/finally around the processing loop.
        for temp_file in temp_files:
            Path(temp_file).unlink(missing_ok=True)
    else:
        # Welcome screen shown until the user uploads at least one file.
        st.info("π Upload one or more IIS log files to begin analysis")
        st.markdown("""
### Features
- β‘ **Fast processing** of large files (200MB-1GB+) using Polars
- π **Comprehensive metrics**: RPS, response times, error rates
- π **Detailed analysis**: Top methods, error breakdown, time distribution
- π **Visual reports**: Interactive charts with Plotly
- π **Multi-file support**: Compare multiple services side-by-side

### Log Format
This tool supports **IIS W3C Extended Log Format** with the following fields:
```
date time s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username
c-ip cs(User-Agent) cs(Referer) sc-status sc-substatus sc-win32-status time-taken
```

### Filtering Rules
- Excludes lines with both `HEAD` method and `Zabbix` in User-Agent
- 401 Unauthorized responses are excluded from error counts
- Errors are defined as status codes β 200 and β 401
- Slow requests are those with response time > 3000ms (configurable)
""")
if __name__ == "__main__":
    # Script entry point: build the Streamlit UI when run directly / via `streamlit run`.
    main()