# odatalogparser / app.py
# Commit 113119c (pilotstuki): Fix slow request threshold not applying from UI
"""
IIS Log Analyzer - Streamlit Application
High-performance log analysis tool for large IIS log files (200MB-1GB+)
"""
import streamlit as st
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd
from pathlib import Path
import tempfile
from typing import List
import time
from log_parser import IISLogParser, LogAnalyzer, analyze_multiple_logs
# Page configuration — must run before any other Streamlit call.
st.set_page_config(
    page_title="IIS Log Analyzer",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS: colored "card" classes used to style metric containers.
_CUSTOM_CSS = """
<style>
    .metric-card {
        background-color: #f0f2f6;
        padding: 20px;
        border-radius: 10px;
        margin: 10px 0;
    }
    .error-metric {
        background-color: #ffebee;
    }
    .success-metric {
        background-color: #e8f5e9;
    }
    .warning-metric {
        background-color: #fff3e0;
    }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
def format_number(num: int) -> str:
    """Render *num* with comma thousand separators, e.g. 1234567 -> '1,234,567'."""
    return format(num, ",")
def create_summary_table(stats: dict) -> pd.DataFrame:
    """Build the two-column Metric/Value summary table for the Summary tab.

    Args:
        stats: Summary dict (produced by the analyzer) with keys
            ``total_requests_before``, ``excluded_requests``,
            ``total_requests_after``, ``errors``, ``slow_requests``,
            ``peak_rps``, ``peak_timestamp``, ``avg_time_ms``,
            ``max_time_ms``, ``min_time_ms`` and optionally
            ``slow_threshold`` (milliseconds, defaults to 3000).

    Returns:
        pd.DataFrame with columns "Metric" and "Value"; counts are
        thousand-separated strings, missing peak timestamp shows as "N/A".
    """
    threshold_ms = stats.get("slow_threshold", 3000)
    # Bug fix: the original conditional had two identical branches, so
    # thresholds of 1000ms and above were never displayed in seconds.
    if threshold_ms >= 1000:
        threshold_display = f">{threshold_ms / 1000:g}s"
    else:
        threshold_display = f">{threshold_ms}ms"

    def _fmt(n: int) -> str:
        # Thousand separators for readability (e.g. 1234567 -> "1,234,567").
        return f"{n:,}"

    data = {
        "Metric": [
            "Total Requests (before filtering)",
            "Excluded Requests (HEAD+Zabbix + 401)",
            "Processed Requests",
            "Errors (≠200, ≠401)",
            f"Slow Requests ({threshold_display})",
            "Peak RPS",
            "Peak Timestamp",
            "Avg Response Time (ms)",
            "Max Response Time (ms)",
            "Min Response Time (ms)",
        ],
        "Value": [
            _fmt(stats["total_requests_before"]),
            _fmt(stats["excluded_requests"]),
            _fmt(stats["total_requests_after"]),
            _fmt(stats["errors"]),
            _fmt(stats["slow_requests"]),
            _fmt(stats["peak_rps"]),
            stats["peak_timestamp"] or "N/A",  # None -> "N/A" placeholder
            _fmt(stats["avg_time_ms"]),
            _fmt(stats["max_time_ms"]),
            _fmt(stats["min_time_ms"]),
        ],
    }
    return pd.DataFrame(data)
def create_response_time_chart(dist: dict, title: str) -> go.Figure:
    """Bar chart of request counts per response-time bucket.

    Args:
        dist: Mapping of bucket label -> request count (insertion order kept).
        title: Chart title.
    """
    buckets = list(dist)
    counts = [dist[b] for b in buckets]
    bar = go.Bar(
        x=buckets,
        y=counts,
        marker_color='lightblue',
        text=counts,
        textposition='auto',
    )
    fig = go.Figure(data=[bar])
    fig.update_layout(
        title=title,
        xaxis_title="Response Time Range",
        yaxis_title="Request Count",
        height=400,
        showlegend=False,
    )
    return fig
def create_top_methods_chart(methods: List[dict], title: str) -> go.Figure:
    """Side-by-side bars: per-method request count and average response time.

    Args:
        methods: Records with keys ``method_name``, ``count``, ``avg_time``.
        title: Overall figure title.

    Returns:
        Two-subplot Plotly figure; empty figure when *methods* is empty.
    """
    if not methods:
        return go.Figure()
    df = pd.DataFrame(methods)
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=("Request Count", "Avg Response Time (ms)"),
    )
    # (column index, y-values, trace name, bar color) for each panel.
    panels = (
        (1, df["count"], "Count", 'steelblue'),
        (2, df["avg_time"].round(1), "Avg Time", 'coral'),
    )
    for col, values, label, color in panels:
        fig.add_trace(
            go.Bar(
                x=df["method_name"],
                y=values,
                name=label,
                marker_color=color,
                text=values,
                textposition='auto',
            ),
            row=1, col=col,
        )
    fig.update_layout(title_text=title, height=400, showlegend=False)
    return fig
def create_metrics_comparison(individual_stats: List[dict]) -> go.Figure:
    """Three-panel comparison across services: volume, errors, avg latency.

    Args:
        individual_stats: Per-service stats dicts, each with a ``summary``
            dict providing ``service_name``, ``total_requests_after``,
            ``errors`` and ``avg_time_ms``.
    """
    summaries = [s["summary"] for s in individual_stats]
    services = [s["service_name"] for s in summaries]
    fig = make_subplots(
        rows=1, cols=3,
        subplot_titles=("Processed Requests", "Errors", "Avg Response Time (ms)"),
        specs=[[{"type": "bar"}, {"type": "bar"}, {"type": "bar"}]],
    )
    # (summary key, bar color, subplot column) for each comparison panel.
    panels = (
        ("total_requests_after", 'lightblue', 1),
        ("errors", 'salmon', 2),
        ("avg_time_ms", 'lightgreen', 3),
    )
    for key, color, col in panels:
        values = [s[key] for s in summaries]
        fig.add_trace(
            go.Bar(x=services, y=values, marker_color=color, text=values, textposition='auto'),
            row=1, col=col,
        )
    fig.update_layout(
        title_text="Service Comparison",
        height=400,
        showlegend=False,
    )
    return fig
def process_log_file(file_path: str, service_name: str = None, slow_threshold: int = 3000) -> dict:
    """Parse and analyze a single IIS log file.

    Args:
        file_path: Path to the log file on disk.
        service_name: Optional override for the service name inferred by the
            parser; falsy values leave the parser's own name in place.
        slow_threshold: Response-time threshold in milliseconds above which a
            request is counted as slow.

    Returns:
        Dict with the analysis results ("summary", "top_methods",
        "error_breakdown", "errors_by_method", "response_time_dist") plus the
        live "analyzer" instance for detailed drill-down queries, or None when
        the file contains no valid log entries.
    """
    parser = IISLogParser(file_path)
    if service_name:
        parser.service_name = service_name
    with st.spinner(f"Parsing {Path(file_path).name}..."):
        df = parser.parse()
    # df.height == 0: the parsed frame has no rows, so there is nothing to analyze.
    if df.height == 0:
        st.error(f"No valid log entries found in {Path(file_path).name}")
        return None
    with st.spinner(f"Analyzing {parser.service_name}..."):
        # Forward the UI-selected threshold so it actually applies to the analysis.
        analyzer = LogAnalyzer(df, parser.service_name, slow_threshold)
        stats = {
            "summary": analyzer.get_summary_stats(),
            "top_methods": analyzer.get_top_methods(),
            "error_breakdown": analyzer.get_error_breakdown(),
            "errors_by_method": analyzer.get_errors_by_method(n=10),
            "response_time_dist": analyzer.get_response_time_distribution(),
            "analyzer": analyzer,  # Keep reference for detailed error queries
        }
    return stats
def main():
    """Streamlit entry point: sidebar configuration, per-file analysis, comparison view.

    Flow: render sidebar controls -> persist uploads to temp files -> run
    process_log_file per file and show metrics/tabs -> optional multi-service
    comparison -> clean up temp files. With no uploads, shows a welcome screen.
    """
    st.title("📊 IIS Log Performance Analyzer")
    st.markdown("High-performance analysis tool for large IIS log files (up to 1GB+)")
    # Sidebar
    st.sidebar.header("Configuration")
    # File upload mode
    upload_mode = st.sidebar.radio(
        "Upload Mode",
        ["Single File", "Multiple Files"],
        help="Analyze one or multiple log files"
    )
    # File uploader — both branches normalize to a list of uploaded files.
    if upload_mode == "Single File":
        uploaded_files = st.sidebar.file_uploader(
            "Upload IIS Log File",
            type=["log", "txt"],
            help="Upload IIS W3C Extended format log file"
        )
        # Wrap the single upload in a list so the processing loop below is uniform.
        uploaded_files = [uploaded_files] if uploaded_files else []
    else:
        uploaded_files = st.sidebar.file_uploader(
            "Upload IIS Log Files",
            type=["log", "txt"],
            accept_multiple_files=True,
            help="Upload multiple IIS log files for comparison"
        )
    # Analysis options
    st.sidebar.header("Analysis Options")
    show_top_n = st.sidebar.slider("Top N Methods", 3, 20, 5)
    # Threshold (ms) is passed through to process_log_file so the UI value applies.
    slow_threshold = st.sidebar.number_input(
        "Slow Request Threshold (ms)",
        min_value=100,
        max_value=10000,
        value=3000,
        step=100
    )
    # Process files
    if uploaded_files:
        st.info(f"Processing {len(uploaded_files)} file(s)...")
        # Save uploaded files to temp directory
        # (delete=False: files must outlive the `with`; removed explicitly below)
        temp_files = []
        for uploaded_file in uploaded_files:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".log") as tmp:
                tmp.write(uploaded_file.getvalue())
                temp_files.append(tmp.name)
        start_time = time.time()
        # Process each file
        all_stats = []
        for i, temp_file in enumerate(temp_files):
            file_name = uploaded_files[i].name
            st.subheader(f"📄 {file_name}")
            stats = process_log_file(temp_file, None, slow_threshold)
            # stats is None when the file held no valid entries; skip its UI then.
            if stats:
                all_stats.append(stats)
                # Display summary metrics
                col1, col2, col3, col4 = st.columns(4)
                with col1:
                    st.metric(
                        "Total Requests",
                        format_number(stats["summary"]["total_requests_after"])
                    )
                with col2:
                    st.metric(
                        "Errors",
                        format_number(stats["summary"]["errors"]),
                        delta=None,
                        delta_color="inverse"
                    )
                with col3:
                    st.metric(
                        "Avg Time (ms)",
                        format_number(stats["summary"]["avg_time_ms"])
                    )
                with col4:
                    st.metric(
                        "Peak RPS",
                        format_number(stats["summary"]["peak_rps"])
                    )
                # Tabs for detailed analysis
                tab1, tab2, tab3, tab4, tab5 = st.tabs([
                    "Summary", "Top Methods", "Response Time", "Error Breakdown", "Errors by Method"
                ])
                with tab1:
                    st.dataframe(
                        create_summary_table(stats["summary"]),
                        hide_index=True,
                        use_container_width=True
                    )
                with tab2:
                    if stats["top_methods"]:
                        st.plotly_chart(
                            create_top_methods_chart(
                                stats["top_methods"][:show_top_n],
                                f"Top {show_top_n} Methods - {stats['summary']['service_name']}"
                            ),
                            use_container_width=True
                        )
                        # Show table
                        methods_df = pd.DataFrame(stats["top_methods"][:show_top_n])
                        methods_df["avg_time"] = methods_df["avg_time"].round(1)
                        st.dataframe(methods_df, hide_index=True, use_container_width=True)
                    else:
                        st.info("No method data available")
                with tab3:
                    if stats["response_time_dist"]:
                        st.plotly_chart(
                            create_response_time_chart(
                                stats["response_time_dist"],
                                f"Response Time Distribution - {stats['summary']['service_name']}"
                            ),
                            use_container_width=True
                        )
                    else:
                        st.info("No response time distribution data")
                with tab4:
                    if stats["error_breakdown"]:
                        error_df = pd.DataFrame(stats["error_breakdown"])
                        error_df.columns = ["Status Code", "Count"]
                        st.dataframe(error_df, hide_index=True, use_container_width=True)
                        # Pie chart
                        fig = px.pie(
                            error_df,
                            values="Count",
                            names="Status Code",
                            title=f"Error Distribution - {stats['summary']['service_name']}"
                        )
                        st.plotly_chart(fig, use_container_width=True)
                    else:
                        st.success("No errors found! ✓")
                with tab5:
                    st.markdown("### 🔍 Errors by Method")
                    st.markdown("This view shows which specific methods are causing errors, with full context for debugging.")
                    if stats["errors_by_method"]:
                        # Display summary table
                        errors_method_df = pd.DataFrame(stats["errors_by_method"])
                        errors_method_df["error_rate_percent"] = errors_method_df["error_rate_percent"].round(2)
                        errors_method_df["avg_response_time_ms"] = errors_method_df["avg_response_time_ms"].round(1)
                        # Rename columns for better display
                        errors_method_df.columns = [
                            "Method Path", "Total Calls", "Error Count",
                            "Most Common Error", "Avg Response Time (ms)", "Error Rate (%)"
                        ]
                        st.dataframe(errors_method_df, hide_index=True, use_container_width=True)
                        # Bar chart of top error-prone methods
                        fig = go.Figure()
                        fig.add_trace(go.Bar(
                            x=errors_method_df["Method Path"],
                            y=errors_method_df["Error Count"],
                            marker_color='red',
                            text=errors_method_df["Error Count"],
                            textposition='auto',
                            name="Error Count"
                        ))
                        fig.update_layout(
                            title=f"Top Error-Prone Methods - {stats['summary']['service_name']}",
                            xaxis_title="Method Path",
                            yaxis_title="Error Count",
                            height=400,
                            showlegend=False
                        )
                        st.plotly_chart(fig, use_container_width=True)
                        # Allow users to drill down into specific methods
                        st.markdown("#### 🔎 Detailed Error Logs")
                        # key is per-file so each file's selectbox keeps its own state.
                        selected_method = st.selectbox(
                            "Select a method to view detailed error logs:",
                            options=["All"] + errors_method_df["Method Path"].tolist(),
                            key=f"method_select_{file_name}"
                        )
                        if selected_method and selected_method != "All":
                            error_details = stats["analyzer"].get_error_details(
                                method_path=selected_method,
                                limit=50
                            )
                            if error_details:
                                details_df = pd.DataFrame(error_details)
                                st.dataframe(details_df, hide_index=True, use_container_width=True)
                                st.info(f"Showing up to 50 most recent errors for {selected_method}")
                            else:
                                st.info(f"No error details found for {selected_method}")
                        elif selected_method == "All":
                            error_details = stats["analyzer"].get_error_details(limit=50)
                            if error_details:
                                details_df = pd.DataFrame(error_details)
                                st.dataframe(details_df, hide_index=True, use_container_width=True)
                                st.info("Showing up to 50 most recent errors across all methods")
                    else:
                        st.success("No errors found in any methods! ✓")
            st.divider()
        # Multi-file comparison — only shown when at least two files analyzed.
        if len(all_stats) > 1:
            st.header("📊 Service Comparison")
            st.plotly_chart(
                create_metrics_comparison(all_stats),
                use_container_width=True
            )
            # Combined summary
            combined = {
                "total_requests_before": sum(s["summary"]["total_requests_before"] for s in all_stats),
                "excluded_requests": sum(s["summary"]["excluded_requests"] for s in all_stats),
                "total_requests_after": sum(s["summary"]["total_requests_after"] for s in all_stats),
                "errors": sum(s["summary"]["errors"] for s in all_stats),
                "slow_requests": sum(s["summary"]["slow_requests"] for s in all_stats),
            }
            st.subheader("Combined Statistics")
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Total Requests (All Services)", format_number(combined["total_requests_after"]))
            with col2:
                st.metric("Total Errors (All Services)", format_number(combined["errors"]))
            with col3:
                st.metric("Total Slow Requests (All Services)", format_number(combined["slow_requests"]))
        processing_time = time.time() - start_time
        st.success(f"✓ Analysis completed in {processing_time:.2f} seconds")
        # Clean up temp files
        for temp_file in temp_files:
            Path(temp_file).unlink(missing_ok=True)
    else:
        # Welcome screen
        st.info("👆 Upload one or more IIS log files to begin analysis")
        st.markdown("""
        ### Features
        - ⚡ **Fast processing** of large files (200MB-1GB+) using Polars
        - 📊 **Comprehensive metrics**: RPS, response times, error rates
        - 🔍 **Detailed analysis**: Top methods, error breakdown, time distribution
        - 📈 **Visual reports**: Interactive charts with Plotly
        - 🔄 **Multi-file support**: Compare multiple services side-by-side
        ### Log Format
        This tool supports **IIS W3C Extended Log Format** with the following fields:
        ```
        date time s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username
        c-ip cs(User-Agent) cs(Referer) sc-status sc-substatus sc-win32-status time-taken
        ```
        ### Filtering Rules
        - Excludes lines with both `HEAD` method and `Zabbix` in User-Agent
        - 401 Unauthorized responses are excluded from error counts
        - Errors are defined as status codes ≠ 200 and ≠ 401
        - Slow requests are those with response time > 3000ms (configurable)
        """)
if __name__ == "__main__":
main()