berangerthomas committed on
Commit
d1fd9e1
·
1 Parent(s): f972805

Add multiple views and graphs

Browse files
app.py CHANGED
@@ -1,114 +1,39 @@
1
- #####################################################
2
- #### Imports ####
3
- #####################################################
4
- import os
5
- import tempfile
6
- from datetime import datetime
7
 
8
  import streamlit as st
9
 
10
- from config.log_definitions import log_definitions
11
- from utils.log2pandas import LogParser
12
- from utils.pandas2sql import Pandas2SQL
13
 
14
- #####################################################
15
- #### Interface Setup ####
16
- #####################################################
17
 
18
- st.title("ShadowLog - Log File Analyzer")
19
- st.write("Upload a log file to analyze and/or convert it to SQLite")
 
 
20
 
21
- # File upload widget
22
- uploaded_file = st.file_uploader("Choose a log file")
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # Get available log types from log_definitions
25
- log_types = list(log_definitions.keys())
26
- # Set default log type if not already in session state
27
- if "log_type" not in st.session_state:
28
- st.session_state.log_type = log_types[0] # Default to first log type
29
 
30
- st.session_state.log_type = st.selectbox(
31
- "Select log type", log_types, index=log_types.index(st.session_state.log_type)
32
- )
33
 
34
- # Store the parsed dataframe in the session state
35
- if "parsed_df" not in st.session_state:
36
- st.session_state.parsed_df = None
37
 
38
- if uploaded_file is not None:
39
- # Create two columns for the buttons
40
- col1, col2 = st.columns(2)
 
 
41
 
42
- with col1:
43
- # Button to parse the log file
44
- if st.button("Parse the log file"):
45
- with st.spinner("Analyzing the file..."):
46
- # Create a temporary file
47
- with tempfile.NamedTemporaryFile(
48
- delete=False, suffix=".log"
49
- ) as tmp_file:
50
- tmp_file.write(uploaded_file.getvalue())
51
- tmp_path = tmp_file.name
52
-
53
- try:
54
- # Parse the log file
55
- parser = LogParser(tmp_path, st.session_state.log_type)
56
- st.session_state.parsed_df = parser.parse_file()
57
-
58
- # Display a success message and the dataframe
59
- st.success("Log file successfully analyzed!")
60
- # st.dataframe(st.session_state.parsed_df)
61
- except Exception as e:
62
- st.error(f"Error analyzing the file: {e}")
63
- finally:
64
- # Clean up the temporary file
65
- os.unlink(tmp_path)
66
-
67
- with col2:
68
- # Button to convert to SQLite and download
69
- if st.button("Convert to SQLite"):
70
- if st.session_state.parsed_df is not None:
71
- with st.spinner("Converting to SQLite..."):
72
- try:
73
- # Create a temporary SQLite file
74
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
75
- sqlite_path = os.path.join(
76
- tempfile.gettempdir(), f"log_data_{timestamp}.sqlite"
77
- )
78
-
79
- # Create the SQL converter
80
- sql_converter = Pandas2SQL(sqlite_path)
81
-
82
- # Convert the dataframe to SQLite
83
- sql_converter.create_table(
84
- st.session_state.parsed_df, st.session_state.log_type
85
- )
86
-
87
- # Read the SQLite file for download
88
- with open(sqlite_path, "rb") as file:
89
- sqlite_data = file.read()
90
-
91
- # Success message and immediate download
92
- st.success("SQLite file created successfully!")
93
-
94
- # Download button
95
- st.download_button(
96
- label="Download SQLite file",
97
- data=sqlite_data,
98
- file_name=f"log_file_{st.session_state.log_type}_{timestamp}.sqlite",
99
- mime="application/octet-stream",
100
- key="auto_download",
101
- )
102
- except Exception as e:
103
- st.error(f"Error converting to SQLite: {e}")
104
- finally:
105
- # Clean up the temporary file
106
- if os.path.exists(sqlite_path):
107
- os.unlink(sqlite_path)
108
- else:
109
- st.warning("Please parse the log file first.")
110
-
111
- # Display the dataframe if available
112
- if st.session_state.parsed_df is not None:
113
- st.subheader("Analyzed log data")
114
- st.dataframe(st.session_state.parsed_df)
 
1
import base64

import streamlit as st

# Global page configuration — Streamlit requires this to be the first
# st.* command executed in the app, before any page content is rendered.
# NOTE(review): page_title carries stray padding spaces (" ShadowLog ") —
# confirm this is intentional.
st.set_page_config(page_title=" ShadowLog ", page_icon="assets/logo.png", layout="wide")
8
def add_logo():
    """Show the app logo above the sidebar navigation.

    Reads the local PNG, embeds it as a base64 data URI, and injects a CSS
    override targeting Streamlit's sidebar-nav container, so the image is
    displayed without any static-file serving.
    """
    # Read the local image file and base64-encode it for inline embedding.
    with open("assets/small_logo_no_text.png", "rb") as logo_file:
        encoded_logo = base64.b64encode(logo_file.read()).decode()

    sidebar_css = f"""
        <style>
        [data-testid="stSidebarNav"] {{
            background-image: url("data:image/png;base64,{encoded_logo}");
            background-repeat: no-repeat;
            padding-top: 225px;
            background-position: center 20px;
            background-size: 50%;
        }}
        </style>
        """
    # unsafe_allow_html is required for raw <style> injection.
    st.markdown(sidebar_css, unsafe_allow_html=True)
27
 
 
 
 
 
 
28
 
29
add_logo()


# Pages definition — st.navigation builds the sidebar menu from these
# entries and returns the page the user selected; pg.run() renders it.
home = st.Page("pages/home.py", title="🏠 Home")
upload = st.Page("pages/upload.py", title="📥 Upload")
# Fix: the title had a stray leading space (" 📊 Analyze"), which made the
# nav entry render misaligned relative to its siblings.
analyze = st.Page("pages/analyze.py", title="📊 Analyze")
about = st.Page("pages/about.py", title="📄 About")

pg = st.navigation([home, upload, analyze, about])
pg.run()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
assets/logo.jpg ADDED
assets/logo_large.png ADDED
assets/no_text_logo.png ADDED
assets/small_logo.png ADDED
assets/small_logo_background.png ADDED
assets/small_logo_grey_background.png ADDED
assets/small_logo_no_text.png ADDED
assets/transparent_logo.png ADDED
pages/about.py ADDED
File without changes
pages/analyze.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Analyze page: interactive charts over the parsed log dataframe.

Reads the dataframe stored in ``st.session_state.parsed_df`` by the Upload
page and renders a user-selected visualization (pie, bar, histogram, or —
when a datetime column is available — a time series), plus an optional
filtered raw-data view.
"""

import pandas as pd
import plotly.express as px
import streamlit as st

# Make sure the session key exists even if the user lands here first.
if "parsed_df" not in st.session_state:
    st.session_state.parsed_df = None

# Page title
st.title("Data Analysis")

# Loading data — nothing to analyze until a file has been parsed.
if st.session_state.parsed_df is None:
    st.info("Please upload a log file on the 'Upload' page.")
    st.stop()

data = st.session_state.parsed_df

# Sidebar for controls
st.sidebar.header("Visualization Options")

# Check if there are datetime columns
datetime_columns = data.select_dtypes(include=["datetime64"]).columns.tolist()
# Try to detect string columns that could be dates.
# NOTE(review): pd.to_datetime on a full object column may also accept
# numeric-looking strings as dates — confirm false positives are acceptable.
if not datetime_columns:
    for col in data.select_dtypes(include=["object"]).columns:
        try:
            pd.to_datetime(data[col], errors="raise")
            datetime_columns.append(col)
        except (ValueError, TypeError):
            pass

# Chart type options — Time Series is offered only when a datetime-like
# column was found above.
chart_options = ["Pie Chart", "Bar Chart", "Histogram"]
if datetime_columns:
    chart_options.append("Time Series")

chart_type = st.sidebar.selectbox("Choose chart type", chart_options)

# Get categorical columns
categorical_columns = data.select_dtypes(include=["object"]).columns.tolist()

# Main area for visualization.
# Names bound inside these branches (selected_column, datetime_col,
# numerical_columns) are reused by the "Show raw data" section below.
if chart_type == "Pie Chart":
    st.header("Pie Chart")

    # Select variable to visualize
    selected_column = st.sidebar.selectbox(
        "Select a categorical variable", categorical_columns
    )

    # Create and display pie chart
    fig = px.pie(
        data,
        names=selected_column,
        title=f"Distribution of '{selected_column}'",
    )
    st.plotly_chart(fig)

    # Display value table
    st.write("Value distribution:")
    st.write(data[selected_column].value_counts())

elif chart_type == "Bar Chart":
    st.header("Bar Chart")

    selected_column = st.sidebar.selectbox("Select a variable", categorical_columns)

    results = data[selected_column].value_counts().reset_index()
    results.columns = ["category", "count"]  # Explicitly rename columns
    fig = px.bar(
        results,
        x="category",
        y="count",
        labels={"category": selected_column, "count": "Count"},
    )
    st.plotly_chart(fig)

elif chart_type == "Histogram":
    st.header("Histogram")

    numerical_columns = data.select_dtypes(include=["int", "float"]).columns.tolist()
    if numerical_columns:
        selected_column = st.sidebar.selectbox(
            "Select a numerical variable", numerical_columns
        )
        fig = px.histogram(data, x=selected_column)
        st.plotly_chart(fig)
    else:
        st.write("No numerical columns available for histogram.")

elif chart_type == "Time Series":
    st.header("Time Series")

    # Select datetime column for x-axis
    datetime_col = st.sidebar.selectbox("Select datetime column", datetime_columns)

    # Convert to datetime if needed (the detection above may have kept the
    # column as plain strings).
    if data[datetime_col].dtype != "datetime64[ns]":
        data[datetime_col] = pd.to_datetime(data[datetime_col])

    # Get numerical columns for y-axis
    numerical_columns = data.select_dtypes(include=["int", "float"]).columns.tolist()

    if numerical_columns:
        y_column = st.sidebar.selectbox("Select y-axis variable", numerical_columns)

        # Option to aggregate data (mean per period) before plotting.
        if st.sidebar.checkbox("Aggregate by time period"):
            period = st.sidebar.selectbox(
                "Select period", ["Day", "Week", "Month", "Year"]
            )
            # NOTE(review): the "M" and "Y" aliases are deprecated in
            # pandas >= 2.2 (use "ME"/"YE") — verify the pinned pandas version.
            freq_map = {"Day": "D", "Week": "W", "Month": "M", "Year": "Y"}

            grouped_data = (
                data.groupby(pd.Grouper(key=datetime_col, freq=freq_map[period]))[
                    y_column
                ]
                .mean()
                .reset_index()
            )

            fig = px.line(
                grouped_data,
                x=datetime_col,
                y=y_column,
                title=f"{y_column} over time (by {period.lower()})",
            )
        else:
            fig = px.line(
                data.sort_values(by=datetime_col),
                x=datetime_col,
                y=y_column,
                title=f"{y_column} over time",
            )

        st.plotly_chart(fig)
    else:
        st.write("No numerical columns available for y-axis.")

# Option to display raw data, filtered consistently with the chosen chart.
if st.sidebar.checkbox("Show raw data"):
    st.subheader("Data")

    if chart_type in ["Pie Chart", "Bar Chart"]:
        # For categorical charts, allow filtering by category
        filter_option = st.selectbox(
            f"Filter by {selected_column}:",
            ["Show all data"] + sorted(data[selected_column].unique().tolist()),
        )

        if filter_option != "Show all data":
            filtered_data = data[data[selected_column] == filter_option]
            st.write(filtered_data)
        else:
            st.write(data)

    elif chart_type == "Histogram" and numerical_columns:
        # For histogram, allow filtering by value range
        min_val = float(data[selected_column].min())
        max_val = float(data[selected_column].max())

        selected_range = st.slider(
            f"Filter by {selected_column} range:", min_val, max_val, (min_val, max_val)
        )

        filtered_data = data[
            (data[selected_column] >= selected_range[0])
            & (data[selected_column] <= selected_range[1])
        ]
        st.write(filtered_data)
    elif chart_type == "Time Series":
        # For time series, filter by date range
        min_date = data[datetime_col].min().date()
        max_date = data[datetime_col].max().date()

        date_range = st.date_input(
            "Filter by date range",
            value=[min_date, max_date],
            min_value=min_date,
            max_value=max_date,
        )

        # st.date_input may return a single date while the user is still
        # picking; only filter once both ends are set.
        if len(date_range) == 2:
            start_date, end_date = date_range
            filtered_data = data[
                (data[datetime_col].dt.date >= start_date)
                & (data[datetime_col].dt.date <= end_date)
            ]
            st.write(filtered_data)
        else:
            st.write(data)
    else:
        st.write(data)
pages/home.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Home page: landing screen with logo, feature overview and quick start."""

import streamlit as st
from PIL import Image

# Page configuration
# (kept disabled: st.set_page_config is called once, globally, in app.py)
# st.set_page_config(page_title="ShadowLog - Home", page_icon="📊", layout="wide")

# Main page with logo — degrade gracefully if the asset is missing.
try:
    logo = Image.open("assets/logo_large.png")
    st.image(logo, use_container_width=True)
except FileNotFoundError:
    st.error("Logo not found. Please check the path: assets/logo_large.png")

# Main content
st.title("Welcome to ShadowLog")
st.markdown("### Your Advanced Log Analysis Platform")

# Introduction
st.write("""
ShadowLog is a powerful tool designed to simplify and enhance log file analysis.
Whether you're debugging an application, monitoring system performance, or investigating security incidents,
ShadowLog provides the tools you need to efficiently process and extract insights from your log data.
""")

# Features section — two-column layout, two feature cards per column.
st.header("Key Features")

col1, col2 = st.columns(2)

with col1:
    st.subheader("📁 Log File Upload")
    st.write("""
    Upload log files in various formats including text, JSON, CSV, and more.
    Support for compressed files (.zip, .gz) is also available.
    """)

    st.subheader("🔍 Advanced Parsing")
    st.write("""
    Automatically detect log formats or configure custom parsing rules.
    Extract timestamp, log level, and message content with ease.
    """)

with col2:
    st.subheader("📊 Visual Analysis")
    st.write("""
    Generate insightful visualizations based on your log data.
    Track patterns, anomalies, and trends to quickly identify issues.
    """)

    st.subheader("🔎 Search & Filter")
    st.write("""
    Powerful search functionality to find specific events or errors.
    Filter logs by time, severity, source, or custom attributes.
    """)

# Getting started section
st.header("Getting Started")
st.write("""
To begin analyzing your log files:
1. Navigate to the 'Upload' page using the sidebar
2. Upload your log file or select a sample file
3. Configure parsing options if needed
4. Explore the generated analysis and visualizations

Check out the documentation for more detailed instructions and advanced features.
""")
pages/upload.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Upload page: parse a log file and optionally export it to SQLite.

The parsed dataframe is stored in ``st.session_state.parsed_df`` so other
pages (e.g. Analyze) can reuse it across reruns.
"""

#####################################################
#### Imports ####
#####################################################
import os
import tempfile
from datetime import datetime

import streamlit as st

from config.log_definitions import log_definitions
from utils.log2pandas import LogParser
from utils.pandas2sql import Pandas2SQL

#####################################################
#### Interface Setup ####
#####################################################

st.title("ShadowLog - Log File Analyzer")
st.write("Upload a log file to analyze")

# File upload widget
uploaded_file = st.file_uploader("Choose a log file")

# Get available log types from log_definitions
log_types = list(log_definitions.keys())
# Set default log type if not already in session state
if "log_type" not in st.session_state:
    st.session_state.log_type = log_types[0]  # Default to first log type

# Persist the user's choice so it survives page switches and reruns.
st.session_state.log_type = st.selectbox(
    "Select log type", log_types, index=log_types.index(st.session_state.log_type)
)

# Store the parsed dataframe in the session state
if "parsed_df" not in st.session_state:
    st.session_state.parsed_df = None

if uploaded_file is not None:
    # Create two columns for the buttons
    col1, col2 = st.columns(2)

    with col1:
        # Button to parse the log file
        if st.button("Parse the log file"):
            with st.spinner("Analyzing the file..."):
                # Create a temporary file — the parser needs a real path,
                # not the in-memory upload buffer. delete=False so it
                # survives the `with` block; removed in `finally` below.
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".log"
                ) as tmp_file:
                    tmp_file.write(uploaded_file.getvalue())
                    tmp_path = tmp_file.name

                try:
                    # Parse the log file
                    parser = LogParser(tmp_path, st.session_state.log_type)
                    st.session_state.parsed_df = parser.parse_file()

                    # Display a success message and the dataframe
                    st.success("Log file successfully analyzed!")
                    # st.dataframe(st.session_state.parsed_df)
                except Exception as e:
                    st.error(f"Error analyzing the file: {e}")
                finally:
                    # Clean up the temporary file
                    os.unlink(tmp_path)

    with col2:
        # Button to convert to SQLite and download
        if st.button("Convert to SQLite"):
            if st.session_state.parsed_df is not None:
                with st.spinner("Converting to SQLite..."):
                    try:
                        # Create a temporary SQLite file; the timestamp makes
                        # the name unique across conversions.
                        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                        sqlite_path = os.path.join(
                            tempfile.gettempdir(), f"log_data_{timestamp}.sqlite"
                        )

                        # Create the SQL converter
                        sql_converter = Pandas2SQL(sqlite_path)

                        # Convert the dataframe to SQLite
                        sql_converter.create_table(
                            st.session_state.parsed_df, st.session_state.log_type
                        )

                        # Read the SQLite file for download
                        with open(sqlite_path, "rb") as file:
                            sqlite_data = file.read()

                        # Success message and immediate download
                        st.success("SQLite file created successfully!")

                        # Download button
                        # NOTE(review): this button is created inside the
                        # click branch, so it disappears on the next rerun —
                        # confirm this UX is acceptable.
                        st.download_button(
                            label="Download SQLite file",
                            data=sqlite_data,
                            file_name=f"log_file_{st.session_state.log_type}_{timestamp}.sqlite",
                            mime="application/octet-stream",
                            key="auto_download",
                        )
                    except Exception as e:
                        st.error(f"Error converting to SQLite: {e}")
                    finally:
                        # Clean up the temporary file
                        if os.path.exists(sqlite_path):
                            os.unlink(sqlite_path)
            else:
                st.warning("Please parse the log file first.")

# Display the dataframe if available
if st.session_state.parsed_df is not None:
    st.subheader("Analyzed log data")
    st.dataframe(st.session_state.parsed_df)
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  pandas
2
- streamlit
 
 
1
  pandas
2
+ streamlit
3
+ plotly