# sc_agent_logger/app.py
# Gradio log-viewer app (uploaded to the Hugging Face Space via huggingface_hub, rev 34d001c).
import json
import os
from datetime import datetime
from io import StringIO

import gradio as gr
import pandas as pd
import plotly.express as px
import requests
# Remote log file URL: raw CSV of chat interactions published by the
# kwfricke/sc_agent Space (fetched fresh on every refresh, no local cache).
REMOTE_LOG_URL = "https://huggingface.co/spaces/kwfricke/sc_agent/resolve/main/logs/chat_logs.csv"
def load_logs():
    """Fetch the chat-log CSV from the remote Space and return it as a DataFrame.

    Downloads ``REMOTE_LOG_URL``, parses it with pandas, coerces the
    ``timestamp`` column to datetimes and the ``query_params`` column from
    JSON strings to dicts. On any failure an empty DataFrame is returned so
    callers can rely on ``df.empty`` instead of handling exceptions.

    Returns:
        pd.DataFrame: processed log entries, or an empty frame on error.
    """
    print("πŸ”Ή Attempting to load logs from remote URL...")
    try:
        # Fetch CSV data from the remote URL. A timeout bounds the wait so a
        # stalled connection cannot hang the UI refresh indefinitely.
        response = requests.get(REMOTE_LOG_URL, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes
        # Parse via io.StringIO: pd.io.common is a private pandas module and
        # its StringIO re-export is not part of the public API (removed in
        # newer pandas releases).
        df = pd.read_csv(
            StringIO(response.text),
            quoting=1,         # csv.QUOTE_ALL — writer quotes every field
            escapechar='\\',   # Use backslash as escape character
            doublequote=True   # Allow doubled quotes within quoted fields
        )
        print(f"πŸ”Ή Raw data shape: {df.shape}")
        print(f"πŸ”Ή Columns found: {list(df.columns)}")
        # Convert timestamp column; fall back to "now" so downstream
        # date filtering/grouping still works on malformed data.
        try:
            df['timestamp'] = pd.to_datetime(df['timestamp'])
        except Exception as e:
            print(f"⚠️ Error converting timestamps: {str(e)}")
            df['timestamp'] = pd.Timestamp.now()
        # Decode query_params: stored as JSON strings, wanted as dicts.
        try:
            df['query_params'] = df['query_params'].fillna('{}')
            df['query_params'] = df['query_params'].apply(
                lambda x: json.loads(x) if isinstance(x, str) else x
            )
        except Exception as e:
            print(f"⚠️ Error parsing query parameters: {str(e)}")
            df['query_params'] = df['query_params'].apply(lambda x: {})
        print(f"βœ… Successfully processed {len(df)} log entries")
        # Print first row for debugging
        if not df.empty:
            print("First row of data:")
            print(df.iloc[0].to_dict())
        return df
    except Exception as e:
        # Best-effort loader: log the failure and return an empty frame so
        # the dashboard renders "no data" instead of crashing.
        print(f"❌ Error loading logs from remote URL: {str(e)}")
        import traceback
        traceback.print_exc()
        return pd.DataFrame()
def filter_logs(df, start_date=None, end_date=None, account_id=None, api_status=None):
    """Return the subset of ``df`` matching the given (all optional) criteria.

    Falsy criteria (None, empty string) are ignored. The input frame is not
    modified; a filtered copy is returned.
    """
    if df.empty:
        return df

    # Build one boolean mask instead of repeatedly slicing the frame.
    keep = pd.Series(True, index=df.index)
    if start_date:
        keep &= df['timestamp'] >= start_date
    if end_date:
        keep &= df['timestamp'] <= end_date
    if account_id:
        keep &= df['account_id'] == account_id
    if api_status:
        keep &= df['api_status'] == api_status
    return df[keep].copy()
def create_summary_stats(df):
    """Create summary statistics from logs"""
    if df.empty:
        return "No data available"

    # Headline metrics.
    total_interactions = len(df)
    success_rate = (df['api_status'] == 'success').mean() * 100
    avg_results = df[df['total_results'].notna()]['total_results'].mean()
    unique_accounts = df['account_id'].nunique()

    # Tally "key: value" occurrences across all query_params dicts and keep
    # the three most frequent for display.
    common_params = ""
    if 'query_params' in df.columns:
        tally = {}
        for entry in df['query_params'].dropna():
            if not isinstance(entry, dict):
                continue
            for key, value in entry.items():
                if value is None or value == '':
                    continue
                label = f"{key}: {value}"
                tally[label] = tally.get(label, 0) + 1
        if tally:
            ranked = sorted(tally.items(), key=lambda item: item[1], reverse=True)
            lines = [f"- {param} (used {count} times)" for param, count in ranked[:3]]
            common_params = "\n\nMost Common Query Parameters:\n" + "\n".join(lines)

    summary = f"""
πŸ“Š Log Summary:
- Total Interactions: {total_interactions}
- Success Rate: {success_rate:.1f}%
- Average Results per Query: {avg_results:.1f}
- Unique Accounts: {unique_accounts}
{common_params}
"""
    return summary
def create_time_series(df):
    """Create time series plot of interactions"""
    if df.empty:
        return None

    # Count interactions per calendar day.
    per_day = df.groupby(df['timestamp'].dt.date).size().reset_index()
    per_day.columns = ['Date', 'Interactions']

    fig = px.line(
        per_day,
        x='Date',
        y='Interactions',
        title='Daily Interactions Over Time',
    )
    fig.update_layout(
        xaxis_title="Date",
        yaxis_title="Number of Interactions",
        showlegend=False,
    )
    return fig
def create_status_pie(df):
    """Create pie chart of API status distribution"""
    if df.empty:
        return None

    # value_counts gives both slice sizes (values) and labels (index).
    counts = df['api_status'].value_counts()
    return px.pie(
        values=counts.values,
        names=counts.index,
        title='API Status Distribution',
    )
def refresh_data(start_date, end_date, account_id, api_status):
    """Reload the remote logs, apply the filters, and rebuild every output.

    Returns a 4-tuple for the Gradio outputs: (summary markdown, time-series
    figure, status pie figure, table rows as list-of-lists of strings).
    """
    print("πŸ”Ή Refreshing data...")
    print(f"Parameters: start_date={start_date}, end_date={end_date}, account_id={account_id}, api_status={api_status}")

    df = load_logs()  # Now loads from remote URL
    if df.empty:
        print("❌ No data available")
        return "No log data available", None, None, []

    filtered_df = filter_logs(df, start_date, end_date, account_id, api_status)
    print(f"πŸ”Ή Filtered to {len(filtered_df)} entries")

    # Rebuild every visualization from the filtered frame.
    summary = create_summary_stats(filtered_df)
    time_plot = create_time_series(filtered_df)
    status_plot = create_status_pie(filtered_df)

    # Column order must match the headers declared on the logs_table component.
    fields = (
        'timestamp', 'interaction_id', 'account_id', 'query', 'query_params',
        'response', 'total_results', 'page', 'like_score', 'api_status',
    )
    try:
        newest_first = filtered_df.sort_values('timestamp', ascending=False)
        table_rows = [
            [str(record.get(field, '')) for field in fields]
            for record in newest_first.to_dict('records')
        ]
    except Exception as e:
        print(f"❌ Error preparing table data: {str(e)}")
        table_rows = []

    print("βœ… Refresh complete")
    return summary, time_plot, status_plot, table_rows
# Create the Gradio interface
with gr.Blocks(title="Chat Log Viewer") as app:
    gr.Markdown("# πŸ“Š Chat Log Viewer")

    # Filter controls.
    with gr.Row():
        with gr.Column(scale=1):
            start_date = gr.DateTime(label="Start Date", value=None)
            end_date = gr.DateTime(label="End Date", value=None)
            account_id = gr.Textbox(label="Account ID (optional)")
            api_status = gr.Dropdown(
                choices=["", "success", "error"],
                label="API Status",
                value=""
            )
            refresh_btn = gr.Button("πŸ”„ Refresh", variant="primary")

    with gr.Row():
        summary_text = gr.Markdown()

    with gr.Row():
        with gr.Column():
            time_series_plot = gr.Plot(label="Interactions Over Time")
        with gr.Column():
            status_plot = gr.Plot(label="API Status Distribution")

    # Field order must match both the headers below and the rows produced by
    # refresh_data, so the initial render and refreshed renders agree.
    _TABLE_FIELDS = (
        'timestamp', 'interaction_id', 'account_id', 'query', 'query_params',
        'response', 'total_results', 'page', 'like_score', 'api_status',
    )
    # Build the initial value as list-of-lists of strings. The previous
    # to_dict('records') produced a list of dicts whose keys did not match
    # the declared headers, so the first render was inconsistent with the
    # table shown after a refresh.
    _initial_rows = [
        [str(record.get(field, '')) for field in _TABLE_FIELDS]
        for record in load_logs().to_dict('records')
    ]

    with gr.Row():
        logs_table = gr.DataFrame(
            headers=[
                "Timestamp",
                "Interaction ID",
                "Account ID",
                "Query",
                "Query Parameters",
                "Response",
                "Total Results",
                "Page",
                "Like Score",
                "API Status"
            ],
            wrap=True,
            value=_initial_rows  # Load data immediately
        )

    # Set up refresh functionality: one click re-fetches, filters, and
    # repopulates all four outputs.
    refresh_btn.click(
        refresh_data,
        inputs=[start_date, end_date, account_id, api_status],
        outputs=[summary_text, time_series_plot, status_plot, logs_table]
    )

if __name__ == "__main__":
    app.launch(share=True)