Spaces:
Sleeping
Sleeping
import io
import json
import os
from collections import Counter
from datetime import datetime

import gradio as gr
import pandas as pd
import plotly.express as px
import requests
# Remote log file URL: raw CSV served from the kwfricke/sc_agent Space repo,
# fetched fresh on every load_logs() call.
REMOTE_LOG_URL = "https://huggingface.co/spaces/kwfricke/sc_agent/resolve/main/logs/chat_logs.csv"
def load_logs():
    """Load and process logs from the remote CSV file.

    Fetches the CSV at REMOTE_LOG_URL, parses the ``timestamp`` column and
    decodes the JSON-encoded ``query_params`` column into dicts.

    Returns:
        pd.DataFrame: the processed log entries, or an empty DataFrame on any
        failure (network error, bad HTTP status, malformed CSV).
    """
    print("πΉ Attempting to load logs from remote URL...")
    try:
        # Fetch CSV data from the remote URL; bound the wait so a hung
        # server cannot stall the dashboard forever.
        response = requests.get(REMOTE_LOG_URL, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes

        # Read CSV data from the response content.  io.StringIO is the public
        # API; pd.io.common.StringIO is private and removed in modern pandas.
        df = pd.read_csv(
            io.StringIO(response.text),
            quoting=1,         # Quote all fields
            escapechar='\\',   # Use backslash as escape character
            doublequote=True,  # Allow double quotes within fields
        )
        print(f"πΉ Raw data shape: {df.shape}")
        print(f"πΉ Columns found: {list(df.columns)}")

        # Convert timestamps; fall back to "now" if the column is unparseable
        # so downstream date filtering/grouping still works.
        try:
            df['timestamp'] = pd.to_datetime(df['timestamp'])
        except Exception as e:
            print(f"β οΈ Error converting timestamps: {str(e)}")
            df['timestamp'] = pd.Timestamp.now()

        # Decode the JSON-encoded query_params column; non-strings are
        # assumed to already be decoded.
        try:
            df['query_params'] = df['query_params'].fillna('{}')
            df['query_params'] = df['query_params'].apply(
                lambda x: json.loads(x) if isinstance(x, str) else x
            )
        except Exception as e:
            print(f"β οΈ Error parsing query parameters: {str(e)}")
            df['query_params'] = df['query_params'].apply(lambda x: {})

        print(f"β Successfully processed {len(df)} log entries")
        # Print first row for debugging
        if not df.empty:
            print("First row of data:")
            print(df.iloc[0].to_dict())
        return df
    except Exception as e:
        # Broad catch is deliberate: the dashboard should render (empty)
        # rather than crash when the log fetch fails.
        print(f"β Error loading logs from remote URL: {str(e)}")
        import traceback
        traceback.print_exc()
        return pd.DataFrame()
def filter_logs(df, start_date=None, end_date=None, account_id=None, api_status=None):
    """Return the rows of *df* that satisfy every supplied criterion.

    Falsy criteria (None, empty string) are ignored, so passing no filters
    returns a copy of the full frame.  An empty frame is returned unchanged.
    """
    if df.empty:
        return df

    selected = df.copy()
    keep = pd.Series(True, index=selected.index)
    if start_date:
        keep &= selected['timestamp'] >= start_date
    if end_date:
        keep &= selected['timestamp'] <= end_date
    if account_id:
        keep &= selected['account_id'] == account_id
    if api_status:
        keep &= selected['api_status'] == api_status
    return selected[keep]
def create_summary_stats(df):
    """Create a human-readable summary of the log DataFrame.

    Reports total interactions, success rate, average results per query,
    unique account count, and the three most common query parameters.

    Returns:
        str: the formatted summary, or "No data available" for an empty frame.
    """
    if df.empty:
        return "No data available"

    total_interactions = len(df)
    success_rate = (df['api_status'] == 'success').mean() * 100
    # Guard against an all-NaN total_results column, which would otherwise
    # render as the literal string "nan" in the summary.
    valid_results = df['total_results'].dropna()
    avg_results = valid_results.mean() if not valid_results.empty else 0.0
    unique_accounts = df['account_id'].nunique()

    # Most common "key: value" query parameters, counted with Counter
    # instead of a hand-rolled dict.
    common_params = ""
    if 'query_params' in df.columns:
        param_counts = Counter(
            f"{k}: {v}"
            for params in df['query_params'].dropna()
            if isinstance(params, dict)
            for k, v in params.items()
            if v is not None and v != ''
        )
        if param_counts:
            top_params = param_counts.most_common(3)
            common_params = "\n\nMost Common Query Parameters:\n" + "\n".join(
                f"- {param} (used {count} times)" for param, count in top_params
            )

    summary = f"""
π Log Summary:
- Total Interactions: {total_interactions}
- Success Rate: {success_rate:.1f}%
- Average Results per Query: {avg_results:.1f}
- Unique Accounts: {unique_accounts}
{common_params}
"""
    return summary
def create_time_series(df):
    """Build a line chart of interactions per calendar day, or None if empty."""
    if df.empty:
        return None

    per_day = df.groupby(df['timestamp'].dt.date).size().reset_index()
    per_day.columns = ['Date', 'Interactions']

    fig = px.line(
        per_day,
        x='Date',
        y='Interactions',
        title='Daily Interactions Over Time',
    )
    fig.update_layout(
        xaxis_title="Date",
        yaxis_title="Number of Interactions",
        showlegend=False,
    )
    return fig
def create_status_pie(df):
    """Build a pie chart of the api_status distribution, or None if empty."""
    if df.empty:
        return None

    counts = df['api_status'].value_counts()
    return px.pie(
        values=counts.values,
        names=counts.index,
        title='API Status Distribution',
    )
def refresh_data(start_date, end_date, account_id, api_status):
    """Reload the remote logs, apply the filters, and rebuild every widget.

    Returns a 4-tuple of (summary markdown, time-series figure, status pie
    figure, table rows as a list of stringified lists).
    """
    print("πΉ Refreshing data...")
    print(f"Parameters: start_date={start_date}, end_date={end_date}, account_id={account_id}, api_status={api_status}")

    df = load_logs()  # Now loads from remote URL
    if df.empty:
        print("β No data available")
        return "No log data available", None, None, []

    filtered = filter_logs(df, start_date, end_date, account_id, api_status)
    print(f"πΉ Filtered to {len(filtered)} entries")

    # Rebuild every visualization from the filtered frame.
    summary = create_summary_stats(filtered)
    time_plot = create_time_series(filtered)
    status_plot = create_status_pie(filtered)

    # Gradio's DataFrame wants a list of lists; stringify each cell in the
    # same column order as the component's headers.
    fields = (
        'timestamp', 'interaction_id', 'account_id', 'query', 'query_params',
        'response', 'total_results', 'page', 'like_score', 'api_status',
    )
    try:
        newest_first = filtered.sort_values('timestamp', ascending=False)
        table_rows = [
            [str(record.get(field, '')) for field in fields]
            for record in newest_first.to_dict('records')
        ]
    except Exception as e:
        print(f"β Error preparing table data: {str(e)}")
        table_rows = []

    print("β Refresh complete")
    return summary, time_plot, status_plot, table_rows
# Create the Gradio interface.
with gr.Blocks(title="Chat Log Viewer") as app:
    gr.Markdown("# π Chat Log Viewer")

    with gr.Row():
        with gr.Column(scale=1):
            start_date = gr.DateTime(label="Start Date", value=None)
            end_date = gr.DateTime(label="End Date", value=None)
            account_id = gr.Textbox(label="Account ID (optional)")
            api_status = gr.Dropdown(
                choices=["", "success", "error"],
                label="API Status",
                value=""  # empty string means "no status filter"
            )
            refresh_btn = gr.Button("π Refresh", variant="primary")

    with gr.Row():
        summary_text = gr.Markdown()

    with gr.Row():
        with gr.Column():
            time_series_plot = gr.Plot(label="Interactions Over Time")
        with gr.Column():
            status_plot = gr.Plot(label="API Status Distribution")

    # Column order for the table; must match both the headers below and the
    # row layout produced by refresh_data().
    _TABLE_FIELDS = [
        'timestamp', 'interaction_id', 'account_id', 'query', 'query_params',
        'response', 'total_results', 'page', 'like_score', 'api_status',
    ]
    # Build the initial table as a list of stringified lists — the same shape
    # refresh_data() returns.  The previous to_dict('records') value was a
    # list of dicts (with raw dict cells for query_params), which does not
    # line up with the explicit headers.
    _initial_rows = [
        [str(record.get(field, '')) for field in _TABLE_FIELDS]
        for record in load_logs().to_dict('records')  # Load data immediately
    ]

    with gr.Row():
        logs_table = gr.DataFrame(
            headers=[
                "Timestamp",
                "Interaction ID",
                "Account ID",
                "Query",
                "Query Parameters",
                "Response",
                "Total Results",
                "Page",
                "Like Score",
                "API Status"
            ],
            wrap=True,
            value=_initial_rows
        )

    # Set up refresh functionality: one click re-fetches, re-filters, and
    # repopulates every output component.
    refresh_btn.click(
        refresh_data,
        inputs=[start_date, end_date, account_id, api_status],
        outputs=[summary_text, time_series_plot, status_plot, logs_table]
    )

if __name__ == "__main__":
    app.launch(share=True)