import gradio as gr import pandas as pd import asyncio from datetime import datetime, timedelta, timezone import plotly.express as px import plotly.graph_objects as go from CosmosDBHandlers.cosmosChatHistoryHandler import ChatMemoryHandlerForAnalytics class ChatAnalyticsDashboard: def __init__(self): self.handler = ChatMemoryHandlerForAnalytics() async def get_chat_statistics(self): """Get basic chat statistics - Fixed version""" try: # Get total chats - this works total_query = "SELECT VALUE COUNT(1) FROM c" total_chats = list(self.handler.chat_container.query_items( query=total_query, enable_cross_partition_query=True ))[0] # Get unique sessions - fetch all and count in Python session_query = "SELECT c.sessionId FROM c" session_results = list(self.handler.chat_container.query_items( query=session_query, enable_cross_partition_query=True )) unique_sessions = len(set(item['sessionId'] for item in session_results)) # Get function usage - fetch all and group in Python function_query = "SELECT c.functionUsed FROM c" function_results = list(self.handler.chat_container.query_items( query=function_query, enable_cross_partition_query=True )) # Count function usage in Python from collections import Counter function_counts = Counter(item['functionUsed'] for item in function_results) function_usage = [ {'functionUsed': func, 'count': count} for func, count in function_counts.items() ] return { 'total_chats': total_chats, 'unique_sessions': unique_sessions, 'function_usage': function_usage } except Exception as e: print(f"Error getting statistics: {e}") return {'total_chats': 0, 'unique_sessions': 0, 'function_usage': []} async def get_recent_chats(self, limit=10): """Get recent chat interactions""" try: query = f""" SELECT TOP {limit} c.sessionId, c.question, c.functionUsed, c.answer, c.timestamp FROM c ORDER BY c.timestamp DESC """ results = list(self.handler.chat_container.query_items( query=query, enable_cross_partition_query=True )) return results except Exception as e: print(f"Error getting recent chats: {e}") return [] async def get_chat_timeline(self, days=7): """Enhanced timeline data with minute-level precision""" try: start_date = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat() query = f""" SELECT c.timestamp, c.functionUsed FROM c WHERE c.timestamp >= '{start_date}' ORDER BY c.timestamp """ results = list(self.handler.chat_container.query_items( query=query, enable_cross_partition_query=True )) # Process for timeline with minute precision timeline_data = [] for item in results: date = datetime.fromisoformat(item['timestamp'].replace('Z', '+00:00')) timeline_data.append({ 'date': date.strftime('%Y-%m-%d'), 'hour': date.hour, 'minute': date.minute, 'datetime': date, 'function': item['functionUsed'] }) return timeline_data except Exception as e: print(f"Error getting timeline: {e}") return [] # Initialize dashboard dashboard = ChatAnalyticsDashboard() def sync_wrapper(async_func): """Wrapper to run async functions in Gradio""" def wrapper(*args, **kwargs): try: loop = asyncio.get_running_loop() except RuntimeError: loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) return loop.run_until_complete(async_func(*args, **kwargs)) return wrapper @sync_wrapper async def update_sql_statistics(): """Update SQL query statistics """ stats = await dashboard.handler.get_sql_query_statistics() # Create success rate chart with correct state values if stats['total_queries'] > 0: state_data = pd.DataFrame([ {'State': 'Success', 'Count': stats['success_count']}, {'State': 'Error', 'Count': stats['error_count']}, {'State': 'Null', 'Count': stats['null_count']} # Changed from 'Failed' ]) state_chart = px.pie(state_data, values='Count', names='State', title='SQL Query Success Rate', color_discrete_map={'Success': '#10b981', 'Error': '#ef4444', 'Null': '#6b7280'}) else: state_chart = px.pie(values=[1], names=['No Data'], title='SQL Query Success Rate') # Create top questions chart if stats['top_questions']: questions_df = pd.DataFrame(stats['top_questions']) questions_chart = px.bar(questions_df.head(5), x='count', y='question', orientation='h', title='Top 5 Most Generated Queries') questions_chart.update_layout(yaxis={'categoryorder': 'total ascending'}) else: questions_chart = px.bar(x=[0], y=['No Data'], title='Top Generated Queries') return ( f"**Total SQL Queries:** {stats['total_queries']}", f"**Success Rate:** {stats['success_rate']:.1f}%", f"**Error/Null Queries:** {stats['error_count'] + stats['null_count']}", # Updated label state_chart, questions_chart ) @sync_wrapper async def get_recent_sql_queries(): """Get recent SQL query generations""" recent = await dashboard.handler.get_recent_sql_queries(limit=15) if recent: recent_data = [] for query in recent: recent_data.append({ 'Original Question': query['originalQuestion'][:60] + '...' if len(query['originalQuestion']) > 60 else query['originalQuestion'], 'Generated SQL': query['generatedSql'][:80] + '...' if len(query['generatedSql']) > 80 else query['generatedSql'], 'State': query['state'], 'Timestamp': datetime.fromisoformat(query['timestamp'].replace('Z', '+00:00')).strftime('%Y-%m-%d %H:%M') }) return pd.DataFrame(recent_data) else: return pd.DataFrame({'Message': ['No recent SQL queries']}) @sync_wrapper async def get_sql_error_analysis(): """Get failed SQL query analysis""" errors = await dashboard.handler.get_sql_error_analysis() if errors: error_data = [] for error in errors[:10]: # Limit to 10 most recent errors error_data.append({ 'Original Question': error['originalQuestion'][:50] + '...' if len(error['originalQuestion']) > 50 else error['originalQuestion'], 'Generated SQL': error['generatedSql'][:60] + '...' if len(error['generatedSql']) > 60 else error['generatedSql'], 'State': error['state'], 'Timestamp': datetime.fromisoformat(error['timestamp'].replace('Z', '+00:00')).strftime('%Y-%m-%d %H:%M') }) return pd.DataFrame(error_data) else: return pd.DataFrame({'Message': ['No failed queries found']}) @sync_wrapper async def update_statistics(): """Update dashboard statistics""" stats = await dashboard.get_chat_statistics() # Create function usage chart if stats['function_usage']: func_df = pd.DataFrame(stats['function_usage']) func_chart = px.pie(func_df, values='count', names='functionUsed', title='Function Usage Distribution') else: func_chart = px.pie(values=[1], names=['No Data'], title='Function Usage Distribution') return ( f"**Total Chats:** {stats['total_chats']}", f"**Unique Sessions:** {stats['unique_sessions']}", func_chart ) @sync_wrapper async def update_timeline(days): """Enhanced timeline function with adaptive granularity""" timeline_data = await dashboard.get_chat_timeline(days) if not timeline_data: # Return empty chart if no data empty_fig = go.Figure() empty_fig.add_annotation( text="No data available for selected period", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False ) empty_fig.update_layout(title="Chat Activity Timeline") return empty_fig df = pd.DataFrame(timeline_data) if days > 1: # Multi-day view: Group by date for daily line plot daily_counts = df.groupby('date').size().reset_index(name='count') daily_counts['date'] = pd.to_datetime(daily_counts['date']) timeline_chart = px.line( daily_counts, x='date', y='count', title=f'Daily Chat Activity - Last {days} Days', markers=True, line_shape='linear' ) timeline_chart.update_layout( xaxis_title="Date", yaxis_title="Number of Chats", hovermode='x unified' ) # In the single day section of update_timeline: else: # Single day view: Group by 15-minute intervals df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['hour'].astype(str) + ':' + df['minute'].astype(str) + ':00') # Create 15-minute intervals df['interval'] = df['datetime'].dt.floor('15min') interval_counts = df.groupby('interval').size().reset_index(name='count') timeline_chart = px.line( interval_counts, x='interval', y='count', title=f'Chat Activity by 15-min Intervals - {interval_counts.iloc[0]["interval"].strftime("%Y-%m-%d")}', markers=True, line_shape='linear' ) timeline_chart.update_layout( xaxis_title="Time", yaxis_title="Number of Chats", xaxis=dict( tickformat='%H:%M', dtick=900000 # 15-minute intervals ), hovermode='x unified' ) return timeline_chart @sync_wrapper async def get_faqs(): """Get semantic FAQs with duplicate removal""" # Request more items than needed to account for duplicates faqs = await dashboard.handler.get_semantic_faqs(limit=15) # Request more than the 10 we want if faqs: # Extract representative questions questions = [faq['representative_question'] for faq in faqs] # Remove duplicates while preserving order unique_questions = list(dict.fromkeys(questions)) # Rebuild FAQ data with unique questions only unique_faqs = [] seen_questions = set() for faq in faqs: question = faq['representative_question'] if question not in seen_questions: seen_questions.add(question) unique_faqs.append({ 'Question': question[:100] + '...' if len(question) > 100 else question, 'Similar Questions Count': len(faq['similar_questions']), 'Total Occurrences': faq['total_occurrences'] }) # Stop once we have 10 unique items if len(unique_faqs) >= 10: break return pd.DataFrame(unique_faqs) else: return pd.DataFrame({'Message': ['No FAQ data available']}) @sync_wrapper async def get_recent_interactions(): """Get recent chat interactions""" recent = await dashboard.get_recent_chats(limit=20) if recent: recent_data = [] for chat in recent: recent_data.append({ 'Session ID': chat['sessionId'][:8] + '...', 'Question': chat['question'][:50] + '...' if len(chat['question']) > 50 else chat['question'], 'Function': chat['functionUsed'], 'Timestamp': datetime.fromisoformat(chat['timestamp'].replace('Z', '+00:00')).strftime('%Y-%m-%d %H:%M') }) return pd.DataFrame(recent_data) else: return pd.DataFrame({'Message': ['No recent interactions']}) theme = gr.themes.Citrus( secondary_hue="amber", font=[gr.themes.GoogleFont('Inter'), 'ui-sans-serif', 'system-ui', 'sans-serif'], font_mono=[gr.themes.GoogleFont('Roboto Mono'), 'ui-monospace', 'Consolas', 'monospace'], ) with gr.Blocks(theme=theme, title="TAL Chat Analytics Dashboard") as demo: gr.Markdown("# Chat Analytics Dashboard") gr.Markdown("### Real-time analytics for TAL Chatbot") with gr.Row(): total_chats = gr.Markdown("**Total Chats:** Loading...") unique_sessions = gr.Markdown("**Unique Sessions:** Loading...") with gr.Tabs(): with gr.TabItem("Function Usage Distribution"): function_chart = gr.Plot(label="Function Usage Distribution") with gr.TabItem("📈 Timeline Analysis"): days_slider = gr.Slider(minimum=1, maximum=30, value=7, step=1, label="Days to analyze") with gr.Row(): timeline_plot = gr.Plot(label="Daily Chat Activity") with gr.TabItem("❓ Frequently Asked Questions"): faq_table = gr.DataFrame(label="Semantic FAQs", interactive=False) with gr.TabItem("💬 Recent Interactions"): recent_table = gr.DataFrame(label="Recent Chat Interactions", interactive=False) with gr.TabItem("🔍 SQL Query Analytics", elem_id="sql-tab"): # SQL Statistics Section gr.Markdown("### 📊 SQL Generation Statistics") with gr.Row(): with gr.Column(elem_classes="stats-card"): total_sql_queries = gr.Markdown("**Total SQL Queries:** Loading...") with gr.Column(elem_classes="stats-card"): sql_success_rate = gr.Markdown("**Success Rate:** Loading...") with gr.Column(elem_classes="stats-card"): failed_sql_queries = gr.Markdown("**Failed Queries:** Loading...") # SQL Charts Section with gr.Row(): with gr.Column(elem_classes="plot-container"): sql_state_chart = gr.Plot(label="SQL Query Success Distribution") with gr.Column(elem_classes="plot-container"): top_questions_chart = gr.Plot(label="Most Generated Queries") # Recent SQL Queries Section gr.Markdown("### 📝 Recent SQL Generations") with gr.Column(elem_classes="plot-container"): recent_sql_table = gr.DataFrame( label="Latest SQL Query Generations", interactive=False, elem_classes="dataframe" ) # Error Analysis Section gr.Markdown("### ⚠️ Failed Query Analysis") with gr.Column(elem_classes="plot-container"): sql_errors_table = gr.DataFrame( label="Recent Failed SQL Queries", interactive=False, elem_classes="dataframe" ) refresh_btn = gr.Button("🔄 Refresh Dashboard", variant="primary") # Update event handlers demo.load(update_sql_statistics, outputs=[total_sql_queries, sql_success_rate, failed_sql_queries, sql_state_chart, top_questions_chart]) demo.load(get_recent_sql_queries, outputs=[recent_sql_table]) demo.load(get_sql_error_analysis, outputs=[sql_errors_table]) refresh_btn.click(update_sql_statistics, outputs=[total_sql_queries, sql_success_rate, failed_sql_queries, sql_state_chart, top_questions_chart]) refresh_btn.click(get_recent_sql_queries, outputs=[recent_sql_table]) refresh_btn.click(get_sql_error_analysis, outputs=[sql_errors_table]) days_slider.change(update_timeline, inputs=[days_slider], outputs=[timeline_plot]) # Auto-refresh components # # Event handlers demo.load(update_statistics, outputs=[total_chats, unique_sessions, function_chart]) demo.load(lambda: update_timeline(7), outputs=[timeline_plot]) demo.load(get_faqs, outputs=[faq_table]) demo.load(get_recent_interactions, outputs=[recent_table]) refresh_btn.click(update_statistics, outputs=[total_chats, unique_sessions, function_chart]) refresh_btn.click(lambda: update_timeline(7), outputs=[timeline_plot]) refresh_btn.click(get_faqs, outputs=[faq_table]) refresh_btn.click(get_recent_interactions, outputs=[recent_table]) if __name__ == "__main__": demo.launch()