# (Hugging Face Spaces page artifact — not code) Space status at capture time: Sleeping
import asyncio
import functools
from collections import Counter
from datetime import datetime, timedelta, timezone

import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from CosmosDBHandlers.cosmosChatHistoryHandler import ChatMemoryHandlerForAnalytics
class ChatAnalyticsDashboard:
    """Read-side analytics over the Cosmos DB chat-history container."""

    def __init__(self):
        # Handler owns the Cosmos client and exposes `chat_container`.
        self.handler = ChatMemoryHandlerForAnalytics()

    async def get_chat_statistics(self):
        """Return overall chat statistics.

        Returns:
            dict with keys:
                total_chats (int): total chat documents.
                unique_sessions (int): distinct sessionId count.
                function_usage (list[dict]): [{'functionUsed', 'count'}, ...].
            Falls back to zeroed values on any query error.
        """
        try:
            # Total number of chat documents.
            total_chats = list(self.handler.chat_container.query_items(
                query="SELECT VALUE COUNT(1) FROM c",
                enable_cross_partition_query=True
            ))[0]

            # Unique sessions: fetch ids and dedupe client-side
            # (cross-partition DISTINCT/GROUP BY support is limited in Cosmos SQL).
            session_results = list(self.handler.chat_container.query_items(
                query="SELECT c.sessionId FROM c",
                enable_cross_partition_query=True
            ))
            # Guard against documents missing the field (Cosmos omits absent
            # projections rather than returning null).
            unique_sessions = len({
                item['sessionId'] for item in session_results if 'sessionId' in item
            })

            # Function usage: fetch and aggregate client-side for the same reason.
            function_results = list(self.handler.chat_container.query_items(
                query="SELECT c.functionUsed FROM c",
                enable_cross_partition_query=True
            ))
            function_counts = Counter(
                item['functionUsed'] for item in function_results if 'functionUsed' in item
            )
            function_usage = [
                {'functionUsed': func, 'count': count}
                for func, count in function_counts.items()
            ]

            return {
                'total_chats': total_chats,
                'unique_sessions': unique_sessions,
                'function_usage': function_usage,
            }
        except Exception as e:
            print(f"Error getting statistics: {e}")
            return {'total_chats': 0, 'unique_sessions': 0, 'function_usage': []}

    async def get_recent_chats(self, limit=10):
        """Return the `limit` most recent chat documents, newest first.

        Returns [] on error.
        """
        try:
            # Parameterized query: never interpolate values into Cosmos SQL.
            query = (
                "SELECT TOP @limit c.sessionId, c.question, c.functionUsed, "
                "c.answer, c.timestamp FROM c ORDER BY c.timestamp DESC"
            )
            return list(self.handler.chat_container.query_items(
                query=query,
                parameters=[{"name": "@limit", "value": int(limit)}],
                enable_cross_partition_query=True
            ))
        except Exception as e:
            print(f"Error getting recent chats: {e}")
            return []

    async def get_chat_timeline(self, days=7):
        """Return one timeline record per chat in the last `days` days.

        Each record: date ('YYYY-MM-DD'), hour, minute,
        datetime (timezone-aware, UTC), function. Returns [] on error.
        """
        try:
            start_date = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()
            query = (
                "SELECT c.timestamp, c.functionUsed FROM c "
                "WHERE c.timestamp >= @start ORDER BY c.timestamp"
            )
            results = list(self.handler.chat_container.query_items(
                query=query,
                parameters=[{"name": "@start", "value": start_date}],
                enable_cross_partition_query=True
            ))

            timeline_data = []
            for item in results:
                # Timestamps are stored ISO-8601; normalize a trailing 'Z'
                # so datetime.fromisoformat accepts it on older Pythons.
                date = datetime.fromisoformat(item['timestamp'].replace('Z', '+00:00'))
                timeline_data.append({
                    'date': date.strftime('%Y-%m-%d'),
                    'hour': date.hour,
                    'minute': date.minute,
                    'datetime': date,
                    'function': item['functionUsed'],
                })
            return timeline_data
        except Exception as e:
            print(f"Error getting timeline: {e}")
            return []
# Single module-level dashboard instance shared by all Gradio callbacks below.
dashboard = ChatAnalyticsDashboard()
def sync_wrapper(async_func):
    """Adapt an async callable so it can be invoked from synchronous code.

    Args:
        async_func: coroutine function to wrap.

    Returns:
        A sync function with the same signature that runs the coroutine to
        completion and returns its result.
    """
    @functools.wraps(async_func)
    def wrapper(*args, **kwargs):
        coro = async_func(*args, **kwargs)
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop in this thread: asyncio.run creates one, runs the
            # coroutine, and tears the loop down again (the old code leaked
            # the manually created loop and never closed it).
            return asyncio.run(coro)
        # A loop IS already running in this thread; calling
        # run_until_complete on it would raise "This event loop is already
        # running", so execute the coroutine on a fresh loop in a worker thread.
        from concurrent.futures import ThreadPoolExecutor
        with ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, coro).result()
    return wrapper
async def update_sql_statistics():
    """Refresh the SQL-analytics tab: three headline markdown strings plus
    the state pie chart and the top-questions bar chart."""
    stats = await dashboard.handler.get_sql_query_statistics()

    # Success/error/null breakdown as a pie chart; placeholder when empty.
    if stats['total_queries'] > 0:
        breakdown = pd.DataFrame({
            'State': ['Success', 'Error', 'Null'],
            'Count': [stats['success_count'], stats['error_count'], stats['null_count']],
        })
        state_chart = px.pie(
            breakdown, values='Count', names='State',
            title='SQL Query Success Rate',
            color_discrete_map={'Success': '#10b981', 'Error': '#ef4444', 'Null': '#6b7280'},
        )
    else:
        state_chart = px.pie(values=[1], names=['No Data'], title='SQL Query Success Rate')

    # Horizontal bar of the five most frequently generated queries.
    if stats['top_questions']:
        top_five = pd.DataFrame(stats['top_questions']).head(5)
        questions_chart = px.bar(top_five, x='count', y='question',
                                 orientation='h', title='Top 5 Most Generated Queries')
        questions_chart.update_layout(yaxis={'categoryorder': 'total ascending'})
    else:
        questions_chart = px.bar(x=[0], y=['No Data'], title='Top Generated Queries')

    return (
        f"**Total SQL Queries:** {stats['total_queries']}",
        f"**Success Rate:** {stats['success_rate']:.1f}%",
        f"**Error/Null Queries:** {stats['error_count'] + stats['null_count']}",
        state_chart,
        questions_chart,
    )
async def get_recent_sql_queries():
    """Return the latest SQL generations as a display-ready DataFrame."""
    recent = await dashboard.handler.get_recent_sql_queries(limit=15)
    if not recent:
        return pd.DataFrame({'Message': ['No recent SQL queries']})

    def _trim(text, width):
        # Ellipsize long cell values so the table stays readable.
        return text[:width] + '...' if len(text) > width else text

    rows = [{
        'Original Question': _trim(entry['originalQuestion'], 60),
        'Generated SQL': _trim(entry['generatedSql'], 80),
        'State': entry['state'],
        'Timestamp': datetime.fromisoformat(
            entry['timestamp'].replace('Z', '+00:00')).strftime('%Y-%m-%d %H:%M'),
    } for entry in recent]
    return pd.DataFrame(rows)
async def get_sql_error_analysis():
    """Return up to 10 recent failed SQL generations for the error table."""
    errors = await dashboard.handler.get_sql_error_analysis()
    if not errors:
        return pd.DataFrame({'Message': ['No failed queries found']})

    def _trim(text, width):
        # Ellipsize long cell values so the table stays readable.
        return text[:width] + '...' if len(text) > width else text

    rows = []
    for failure in errors[:10]:  # cap at the 10 most recent failures
        rows.append({
            'Original Question': _trim(failure['originalQuestion'], 50),
            'Generated SQL': _trim(failure['generatedSql'], 60),
            'State': failure['state'],
            'Timestamp': datetime.fromisoformat(
                failure['timestamp'].replace('Z', '+00:00')).strftime('%Y-%m-%d %H:%M'),
        })
    return pd.DataFrame(rows)
async def update_statistics():
    """Refresh the headline counters and the function-usage pie chart."""
    stats = await dashboard.get_chat_statistics()

    usage = stats['function_usage']
    if usage:
        func_chart = px.pie(pd.DataFrame(usage), values='count', names='functionUsed',
                            title='Function Usage Distribution')
    else:
        # Placeholder chart when no usage data exists yet.
        func_chart = px.pie(values=[1], names=['No Data'],
                            title='Function Usage Distribution')

    return (
        f"**Total Chats:** {stats['total_chats']}",
        f"**Unique Sessions:** {stats['unique_sessions']}",
        func_chart,
    )
async def update_timeline(days):
    """Build the activity chart for the last `days` days.

    Multi-day ranges get a daily line plot; a single day is bucketed into
    15-minute intervals. Returns an annotated empty figure when there is
    no data for the selected period.
    """
    timeline_data = await dashboard.get_chat_timeline(days)

    if not timeline_data:
        # Empty-state figure instead of a blank plot.
        empty_fig = go.Figure()
        empty_fig.add_annotation(
            text="No data available for selected period",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
        )
        empty_fig.update_layout(title="Chat Activity Timeline")
        return empty_fig

    df = pd.DataFrame(timeline_data)

    if days > 1:
        # Multi-day view: one point per calendar day.
        daily_counts = df.groupby('date').size().reset_index(name='count')
        daily_counts['date'] = pd.to_datetime(daily_counts['date'])
        timeline_chart = px.line(
            daily_counts, x='date', y='count',
            title=f'Daily Chat Activity - Last {days} Days',
            markers=True, line_shape='linear',
        )
        timeline_chart.update_layout(
            xaxis_title="Date",
            yaxis_title="Number of Chats",
            hovermode='x unified',
        )
    else:
        # Single-day view: bucket into 15-minute intervals. Use the aware
        # datetime already present in each record rather than rebuilding it
        # from unpadded date/hour/minute strings (the old reconstruction was
        # redundant and relied on lenient string parsing).
        df['datetime'] = pd.to_datetime(df['datetime'], utc=True)
        df['interval'] = df['datetime'].dt.floor('15min')
        interval_counts = df.groupby('interval').size().reset_index(name='count')

        day_label = interval_counts.iloc[0]['interval'].strftime('%Y-%m-%d')
        timeline_chart = px.line(
            interval_counts, x='interval', y='count',
            title=f'Chat Activity by 15-min Intervals - {day_label}',
            markers=True, line_shape='linear',
        )
        timeline_chart.update_layout(
            xaxis_title="Time",
            yaxis_title="Number of Chats",
            xaxis=dict(
                tickformat='%H:%M',
                dtick=900000,  # 15 minutes, in milliseconds
            ),
            hovermode='x unified',
        )
    return timeline_chart
async def get_faqs():
    """Return up to 10 unique semantic FAQs as a display-ready DataFrame."""
    # Over-fetch so that after deduplication we can still fill 10 rows.
    faqs = await dashboard.handler.get_semantic_faqs(limit=15)
    if not faqs:
        return pd.DataFrame({'Message': ['No FAQ data available']})

    # Dedupe by representative question while preserving order.
    # (Removed an unused `unique_questions` list the old code computed
    # alongside this loop but never read.)
    unique_faqs = []
    seen_questions = set()
    for faq in faqs:
        question = faq['representative_question']
        if question in seen_questions:
            continue
        seen_questions.add(question)
        unique_faqs.append({
            'Question': question[:100] + '...' if len(question) > 100 else question,
            'Similar Questions Count': len(faq['similar_questions']),
            'Total Occurrences': faq['total_occurrences'],
        })
        # Stop once we have 10 unique items.
        if len(unique_faqs) >= 10:
            break
    return pd.DataFrame(unique_faqs)
async def get_recent_interactions():
    """Return the 20 most recent chat interactions as a DataFrame."""
    recent = await dashboard.get_recent_chats(limit=20)
    if not recent:
        return pd.DataFrame({'Message': ['No recent interactions']})

    rows = []
    for chat in recent:
        session_id = chat['sessionId']
        question = chat['question']
        rows.append({
            # Only ellipsize when the value was actually truncated (the old
            # code unconditionally appended '...' to every session id).
            'Session ID': session_id[:8] + '...' if len(session_id) > 8 else session_id,
            'Question': question[:50] + '...' if len(question) > 50 else question,
            'Function': chat['functionUsed'],
            'Timestamp': datetime.fromisoformat(
                chat['timestamp'].replace('Z', '+00:00')).strftime('%Y-%m-%d %H:%M'),
        })
    return pd.DataFrame(rows)
# Global Gradio theme: Citrus base with an amber secondary hue and
# Inter / Roboto Mono Google fonts, each with system fallbacks.
theme = gr.themes.Citrus(
    secondary_hue="amber",
    font=[gr.themes.GoogleFont('Inter'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
    font_mono=[gr.themes.GoogleFont('Roboto Mono'), 'ui-monospace', 'Consolas', 'monospace'],
)
# ---- Gradio UI layout and event wiring ----
# NOTE(review): several tab/section labels below contain mojibake
# (e.g. "π", "β", "π¬") — presumably mis-encoded emoji; confirm the
# file's original encoding before changing them.
with gr.Blocks(theme=theme,
               title="TAL Chat Analytics Dashboard") as demo:
    gr.Markdown("# Chat Analytics Dashboard")
    gr.Markdown("### Real-time analytics for TAL Chatbot")

    # Headline counters, populated by update_statistics on load/refresh.
    with gr.Row():
        total_chats = gr.Markdown("**Total Chats:** Loading...")
        unique_sessions = gr.Markdown("**Unique Sessions:** Loading...")

    with gr.Tabs():
        with gr.TabItem("Function Usage Distribution"):
            function_chart = gr.Plot(label="Function Usage Distribution")

        with gr.TabItem("π Timeline Analysis"):
            # Slider drives update_timeline via the .change handler below.
            days_slider = gr.Slider(minimum=1, maximum=30, value=7, step=1,
                                    label="Days to analyze")
            with gr.Row():
                timeline_plot = gr.Plot(label="Daily Chat Activity")

        with gr.TabItem("β Frequently Asked Questions"):
            faq_table = gr.DataFrame(label="Semantic FAQs", interactive=False)

        with gr.TabItem("π¬ Recent Interactions"):
            recent_table = gr.DataFrame(label="Recent Chat Interactions", interactive=False)

        with gr.TabItem("π SQL Query Analytics", elem_id="sql-tab"):
            # SQL statistics headline cards.
            gr.Markdown("### π SQL Generation Statistics")
            with gr.Row():
                with gr.Column(elem_classes="stats-card"):
                    total_sql_queries = gr.Markdown("**Total SQL Queries:** Loading...")
                with gr.Column(elem_classes="stats-card"):
                    sql_success_rate = gr.Markdown("**Success Rate:** Loading...")
                with gr.Column(elem_classes="stats-card"):
                    failed_sql_queries = gr.Markdown("**Failed Queries:** Loading...")

            # SQL charts: success distribution + most generated queries.
            with gr.Row():
                with gr.Column(elem_classes="plot-container"):
                    sql_state_chart = gr.Plot(label="SQL Query Success Distribution")
                with gr.Column(elem_classes="plot-container"):
                    top_questions_chart = gr.Plot(label="Most Generated Queries")

            # Recent SQL generations table.
            gr.Markdown("### π Recent SQL Generations")
            with gr.Column(elem_classes="plot-container"):
                recent_sql_table = gr.DataFrame(
                    label="Latest SQL Query Generations",
                    interactive=False,
                    elem_classes="dataframe"
                )

            # Failed-query analysis table.
            gr.Markdown("### β οΈ Failed Query Analysis")
            with gr.Column(elem_classes="plot-container"):
                sql_errors_table = gr.DataFrame(
                    label="Recent Failed SQL Queries",
                    interactive=False,
                    elem_classes="dataframe"
                )

    refresh_btn = gr.Button("π Refresh Dashboard", variant="primary")

    # SQL-tab handlers: run once on page load and again on every refresh click.
    demo.load(update_sql_statistics, outputs=[total_sql_queries, sql_success_rate, failed_sql_queries, sql_state_chart, top_questions_chart])
    demo.load(get_recent_sql_queries, outputs=[recent_sql_table])
    demo.load(get_sql_error_analysis, outputs=[sql_errors_table])
    refresh_btn.click(update_sql_statistics, outputs=[total_sql_queries, sql_success_rate, failed_sql_queries, sql_state_chart, top_questions_chart])
    refresh_btn.click(get_recent_sql_queries, outputs=[recent_sql_table])
    refresh_btn.click(get_sql_error_analysis, outputs=[sql_errors_table])

    # Re-plot the timeline whenever the day-range slider moves.
    days_slider.change(update_timeline, inputs=[days_slider],
                       outputs=[timeline_plot])

    # Main-dashboard handlers (load + refresh).
    # NOTE(review): the sync lambdas return a coroutine from the async
    # update_timeline — confirm the installed Gradio version awaits
    # awaitables returned by sync callables.
    demo.load(update_statistics, outputs=[total_chats, unique_sessions, function_chart])
    demo.load(lambda: update_timeline(7), outputs=[timeline_plot])
    demo.load(get_faqs, outputs=[faq_table])
    demo.load(get_recent_interactions, outputs=[recent_table])
    refresh_btn.click(update_statistics, outputs=[total_chats, unique_sessions, function_chart])
    refresh_btn.click(lambda: update_timeline(7), outputs=[timeline_plot])
    refresh_btn.click(get_faqs, outputs=[faq_table])
    refresh_btn.click(get_recent_interactions, outputs=[recent_table])

if __name__ == "__main__":
    demo.launch()