Spaces:

TAL-Chabot
/

TALAnalyticsDashboard

Sleeping

File size: 17,553 Bytes

import gradio as gr
import pandas as pd
import asyncio
from datetime import datetime, timedelta, timezone
import plotly.express as px
import plotly.graph_objects as go
from CosmosDBHandlers.cosmosChatHistoryHandler import ChatMemoryHandlerForAnalytics

class ChatAnalyticsDashboard:
    def __init__(self):
        self.handler = ChatMemoryHandlerForAnalytics()
        
    async def get_chat_statistics(self):
        """Get basic chat statistics - Fixed version"""
        try:
            # Get total chats - this works
            total_query = "SELECT VALUE COUNT(1) FROM c"
            total_chats = list(self.handler.chat_container.query_items(
                query=total_query,
                enable_cross_partition_query=True
            ))[0]
            
            # Get unique sessions - fetch all and count in Python
            session_query = "SELECT c.sessionId FROM c"
            session_results = list(self.handler.chat_container.query_items(
                query=session_query,
                enable_cross_partition_query=True
            ))
            unique_sessions = len(set(item['sessionId'] for item in session_results))
            
            # Get function usage - fetch all and group in Python
            function_query = "SELECT c.functionUsed FROM c"
            function_results = list(self.handler.chat_container.query_items(
                query=function_query,
                enable_cross_partition_query=True
            ))
            
            # Count function usage in Python
            from collections import Counter
            function_counts = Counter(item['functionUsed'] for item in function_results)
            function_usage = [
                {'functionUsed': func, 'count': count} 
                for func, count in function_counts.items()
            ]
            
            return {
                'total_chats': total_chats,
                'unique_sessions': unique_sessions,
                'function_usage': function_usage
            }
        except Exception as e:
            print(f"Error getting statistics: {e}")
            return {'total_chats': 0, 'unique_sessions': 0, 'function_usage': []}
    
    async def get_recent_chats(self, limit=10):
        """Get recent chat interactions"""
        try:
            query = f"""
            SELECT TOP {limit} c.sessionId, c.question, c.functionUsed, c.answer, c.timestamp
            FROM c
            ORDER BY c.timestamp DESC
            """
            
            results = list(self.handler.chat_container.query_items(
                query=query,
                enable_cross_partition_query=True
            ))
            
            return results
        except Exception as e:
            print(f"Error getting recent chats: {e}")
            return []

    async def get_chat_timeline(self, days=7):
        """Enhanced timeline data with minute-level precision"""
        try:
            start_date = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()
            
            query = f"""
            SELECT c.timestamp, c.functionUsed 
            FROM c 
            WHERE c.timestamp >= '{start_date}'
            ORDER BY c.timestamp
            """
            
            results = list(self.handler.chat_container.query_items(
                query=query,
                enable_cross_partition_query=True
            ))
            
            # Process for timeline with minute precision
            timeline_data = []
            for item in results:
                date = datetime.fromisoformat(item['timestamp'].replace('Z', '+00:00'))
                timeline_data.append({
                    'date': date.strftime('%Y-%m-%d'),
                    'hour': date.hour,
                    'minute': date.minute,
                    'datetime': date,
                    'function': item['functionUsed']
                })
            
            return timeline_data
        except Exception as e:
            print(f"Error getting timeline: {e}")
            return []
        
# Initialize dashboard
dashboard = ChatAnalyticsDashboard()

def sync_wrapper(async_func):
    """Wrapper to run async functions in Gradio"""
    def wrapper(*args, **kwargs):
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
        
        return loop.run_until_complete(async_func(*args, **kwargs))
    return wrapper

@sync_wrapper
async def update_sql_statistics():
    """Update SQL query statistics """
    stats = await dashboard.handler.get_sql_query_statistics()
    
    # Create success rate chart with correct state values
    if stats['total_queries'] > 0:
        state_data = pd.DataFrame([
            {'State': 'Success', 'Count': stats['success_count']},
            {'State': 'Error', 'Count': stats['error_count']},
            {'State': 'Null', 'Count': stats['null_count']}  # Changed from 'Failed'
        ])

        state_chart = px.pie(state_data, values='Count', names='State', 
                           title='SQL Query Success Rate',
                           color_discrete_map={'Success': '#10b981', 'Error': '#ef4444', 'Null': '#6b7280'})
    else:
        state_chart = px.pie(values=[1], names=['No Data'], title='SQL Query Success Rate')
    
    # Create top questions chart
    if stats['top_questions']:
        questions_df = pd.DataFrame(stats['top_questions'])
        questions_chart = px.bar(questions_df.head(5), x='count', y='question', 
                                orientation='h', title='Top 5 Most Generated Queries')
        questions_chart.update_layout(yaxis={'categoryorder': 'total ascending'})
    else:
        questions_chart = px.bar(x=[0], y=['No Data'], title='Top Generated Queries')
    
    return (
        f"**Total SQL Queries:** {stats['total_queries']}",
        f"**Success Rate:** {stats['success_rate']:.1f}%",
        f"**Error/Null Queries:** {stats['error_count'] + stats['null_count']}",  # Updated label
        state_chart,
        questions_chart
    )



@sync_wrapper
async def get_recent_sql_queries():
    """Get recent SQL query generations"""
    recent = await dashboard.handler.get_recent_sql_queries(limit=15)
    
    if recent:
        recent_data = []
        for query in recent:
            recent_data.append({
                'Original Question': query['originalQuestion'][:60] + '...' if len(query['originalQuestion']) > 60 else query['originalQuestion'],
                'Generated SQL': query['generatedSql'][:80] + '...' if len(query['generatedSql']) > 80 else query['generatedSql'],
                'State': query['state'],
                'Timestamp': datetime.fromisoformat(query['timestamp'].replace('Z', '+00:00')).strftime('%Y-%m-%d %H:%M')
            })
        
        return pd.DataFrame(recent_data)
    else:
        return pd.DataFrame({'Message': ['No recent SQL queries']})

@sync_wrapper
async def get_sql_error_analysis():
    """Get failed SQL query analysis"""
    errors = await dashboard.handler.get_sql_error_analysis()
    
    if errors:
        error_data = []
        for error in errors[:10]:  # Limit to 10 most recent errors
            error_data.append({
                'Original Question': error['originalQuestion'][:50] + '...' if len(error['originalQuestion']) > 50 else error['originalQuestion'],
                'Generated SQL': error['generatedSql'][:60] + '...' if len(error['generatedSql']) > 60 else error['generatedSql'],
                'State': error['state'],
                'Timestamp': datetime.fromisoformat(error['timestamp'].replace('Z', '+00:00')).strftime('%Y-%m-%d %H:%M')
            })
        
        return pd.DataFrame(error_data)
    else:
        return pd.DataFrame({'Message': ['No failed queries found']})

@sync_wrapper
async def update_statistics():
    """Update dashboard statistics"""
    stats = await dashboard.get_chat_statistics()
    
    # Create function usage chart
    if stats['function_usage']:
        func_df = pd.DataFrame(stats['function_usage'])
        func_chart = px.pie(func_df, values='count', names='functionUsed', 
                           title='Function Usage Distribution')
    else:
        func_chart = px.pie(values=[1], names=['No Data'], title='Function Usage Distribution')
    
    return (
        f"**Total Chats:** {stats['total_chats']}",
        f"**Unique Sessions:** {stats['unique_sessions']}",
        func_chart
    )


@sync_wrapper
async def update_timeline(days):
    """Enhanced timeline function with adaptive granularity"""
    timeline_data = await dashboard.get_chat_timeline(days)
    
    if not timeline_data:
        # Return empty chart if no data
        empty_fig = go.Figure()
        empty_fig.add_annotation(
            text="No data available for selected period",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False
        )
        empty_fig.update_layout(title="Chat Activity Timeline")
        return empty_fig
    
    df = pd.DataFrame(timeline_data)
    
    if days > 1:
        # Multi-day view: Group by date for daily line plot
        daily_counts = df.groupby('date').size().reset_index(name='count')
        daily_counts['date'] = pd.to_datetime(daily_counts['date'])
        
        timeline_chart = px.line(
            daily_counts, 
            x='date', 
            y='count',
            title=f'Daily Chat Activity - Last {days} Days',
            markers=True,
            line_shape='linear'
        )
        
        timeline_chart.update_layout(
            xaxis_title="Date",
            yaxis_title="Number of Chats",
            hovermode='x unified'
        )
        
    # In the single day section of update_timeline:
    else:
        # Single day view: Group by 15-minute intervals
        df['datetime'] = pd.to_datetime(df['date'] + ' ' + 
                                    df['hour'].astype(str) + ':' + 
                                    df['minute'].astype(str) + ':00')
        
        # Create 15-minute intervals
        df['interval'] = df['datetime'].dt.floor('15min')
        interval_counts = df.groupby('interval').size().reset_index(name='count')
        
        timeline_chart = px.line(
            interval_counts,
            x='interval',
            y='count',
            title=f'Chat Activity by 15-min Intervals - {interval_counts.iloc[0]["interval"].strftime("%Y-%m-%d")}',
            markers=True,
            line_shape='linear'
        )
        
        timeline_chart.update_layout(
            xaxis_title="Time",
            yaxis_title="Number of Chats",
            xaxis=dict(
                tickformat='%H:%M',
                dtick=900000  # 15-minute intervals
            ),
            hovermode='x unified'
        )

    return timeline_chart


@sync_wrapper
async def get_faqs():
    """Get semantic FAQs with duplicate removal"""
    # Request more items than needed to account for duplicates
    faqs = await dashboard.handler.get_semantic_faqs(limit=15)  # Request more than the 10 we want
    
    if faqs:
        # Extract representative questions
        questions = [faq['representative_question'] for faq in faqs]
        
        # Remove duplicates while preserving order
        unique_questions = list(dict.fromkeys(questions))
        
        # Rebuild FAQ data with unique questions only
        unique_faqs = []
        seen_questions = set()
        
        for faq in faqs:
            question = faq['representative_question']
            if question not in seen_questions:
                seen_questions.add(question)
                unique_faqs.append({
                    'Question': question[:100] + '...' if len(question) > 100 else question,
                    'Similar Questions Count': len(faq['similar_questions']),
                    'Total Occurrences': faq['total_occurrences']
                })
                
                # Stop once we have 10 unique items
                if len(unique_faqs) >= 10:
                    break
        
        return pd.DataFrame(unique_faqs)
    else:
        return pd.DataFrame({'Message': ['No FAQ data available']})

@sync_wrapper
async def get_recent_interactions():
    """Get recent chat interactions"""
    recent = await dashboard.get_recent_chats(limit=20)
    
    if recent:
        recent_data = []
        for chat in recent:
            recent_data.append({
                'Session ID': chat['sessionId'][:8] + '...',
                'Question': chat['question'][:50] + '...' if len(chat['question']) > 50 else chat['question'],
                'Function': chat['functionUsed'],
                'Timestamp': datetime.fromisoformat(chat['timestamp'].replace('Z', '+00:00')).strftime('%Y-%m-%d %H:%M')
            })
        
        return pd.DataFrame(recent_data)
    else:
        return pd.DataFrame({'Message': ['No recent interactions']})

theme = gr.themes.Citrus(
    secondary_hue="amber",
    font=[gr.themes.GoogleFont('Inter'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
    font_mono=[gr.themes.GoogleFont('Roboto Mono'), 'ui-monospace', 'Consolas', 'monospace'],
)

with gr.Blocks(theme=theme, 
                title="TAL Chat Analytics Dashboard") as demo:

    gr.Markdown("# Chat Analytics Dashboard")
    gr.Markdown("### Real-time analytics for TAL Chatbot")
    
    with gr.Row():
        total_chats = gr.Markdown("**Total Chats:** Loading...")
        unique_sessions = gr.Markdown("**Unique Sessions:** Loading...")

    with gr.Tabs():
        with gr.TabItem("Function Usage Distribution"):
            function_chart = gr.Plot(label="Function Usage Distribution")

        with gr.TabItem("📈 Timeline Analysis"):
            days_slider = gr.Slider(minimum=1, maximum=30, value=7, step=1, 
                                  label="Days to analyze")
            with gr.Row():
                timeline_plot = gr.Plot(label="Daily Chat Activity")
        
        with gr.TabItem("❓ Frequently Asked Questions"):
            faq_table = gr.DataFrame(label="Semantic FAQs", interactive=False)
        
        with gr.TabItem("💬 Recent Interactions"):
            recent_table = gr.DataFrame(label="Recent Chat Interactions", interactive=False)

        with gr.TabItem("🔍 SQL Query Analytics", elem_id="sql-tab"):
            # SQL Statistics Section
            gr.Markdown("### 📊 SQL Generation Statistics")
            with gr.Row():
                with gr.Column(elem_classes="stats-card"):
                    total_sql_queries = gr.Markdown("**Total SQL Queries:** Loading...")
                with gr.Column(elem_classes="stats-card"):
                    sql_success_rate = gr.Markdown("**Success Rate:** Loading...")
                with gr.Column(elem_classes="stats-card"):
                    failed_sql_queries = gr.Markdown("**Failed Queries:** Loading...")
            
            # SQL Charts Section
            with gr.Row():
                with gr.Column(elem_classes="plot-container"):
                    sql_state_chart = gr.Plot(label="SQL Query Success Distribution")
                with gr.Column(elem_classes="plot-container"):
                    top_questions_chart = gr.Plot(label="Most Generated Queries")
            
            # Recent SQL Queries Section
            gr.Markdown("### 📝 Recent SQL Generations")
            with gr.Column(elem_classes="plot-container"):
                recent_sql_table = gr.DataFrame(
                    label="Latest SQL Query Generations", 
                    interactive=False,
                    elem_classes="dataframe"
                )
            
            # Error Analysis Section
            gr.Markdown("### ⚠️ Failed Query Analysis")
            with gr.Column(elem_classes="plot-container"):
                sql_errors_table = gr.DataFrame(
                    label="Recent Failed SQL Queries", 
                    interactive=False,
                    elem_classes="dataframe"
                )
    refresh_btn = gr.Button("🔄 Refresh Dashboard", variant="primary")
    
    
# Update event handlers
    demo.load(update_sql_statistics, outputs=[total_sql_queries, sql_success_rate, failed_sql_queries, sql_state_chart, top_questions_chart])
    demo.load(get_recent_sql_queries, outputs=[recent_sql_table])
    demo.load(get_sql_error_analysis, outputs=[sql_errors_table])

    refresh_btn.click(update_sql_statistics, outputs=[total_sql_queries, sql_success_rate, failed_sql_queries, sql_state_chart, top_questions_chart])
    refresh_btn.click(get_recent_sql_queries, outputs=[recent_sql_table])
    refresh_btn.click(get_sql_error_analysis, outputs=[sql_errors_table])

    days_slider.change(update_timeline, inputs=[days_slider], 
                        outputs=[timeline_plot])

    # Auto-refresh components
   
    # # Event handlers
    demo.load(update_statistics, outputs=[total_chats, unique_sessions, function_chart])
    demo.load(lambda: update_timeline(7), outputs=[timeline_plot])
    demo.load(get_faqs, outputs=[faq_table])
    demo.load(get_recent_interactions, outputs=[recent_table])
    
    refresh_btn.click(update_statistics, outputs=[total_chats, unique_sessions, function_chart])
    refresh_btn.click(lambda: update_timeline(7), outputs=[timeline_plot])
    refresh_btn.click(get_faqs, outputs=[faq_table])
    refresh_btn.click(get_recent_interactions, outputs=[recent_table])
    
    

if __name__ == "__main__":
    demo.launch()