Sathvika-Alla committed on
Commit
33fd539
·
verified ·
1 Parent(s): 8e24e37

Upload folder using huggingface_hub

Browse files
.env ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ AZURE_OPENAI_KEY = <redacted: rotate this key and supply it through a secret store, not version control>
2
+
3
+ OPENAI_API_TYPE = azure
4
+
5
+ OPENAI_EMBEDDINGS_MODEL_NAME = text-embedding-ada-002
6
+ OPENAI_EMBEDDINGS_MODEL_DEPLOYMENT = text-embedding-ada-002
7
+ OPENAI_API_ENDPOINT = https://tal-chatbot-resource2.cognitiveservices.azure.com/
8
+
9
+
10
+ AZURE_COSMOS_DB_ENDPOINT = https://tal-chatbot.documents.azure.com:443/
11
+ AZURE_COSMOS_DB_KEY = <redacted: rotate this key and supply it through a secret store, not version control>
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
CosmosDBHandlers/__pycache__/cosmosChatHistoryHandler.cpython-311.pyc ADDED
Binary file (14.9 kB). View file
 
CosmosDBHandlers/cosmosChatHistoryHandler.py ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # cosmosConnector.py
3
+ from azure.cosmos import exceptions
4
+ from datetime import datetime, timedelta, timezone
5
+ import uuid
6
+ from langchain_openai import AzureOpenAIEmbeddings
7
+ import os
8
+ from azure.cosmos import CosmosClient, PartitionKey
9
+ from typing import List, Optional, Dict
10
+ import logging
11
+ import os
12
+ from dotenv import load_dotenv
13
+ load_dotenv()
14
+ # Initialize Cosmos DB containers
15
+
16
class ChatMemoryHandlerForAnalytics():
    """Analytics queries over the chatbot's Cosmos DB data.

    Connects to the ``TAL_ChatData`` database and reads two containers:
    ``ChatHistory`` (chat turns, with a vector policy on ``/embedding``) and
    ``GeneratedQueries`` (generated SQL, partitioned by ``/state``).

    The public methods are coroutines, but they call the synchronous Cosmos
    client and embedding model, so each one blocks until its queries finish.
    """

    def __init__(self, logger: Optional[logging.Logger] = None):
        """Create the Cosmos client, embedding model, database and containers.

        Args:
            logger: Optional logger for error reporting. When omitted, errors
                are reported through this module's logger instead.
        """
        self.cosmos_client = CosmosClient(
            os.getenv("AZURE_COSMOS_DB_ENDPOINT"),
            os.getenv("AZURE_COSMOS_DB_KEY")
        )
        self.logger = logger
        self.indexing_policy = {
            "indexingMode": "consistent",
            "includedPaths": [{"path": "/*"}],  # Indexes all properties, including nested
            "excludedPaths": [
                {
                    "path": '/"_etag"/?'
                },
                {
                    "path": "/embedding/*"
                }
            ],
        }

        self.vector_embedding_policy = {
            "vectorEmbeddings": [
                {
                    "path": "/embedding",
                    "dataType": "float32",
                    "distanceFunction": "cosine",
                    "dimensions": 1536,
                }
            ]
        }

        self.embedding_model = AzureOpenAIEmbeddings(
            azure_endpoint=os.environ["OPENAI_API_ENDPOINT"],
            azure_deployment=os.environ["OPENAI_EMBEDDINGS_MODEL_DEPLOYMENT"],
            api_key=os.environ["AZURE_OPENAI_KEY"]
        )

        self.database = self.cosmos_client.create_database_if_not_exists("TAL_ChatData")

        # Container for chat history
        self.chat_container = self.database.create_container_if_not_exists(
            id="ChatHistory",
            partition_key=PartitionKey(path="/functionUsed"),
            indexing_policy=self.indexing_policy,
            vector_embedding_policy=self.vector_embedding_policy
        )

        # Container for SQL queries
        self.sql_container = self.database.create_container_if_not_exists(
            id="GeneratedQueries",
            partition_key=PartitionKey(path="/state")
        )

    def _log_error(self, message: str) -> None:
        """Log *message* at ERROR level, even when no logger was injected."""
        logger = getattr(self, "logger", None)
        if logger is None:
            # ``logger`` defaults to None in __init__; never dereference it blindly.
            logger = logging.getLogger(__name__)
        logger.error(message)

    async def _generate_embedding(self, query: str) -> List[float]:
        """Return the embedding of *query* from the configured embedding model.

        Raises:
            Exception: whatever the embedding call raised, after logging it.
        """
        try:
            return self.embedding_model.embed_query(query)
        except Exception as e:
            self._log_error(f"Embedding generation failed: {str(e)}")
            raise

    async def get_semantic_faqs(self, limit: int = 5, threshold: float = 0.1) -> List[Dict]:
        """Cluster the most frequent chat questions by vector similarity.

        The ``limit`` most frequent question texts are taken as cluster
        representatives. For each one, the 50 nearest stored questions are
        fetched and those with ``1 - VectorDistance <= threshold`` join its
        cluster. A representative already absorbed by an earlier cluster is
        skipped.

        NOTE(review): Cosmos DB documents ``VectorDistance`` as a similarity
        score for the cosine function, which would make ``1 - value`` a
        distance and ``threshold`` a maximum distance. The original filter is
        kept as-is; confirm against real data.

        Args:
            limit: Maximum number of clusters to return.
            threshold: Upper bound on ``1 - VectorDistance`` for membership.

        Returns:
            Up to ``limit`` dicts with ``representative_question``,
            ``similar_questions``, ``total_occurrences`` and
            ``similarity_scores``, sorted by ``total_occurrences`` descending.
            An empty list on any error.
        """
        try:
            from collections import Counter

            raw_results = list(self.chat_container.query_items(
                query="SELECT c.question FROM c",
                enable_cross_partition_query=True,
                max_item_count=-1
            ))

            # Rows without a question come back without the key; skip them
            # instead of failing the whole report.
            question_counts = Counter(
                item['question'] for item in raw_results
                if item.get('question') is not None
            )
            top_questions = question_counts.most_common(limit)

            similar_query = """
                SELECT TOP 50 c.question, VectorDistance(c.embedding, @embedding) as distance
                FROM c
                ORDER BY VectorDistance(c.embedding, @embedding)
            """

            clustered_faqs = []
            processed = set()

            for text, count in top_questions:
                if text in processed:
                    continue

                # Embed lazily so absorbed questions cost no embedding call.
                embedding = await self._generate_embedding(text)
                similar_results = list(self.chat_container.query_items(
                    query=similar_query,
                    parameters=[{"name": "@embedding", "value": embedding}],
                    enable_cross_partition_query=True
                ))

                members = []
                scores = {}
                for item in similar_results:
                    question = item.get('question')
                    distance = item.get('distance')
                    if question is None or distance is None:
                        continue
                    similarity = 1 - distance
                    if similarity <= threshold:
                        members.append(question)
                        # Scores cover exactly the questions kept in the cluster.
                        scores[question] = similarity

                if members:
                    member_counts = Counter(members)
                    clustered_faqs.append({
                        "representative_question": text,
                        "similar_questions": list(member_counts),
                        "total_occurrences": sum(member_counts.values()),
                        "similarity_scores": scores
                    })
                else:
                    # Nothing passed the filter: report the question on its own.
                    clustered_faqs.append({
                        "representative_question": text,
                        "similar_questions": [text],
                        "total_occurrences": count,
                        "similarity_scores": {text: 1.0}
                    })

                processed.update(members)
                processed.add(text)

            # Sort first, then truncate, so the largest clusters are kept.
            ranked = sorted(clustered_faqs, key=lambda x: x['total_occurrences'], reverse=True)
            return ranked[:limit]

        except exceptions.CosmosHttpResponseError as ex:
            print(f"Cosmos DB error: {ex}")
            self._log_error(f"Semantic FAQ retrieval failed: {str(ex)}")
            return []
        except Exception as e:
            self._log_error(f"Semantic FAQ retrieval failed: {str(e)}")
            return []

    async def get_sql_query_statistics(self):
        """Summarise the ``GeneratedQueries`` container.

        Returns:
            Dict with ``total_queries``, ``success_count``, ``error_count``,
            ``null_count``, ``top_questions`` (up to 10 ``{'question',
            'count'}`` dicts) and ``success_rate`` in percent. All-zero
            values on any error.
        """
        try:
            from collections import Counter

            # Get total queries
            total_queries = list(self.sql_container.query_items(
                query="SELECT VALUE COUNT(1) FROM c",
                enable_cross_partition_query=True
            ))[0]

            # Get queries by state
            state_results = list(self.sql_container.query_items(
                query="SELECT c.state FROM c",
                enable_cross_partition_query=True
            ))
            state_counts = Counter(item.get('state') for item in state_results)

            # Get top original questions
            question_results = list(self.sql_container.query_items(
                query="SELECT c.originalQuestion FROM c",
                enable_cross_partition_query=True
            ))
            question_counts = Counter(
                item['originalQuestion'] for item in question_results
                if item.get('originalQuestion') is not None
            )
            top_questions = [
                {'question': q, 'count': c}
                for q, c in question_counts.most_common(10)
            ]

            success_count = state_counts.get('success', 0)
            # NOTE(review): counts the literal string 'null' plus missing or
            # JSON-null states; confirm which form the writer stores.
            null_count = state_counts.get('null', 0) + state_counts.get(None, 0)

            return {
                'total_queries': total_queries,
                'success_count': success_count,
                'error_count': state_counts.get('error', 0),
                'null_count': null_count,
                'top_questions': top_questions,
                'success_rate': (success_count / total_queries * 100) if total_queries > 0 else 0
            }
        except Exception as e:
            print(f"Error getting SQL statistics: {e}")
            return {'total_queries': 0, 'success_count': 0, 'error_count': 0, 'null_count': 0, 'top_questions': [], 'success_rate': 0}

    async def get_sql_query_timeline(self, days=7):
        """Return generated-query events from the last *days* days, oldest first.

        Each entry has ``date`` (``YYYY-MM-DD``), ``hour``, ``minute``,
        ``datetime``, ``state`` and ``question``. Returns an empty list on
        any error.
        """
        try:
            start_date = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()

            # The cutoff is passed as a parameter rather than spliced into the SQL.
            query = """
                SELECT c.timestamp, c.state, c.originalQuestion
                FROM c
                WHERE c.timestamp >= @start_date
                ORDER BY c.timestamp
            """

            results = list(self.sql_container.query_items(
                query=query,
                parameters=[{"name": "@start_date", "value": start_date}],
                enable_cross_partition_query=True
            ))

            timeline_data = []
            for item in results:
                # fromisoformat does not accept a trailing 'Z' on older Pythons.
                date = datetime.fromisoformat(item['timestamp'].replace('Z', '+00:00'))
                timeline_data.append({
                    'date': date.strftime('%Y-%m-%d'),
                    'hour': date.hour,
                    'minute': date.minute,
                    'datetime': date,
                    'state': item.get('state'),
                    'question': item.get('originalQuestion')
                })

            return timeline_data
        except Exception as e:
            self._log_error(f"Error getting SQL timeline: {e}")
            return []

    async def get_recent_sql_queries(self, limit=20):
        """Return the *limit* most recent generated-query records, newest first.

        Returns an empty list on any error, including a non-integer *limit*.
        """
        try:
            # Coerce to int so only a number can reach the query text.
            query = f"""
                SELECT TOP {int(limit)} c.originalQuestion, c.generatedSql, c.state, c.timestamp
                FROM c
                ORDER BY c.timestamp DESC
            """

            results = list(self.sql_container.query_items(
                query=query,
                enable_cross_partition_query=True
            ))

            return results
        except Exception as e:
            self._log_error(f"Error getting recent SQL queries: {e}")
            return []

    async def get_sql_error_analysis(self):
        """Return all records whose state is not ``'success'``, newest first.

        Returns an empty list on any error.
        """
        try:
            query = """
                SELECT c.originalQuestion, c.generatedSql, c.state, c.timestamp
                FROM c
                WHERE c.state != 'success'
                ORDER BY c.timestamp DESC
            """

            results = list(self.sql_container.query_items(
                query=query,
                enable_cross_partition_query=True
            ))

            return results
        except Exception as e:
            print(f"Error getting SQL error analysis: {e}")
            return []
282
+
283
+ import asyncio
284
+
285
+
286
+
287
# NOTE(review): this handler is built at import time, so importing the module
# (as the dashboard does) opens a second Cosmos connection; consider moving it
# into main() once no other code relies on the module-level name.
handler = ChatMemoryHandlerForAnalytics()


async def main():
    """Print every semantic FAQ cluster: its representative and its members."""
    for cluster in await handler.get_semantic_faqs():
        print("\n", cluster["representative_question"], cluster["similar_questions"], "\n")


if __name__ == "__main__":
    asyncio.run(main())
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
  title: TALAnalyticsDashboard
3
- emoji: 🐠
4
- colorFrom: pink
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 5.34.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: TALAnalyticsDashboard
3
+ app_file: analytics-dashboard.py
 
 
4
  sdk: gradio
5
+ sdk_version: 5.31.0
 
 
6
  ---
 
 
analytics-dashboard.py ADDED
@@ -0,0 +1,425 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import asyncio
4
+ from datetime import datetime, timedelta, timezone
5
+ import plotly.express as px
6
+ import plotly.graph_objects as go
7
+ from CosmosDBHandlers.cosmosChatHistoryHandler import ChatMemoryHandlerForAnalytics
8
+
9
+ class ChatAnalyticsDashboard:
10
+ def __init__(self):
11
+ self.handler = ChatMemoryHandlerForAnalytics()
12
+
13
+ async def get_chat_statistics(self):
14
+ """Get basic chat statistics - Fixed version"""
15
+ try:
16
+ # Get total chats - this works
17
+ total_query = "SELECT VALUE COUNT(1) FROM c"
18
+ total_chats = list(self.handler.chat_container.query_items(
19
+ query=total_query,
20
+ enable_cross_partition_query=True
21
+ ))[0]
22
+
23
+ # Get unique sessions - fetch all and count in Python
24
+ session_query = "SELECT c.sessionId FROM c"
25
+ session_results = list(self.handler.chat_container.query_items(
26
+ query=session_query,
27
+ enable_cross_partition_query=True
28
+ ))
29
+ unique_sessions = len(set(item['sessionId'] for item in session_results))
30
+
31
+ # Get function usage - fetch all and group in Python
32
+ function_query = "SELECT c.functionUsed FROM c"
33
+ function_results = list(self.handler.chat_container.query_items(
34
+ query=function_query,
35
+ enable_cross_partition_query=True
36
+ ))
37
+
38
+ # Count function usage in Python
39
+ from collections import Counter
40
+ function_counts = Counter(item['functionUsed'] for item in function_results)
41
+ function_usage = [
42
+ {'functionUsed': func, 'count': count}
43
+ for func, count in function_counts.items()
44
+ ]
45
+
46
+ return {
47
+ 'total_chats': total_chats,
48
+ 'unique_sessions': unique_sessions,
49
+ 'function_usage': function_usage
50
+ }
51
+ except Exception as e:
52
+ print(f"Error getting statistics: {e}")
53
+ return {'total_chats': 0, 'unique_sessions': 0, 'function_usage': []}
54
+
55
+ async def get_recent_chats(self, limit=10):
56
+ """Get recent chat interactions"""
57
+ try:
58
+ query = f"""
59
+ SELECT TOP {limit} c.sessionId, c.question, c.functionUsed, c.answer, c.timestamp
60
+ FROM c
61
+ ORDER BY c.timestamp DESC
62
+ """
63
+
64
+ results = list(self.handler.chat_container.query_items(
65
+ query=query,
66
+ enable_cross_partition_query=True
67
+ ))
68
+
69
+ return results
70
+ except Exception as e:
71
+ print(f"Error getting recent chats: {e}")
72
+ return []
73
+
74
+ async def get_chat_timeline(self, days=7):
75
+ """Enhanced timeline data with minute-level precision"""
76
+ try:
77
+ start_date = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()
78
+
79
+ query = f"""
80
+ SELECT c.timestamp, c.functionUsed
81
+ FROM c
82
+ WHERE c.timestamp >= '{start_date}'
83
+ ORDER BY c.timestamp
84
+ """
85
+
86
+ results = list(self.handler.chat_container.query_items(
87
+ query=query,
88
+ enable_cross_partition_query=True
89
+ ))
90
+
91
+ # Process for timeline with minute precision
92
+ timeline_data = []
93
+ for item in results:
94
+ date = datetime.fromisoformat(item['timestamp'].replace('Z', '+00:00'))
95
+ timeline_data.append({
96
+ 'date': date.strftime('%Y-%m-%d'),
97
+ 'hour': date.hour,
98
+ 'minute': date.minute,
99
+ 'datetime': date,
100
+ 'function': item['functionUsed']
101
+ })
102
+
103
+ return timeline_data
104
+ except Exception as e:
105
+ print(f"Error getting timeline: {e}")
106
+ return []
107
+
108
# Single dashboard instance shared by every Gradio callback below; constructing
# it builds a ChatMemoryHandlerForAnalytics when this module is loaded.
dashboard = ChatAnalyticsDashboard()
110
+
111
def sync_wrapper(async_func):
    """Wrap coroutine function *async_func* so it can be called synchronously.

    The returned function accepts the same arguments, runs the coroutine to
    completion and returns its result; exceptions from the coroutine propagate
    to the caller.

    Each call uses ``asyncio.run``, which creates and closes its own event
    loop, so no loop is leaked between calls. If the calling thread already
    has a running loop (where ``run_until_complete`` would raise
    ``RuntimeError``), the coroutine is run on a short-lived worker thread
    instead.
    """
    import concurrent.futures
    import functools

    @functools.wraps(async_func)
    def wrapper(*args, **kwargs):
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop in this thread: the common case for a plain sync caller.
            return asyncio.run(async_func(*args, **kwargs))

        # A loop is already running here and cannot be re-entered, so run the
        # coroutine on its own loop in a worker thread and wait for it.
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, async_func(*args, **kwargs)).result()

    return wrapper
122
+
123
@sync_wrapper
async def update_sql_statistics():
    """Build the SQL-analytics summary strings and charts.

    Returns a 5-tuple: total-queries markdown, success-rate markdown,
    error/null-count markdown, a pie chart of query states and a bar chart of
    the most frequent original questions.
    """
    stats = await dashboard.handler.get_sql_query_statistics()

    # Pie chart of outcomes; a single "No Data" slice stands in when empty.
    if stats['total_queries'] > 0:
        outcome_rows = [
            {'State': label, 'Count': stats[key]}
            for label, key in (
                ('Success', 'success_count'),
                ('Error', 'error_count'),
                ('Null', 'null_count'),
            )
        ]
        palette = {'Success': '#10b981', 'Error': '#ef4444', 'Null': '#6b7280'}
        state_chart = px.pie(
            pd.DataFrame(outcome_rows),
            values='Count',
            names='State',
            title='SQL Query Success Rate',
            color_discrete_map=palette,
        )
    else:
        state_chart = px.pie(values=[1], names=['No Data'], title='SQL Query Success Rate')

    # Horizontal bar chart of the five most frequent questions.
    top_questions = stats['top_questions']
    if top_questions:
        top_five = pd.DataFrame(top_questions).head(5)
        questions_chart = px.bar(
            top_five,
            x='count',
            y='question',
            orientation='h',
            title='Top 5 Most Generated Queries',
        )
        questions_chart.update_layout(yaxis={'categoryorder': 'total ascending'})
    else:
        questions_chart = px.bar(x=[0], y=['No Data'], title='Top Generated Queries')

    total_text = f"**Total SQL Queries:** {stats['total_queries']}"
    rate_text = f"**Success Rate:** {stats['success_rate']:.1f}%"
    failed_text = f"**Error/Null Queries:** {stats['error_count'] + stats['null_count']}"
    return (total_text, rate_text, failed_text, state_chart, questions_chart)
158
+
159
+
160
+
161
def _shorten(text, max_len):
    """Return *text* cut to *max_len* characters plus '...' when it is longer.

    ``None`` becomes an empty string, so records without generated SQL or a
    question do not crash the table builders below.
    """
    if text is None:
        return ''
    text = str(text)
    return text[:max_len] + '...' if len(text) > max_len else text


def _format_timestamp(value):
    """Format an ISO-8601 timestamp string as ``YYYY-MM-DD HH:MM``; '' if missing."""
    if not value:
        return ''
    # A trailing 'Z' is rewritten to an explicit UTC offset before parsing.
    return datetime.fromisoformat(value.replace('Z', '+00:00')).strftime('%Y-%m-%d %H:%M')


def _sql_rows_to_frame(records, question_len, sql_len):
    """Turn generated-query records into the table shown in the dashboard."""
    return pd.DataFrame([
        {
            'Original Question': _shorten(record.get('originalQuestion'), question_len),
            'Generated SQL': _shorten(record.get('generatedSql'), sql_len),
            'State': record.get('state'),
            'Timestamp': _format_timestamp(record.get('timestamp'))
        }
        for record in records
    ])


@sync_wrapper
async def get_recent_sql_queries():
    """Return a DataFrame of the 15 most recent SQL generations.

    Questions are truncated to 60 characters and SQL to 80. When there are no
    records, a one-cell ``Message`` frame is returned instead.
    """
    recent = await dashboard.handler.get_recent_sql_queries(limit=15)

    if recent:
        return _sql_rows_to_frame(recent, question_len=60, sql_len=80)
    return pd.DataFrame({'Message': ['No recent SQL queries']})


@sync_wrapper
async def get_sql_error_analysis():
    """Return a DataFrame of the 10 most recent non-successful SQL generations.

    Questions are truncated to 50 characters and SQL to 60. When there are no
    such records, a one-cell ``Message`` frame is returned instead.
    """
    errors = await dashboard.handler.get_sql_error_analysis()

    if errors:
        # Limit to 10 most recent errors
        return _sql_rows_to_frame(errors[:10], question_len=50, sql_len=60)
    return pd.DataFrame({'Message': ['No failed queries found']})
198
+
199
@sync_wrapper
async def update_statistics():
    """Return the headline chat figures and the function-usage pie chart.

    Returns a 3-tuple: total-chats markdown, unique-sessions markdown and a
    pie chart (a single "No Data" slice when there is no usage data).
    """
    stats = await dashboard.get_chat_statistics()

    usage = stats['function_usage']
    if not usage:
        func_chart = px.pie(values=[1], names=['No Data'], title='Function Usage Distribution')
    else:
        func_chart = px.pie(
            pd.DataFrame(usage),
            values='count',
            names='functionUsed',
            title='Function Usage Distribution',
        )

    total_text = f"**Total Chats:** {stats['total_chats']}"
    sessions_text = f"**Unique Sessions:** {stats['unique_sessions']}"
    return (total_text, sessions_text, func_chart)
217
+
218
+
219
@sync_wrapper
async def update_timeline(days):
    """Return a line chart of chat activity for the last *days* days.

    More than one day is plotted as daily counts; otherwise counts are
    bucketed into 15-minute intervals. With no data, an empty figure carrying
    an explanatory annotation is returned.
    """
    timeline_data = await dashboard.get_chat_timeline(days)

    if not timeline_data:
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No data available for selected period",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False
        )
        placeholder.update_layout(title="Chat Activity Timeline")
        return placeholder

    df = pd.DataFrame(timeline_data)

    if days <= 1:
        # Single-day view: rebuild a minute-resolution timestamp from the
        # date/hour/minute columns, then floor it to 15-minute buckets.
        stamp_text = (
            df['date'] + ' ' +
            df['hour'].astype(str) + ':' +
            df['minute'].astype(str) + ':00'
        )
        df['datetime'] = pd.to_datetime(stamp_text)
        df['interval'] = df['datetime'].dt.floor('15min')
        interval_counts = df.groupby('interval').size().reset_index(name='count')

        chart = px.line(
            interval_counts,
            x='interval',
            y='count',
            title=f'Chat Activity by 15-min Intervals - {interval_counts.iloc[0]["interval"].strftime("%Y-%m-%d")}',
            markers=True,
            line_shape='linear'
        )
        chart.update_layout(
            xaxis_title="Time",
            yaxis_title="Number of Chats",
            xaxis=dict(
                tickformat='%H:%M',
                dtick=900000  # 15 minutes in milliseconds
            ),
            hovermode='x unified'
        )
        return chart

    # Multi-day view: one point per calendar date.
    daily_counts = df.groupby('date').size().reset_index(name='count')
    daily_counts['date'] = pd.to_datetime(daily_counts['date'])

    chart = px.line(
        daily_counts,
        x='date',
        y='count',
        title=f'Daily Chat Activity - Last {days} Days',
        markers=True,
        line_shape='linear'
    )
    chart.update_layout(
        xaxis_title="Date",
        yaxis_title="Number of Chats",
        hovermode='x unified'
    )
    return chart
288
+
289
+
290
@sync_wrapper
async def get_faqs():
    """Return a DataFrame of up to 10 semantic FAQ clusters.

    Representative questions are truncated to 100 characters. When no
    clusters are available, a one-cell ``Message`` frame is returned instead.
    """
    faqs = await dashboard.handler.get_semantic_faqs(limit=10)

    if not faqs:
        return pd.DataFrame({'Message': ['No FAQ data available']})

    def as_row(faq):
        question = faq['representative_question']
        if len(question) > 100:
            question = question[:100] + '...'
        return {
            'Question': question,
            'Similar Questions Count': len(faq['similar_questions']),
            'Total Occurrences': faq['total_occurrences']
        }

    return pd.DataFrame([as_row(faq) for faq in faqs])
307
+
308
@sync_wrapper
async def get_recent_interactions():
    """Return a DataFrame of the 20 most recent chat interactions.

    Session ids are cut to their first 8 characters and questions to 50.
    When there are no chats, a one-cell ``Message`` frame is returned instead.
    """
    recent = await dashboard.get_recent_chats(limit=20)

    if not recent:
        return pd.DataFrame({'Message': ['No recent interactions']})

    def as_row(chat):
        question = chat['question']
        if len(question) > 50:
            question = question[:50] + '...'
        # A trailing 'Z' is rewritten to an explicit UTC offset before parsing.
        moment = datetime.fromisoformat(chat['timestamp'].replace('Z', '+00:00'))
        return {
            'Session ID': chat['sessionId'][:8] + '...',
            'Question': question,
            'Function': chat['functionUsed'],
            'Timestamp': moment.strftime('%Y-%m-%d %H:%M')
        }

    return pd.DataFrame([as_row(chat) for chat in recent])
326
+
327
theme = gr.themes.Citrus(
    secondary_hue="amber",
    font=[gr.themes.GoogleFont('Inter'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
    font_mono=[gr.themes.GoogleFont('Roboto Mono'), 'ui-monospace', 'Consolas', 'monospace'],
)

# Dashboard layout: headline figures, one tab per report, and a refresh button.
# NOTE(review): nesting was reconstructed from a flattened listing; in
# particular, confirm the refresh button belongs at Blocks level.
with gr.Blocks(theme=theme,
               title="TAL Chat Analytics Dashboard") as demo:

    gr.Markdown("# Chat Analytics Dashboard")
    gr.Markdown("### Real-time analytics for TAL Chatbot")

    with gr.Row():
        total_chats = gr.Markdown("**Total Chats:** Loading...")
        unique_sessions = gr.Markdown("**Unique Sessions:** Loading...")

    with gr.Tabs():
        with gr.TabItem("Function Usage Distribution"):
            function_chart = gr.Plot(label="Function Usage Distribution")

        with gr.TabItem("📈 Timeline Analysis"):
            days_slider = gr.Slider(minimum=1, maximum=30, value=7, step=1,
                                    label="Days to analyze")
            with gr.Row():
                timeline_plot = gr.Plot(label="Daily Chat Activity")

        with gr.TabItem("❓ Frequently Asked Questions"):
            faq_table = gr.DataFrame(label="Semantic FAQs", interactive=False)

        with gr.TabItem("💬 Recent Interactions"):
            recent_table = gr.DataFrame(label="Recent Chat Interactions", interactive=False)

        with gr.TabItem("🔍 SQL Query Analytics", elem_id="sql-tab"):
            # SQL Statistics Section
            gr.Markdown("### 📊 SQL Generation Statistics")
            with gr.Row():
                with gr.Column(elem_classes="stats-card"):
                    total_sql_queries = gr.Markdown("**Total SQL Queries:** Loading...")
                with gr.Column(elem_classes="stats-card"):
                    sql_success_rate = gr.Markdown("**Success Rate:** Loading...")
                with gr.Column(elem_classes="stats-card"):
                    # Same label that update_sql_statistics renders once loaded.
                    failed_sql_queries = gr.Markdown("**Error/Null Queries:** Loading...")

            # SQL Charts Section
            with gr.Row():
                with gr.Column(elem_classes="plot-container"):
                    sql_state_chart = gr.Plot(label="SQL Query Success Distribution")
                with gr.Column(elem_classes="plot-container"):
                    top_questions_chart = gr.Plot(label="Most Generated Queries")

            # Recent SQL Queries Section
            gr.Markdown("### 📝 Recent SQL Generations")
            with gr.Column(elem_classes="plot-container"):
                recent_sql_table = gr.DataFrame(
                    label="Latest SQL Query Generations",
                    interactive=False,
                    elem_classes="dataframe"
                )

            # Error Analysis Section
            gr.Markdown("### ⚠️ Failed Query Analysis")
            with gr.Column(elem_classes="plot-container"):
                sql_errors_table = gr.DataFrame(
                    label="Recent Failed SQL Queries",
                    interactive=False,
                    elem_classes="dataframe"
                )

    refresh_btn = gr.Button("🔄 Refresh Dashboard", variant="primary")

    sql_stat_outputs = [total_sql_queries, sql_success_rate, failed_sql_queries,
                        sql_state_chart, top_questions_chart]
    chat_stat_outputs = [total_chats, unique_sessions, function_chart]

    # Populate every panel on page load and again whenever Refresh is clicked.
    for register in (demo.load, refresh_btn.click):
        register(update_sql_statistics, outputs=sql_stat_outputs)
        register(get_recent_sql_queries, outputs=[recent_sql_table])
        register(get_sql_error_analysis, outputs=[sql_errors_table])
        register(update_statistics, outputs=chat_stat_outputs)
        # Use the slider's current value so a refresh keeps the selected window
        # instead of silently snapping back to 7 days.
        register(update_timeline, inputs=[days_slider], outputs=[timeline_plot])
        register(get_faqs, outputs=[faq_table])
        register(get_recent_interactions, outputs=[recent_table])

    days_slider.change(update_timeline, inputs=[days_slider],
                       outputs=[timeline_plot])


if __name__ == "__main__":
    # NOTE(review): share=True requests a public tunnel URL; confirm that is
    # wanted for a dashboard exposing chat history.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ semantic-kernel
2
+ azure-cosmos
3
+ plotly