jdesiree committed on
Commit
43f316f
·
verified ·
1 Parent(s): 1e96b40

Metric Recording Implementation

Browse files
Files changed (1) hide show
  1. app.py +36 -2
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  from langchain.prompts import ChatPromptTemplate
3
  from langchain.schema import HumanMessage, SystemMessage, AIMessage
4
  from huggingface_hub import InferenceClient
 
5
  import os
6
  import time
7
  import logging
@@ -21,6 +22,9 @@ client = InferenceClient(
21
  provider="together",
22
  api_key=hf_token,
23
  )
 
 
 
24
  math_template = ChatPromptTemplate.from_messages([
25
  ("system", """{system_message}
26
  You are an expert math tutor. For every math problem:
@@ -103,6 +107,14 @@ def smart_truncate(text, max_length=3000):
103
 
104
  def respond_with_enhanced_streaming(message, history):
105
  """Streams the bot's response, detecting the subject and handling errors."""
 
 
 
 
 
 
 
 
106
  try:
107
  template, mode = detect_subject(message)
108
 
@@ -139,6 +151,9 @@ def respond_with_enhanced_streaming(message, history):
139
  {"role": "system", "content": "You are EduBot, an expert AI learning assistant."},
140
  {"role": "user", "content": formatted_prompt}
141
  ]
 
 
 
142
 
143
  completion = client.chat.completions.create(
144
  model="Qwen/Qwen2.5-7B-Instruct",
@@ -147,9 +162,11 @@ def respond_with_enhanced_streaming(message, history):
147
  temperature=0.7,
148
  top_p=0.9,
149
  )
 
 
 
150
 
151
  response = completion.choices[0].message.content
152
-
153
  response = smart_truncate(response, max_length=3000)
154
 
155
  # Stream the response word by word
@@ -159,19 +176,36 @@ def respond_with_enhanced_streaming(message, history):
159
  for i, word in enumerate(words):
160
  partial_response += word + " "
161
 
162
- # Update the stream periodically
163
  if i % 4 == 0:
 
164
  yield partial_response
165
  time.sleep(0.03)
166
 
167
  final_response = f"*{mode}*\n\n{response}"
168
  logger.info(f"Response completed. Length: {len(response)} characters")
 
 
 
169
  yield final_response
170
 
171
  except Exception as e:
 
 
172
  logger.exception("Error in response generation")
173
  yield f"Sorry, I encountered an error: {str(e)}"
174
 
 
 
 
 
 
 
 
 
 
 
 
175
  # --- Fixed Gradio UI and CSS ---
176
  custom_css = """
177
  /* Main container styling */
 
2
  from langchain.prompts import ChatPromptTemplate
3
  from langchain.schema import HumanMessage, SystemMessage, AIMessage
4
  from huggingface_hub import InferenceClient
5
+ from metrics import EduBotMetrics
6
  import os
7
  import time
8
  import logging
 
22
  provider="together",
23
  api_key=hf_token,
24
  )
25
+
26
+ metrics_tracker = EduBotMetrics()
27
+
28
  math_template = ChatPromptTemplate.from_messages([
29
  ("system", """{system_message}
30
  You are an expert math tutor. For every math problem:
 
107
 
108
  def respond_with_enhanced_streaming(message, history):
109
  """Streams the bot's response, detecting the subject and handling errors."""
110
+
111
+ # Start metrics timing
112
+ timing_context = metrics_tracker.start_timing()
113
+ error_occurred = False
114
+ error_message = None
115
+ response = ""
116
+ mode = ""
117
+
118
  try:
119
  template, mode = detect_subject(message)
120
 
 
151
  {"role": "system", "content": "You are EduBot, an expert AI learning assistant."},
152
  {"role": "user", "content": formatted_prompt}
153
  ]
154
+
155
+ # Mark provider API start
156
+ metrics_tracker.mark_provider_start(timing_context)
157
 
158
  completion = client.chat.completions.create(
159
  model="Qwen/Qwen2.5-7B-Instruct",
 
162
  temperature=0.7,
163
  top_p=0.9,
164
  )
165
+
166
+ # Mark provider API end
167
+ metrics_tracker.mark_provider_end(timing_context)
168
 
169
  response = completion.choices[0].message.content
 
170
  response = smart_truncate(response, max_length=3000)
171
 
172
  # Stream the response word by word
 
176
  for i, word in enumerate(words):
177
  partial_response += word + " "
178
 
179
+ # Update the stream periodically and record chunks
180
  if i % 4 == 0:
181
+ metrics_tracker.record_chunk(timing_context)
182
  yield partial_response
183
  time.sleep(0.03)
184
 
185
  final_response = f"*{mode}*\n\n{response}"
186
  logger.info(f"Response completed. Length: {len(response)} characters")
187
+
188
+ # Record final chunk
189
+ metrics_tracker.record_chunk(timing_context)
190
  yield final_response
191
 
192
  except Exception as e:
193
+ error_occurred = True
194
+ error_message = str(e)
195
  logger.exception("Error in response generation")
196
  yield f"Sorry, I encountered an error: {str(e)}"
197
 
198
+ finally:
199
+ # Log the complete interaction with metrics
200
+ metrics_tracker.log_interaction(
201
+ mode=mode or "Unknown",
202
+ query=message,
203
+ response=response,
204
+ timing_context=timing_context,
205
+ error_occurred=error_occurred,
206
+ error_message=error_message
207
+ )
208
+
209
  # --- Fixed Gradio UI and CSS ---
210
  custom_css = """
211
  /* Main container styling */