Prathamesh1420 committed on
Commit 8a58473 · verified · 1 Parent(s): d145778

Update app.py

Files changed (1):
  1. app.py +278 -243
app.py CHANGED
@@ -1,3 +1,111 @@
  import os
  import gradio as gr
  import requests
@@ -8,81 +116,80 @@ from langchain.prompts import PromptTemplate
  from langchain.chains.llm import LLMChain
  from langchain.llms.base import LLM
  from typing import Optional, List, Mapping, Any
- import time
  from langchain_community.embeddings import HuggingFaceEmbeddings
  from dotenv import load_dotenv
  from datetime import datetime

- # DeepEval imports
- try:
-     from deepeval.test_case import LLMTestCase
-     from deepeval.metrics import AnswerRelevancyMetric, HallucinationMetric
-     from deepeval.metrics import BaseMetric
-     from deepeval.models.base_model import DeepEvalBaseLLM
- except Exception:
-     raise
-
- # Gemini imports (evaluation only)
- try:
-     import google.generativeai as genai
-     from langchain_google_genai import ChatGoogleGenerativeAI
- except Exception:
-     ChatGoogleGenerativeAI = None
-     genai = None
-
- # Load env vars
- load_dotenv()
- PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "")
- MLFLOW_TRACKING_URI = os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:5000")
- GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
- LITSERVE_ENDPOINT = os.environ.get("LITSERVE_ENDPOINT", "")
-
- # DagsHub + MLflow setup
- try:
-     dagshub.init(
-         repo_owner='prathamesh.khade20',
-         repo_name='Maintenance_AI_website',
-         mlflow=True
-     )
- except Exception:
-     pass

- mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
- mlflow.set_experiment("Maintenance-RAG-Chatbot")

- # ----------- Prompt template -----------
- prompt_template = """
  You are a smart assistant. Based on the provided context, answer the question in 1–2 lines only.
  If the context has more details, summarize it concisely.
  Context:
  {context}
  Question: {question}
  Answer:
- """

- # ----------- 1. Custom LLM for LitServe (Lightning AI generator) -----------
  class LitServeLLM(LLM):
      endpoint_url: str

      def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
          payload = {"prompt": prompt}
-         start_time = time.time()
-         response = requests.post(self.endpoint_url, json=payload)
-         latency = time.time() - start_time
-         mlflow.log_metric("lit_serve_latency", latency)
-         if response.status_code == 200:
-             data = response.json()
-             mlflow.log_metric("response_tokens", len(data.get("response", "").split()))
-             return data.get("response", "").strip()
-         else:
-             mlflow.log_metric("request_errors", 1)
-             error_info = {
-                 "status_code": response.status_code,
-                 "error": response.text,
-                 "timestamp": datetime.now().isoformat()
-             }
-             mlflow.log_dict(error_info, "artifacts/error_log.json")
-             raise ValueError(f"Request failed: {response.status_code}")

      @property
      def _identifying_params(self) -> Mapping[str, Any]:
@@ -93,229 +200,157 @@ class LitServeLLM(LLM):
          return "litserve_llm"

  # ----------- 2. Pinecone Connection -----------
  def init_pinecone():
      pc = Pinecone(api_key=PINECONE_API_KEY)
      return pc.Index("rag-granite-index")

- try:
-     index = init_pinecone()
- except Exception:
-     index = None

  # ----------- 3. Embedding Model -----------
  embeddings_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

- # ----------- 4. Context Retrieval -----------
  def get_retrieved_context(query: str, top_k=3):
-     start_time = time.time()
-     query_embedding = embeddings_model.embed_query(query)
-     mlflow.log_metric("embedding_latency", time.time() - start_time)
-
-     if index is None:
-         return ""
-
-     start_time = time.time()
-     results = index.query(
-         namespace="rag-ns",
-         vector=query_embedding,
-         top_k=top_k,
-         include_metadata=True
-     )
-     mlflow.log_metric("pinecone_latency", time.time() - start_time)
-     mlflow.log_metric("retrieved_chunks", len(results['matches']))
-
      context_parts = [match['metadata']['text'] for match in results['matches']]
      return "\n".join(context_parts)

  # ----------- 5. LLM Chain Setup -----------
- model = LitServeLLM(endpoint_url=LITSERVE_ENDPOINT)
- prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)
  llm_chain = LLMChain(llm=model, prompt=prompt)

- # ----------- 6. RAG Pipeline (Lightning AI) -----------
  def rag_pipeline(question):
      try:
          with mlflow.start_run(run_name=f"Query-{datetime.now().strftime('%H%M%S')}", nested=True):
              mlflow.log_param("user_question", question)
              retrieved_context = get_retrieved_context(question)
              mlflow.log_text(retrieved_context, "artifacts/retrieved_context.txt")
-
              start_time = time.time()
-             response_obj = llm_chain.invoke({"context": retrieved_context, "question": question})
-             response = response_obj.get("text") if isinstance(response_obj, dict) else getattr(response_obj, "text", str(response_obj))
-             response = response.strip()
-
              if "Answer:" in response:
                  response = response.split("Answer:", 1)[-1].strip()
-
              mlflow.log_metric("response_latency", time.time() - start_time)
              mlflow.log_metric("response_length", len(response))
              mlflow.log_text(response, "artifacts/response.txt")
              return response
      except Exception as e:
          mlflow.log_metric("pipeline_errors", 1)
-         error_info = {"error": str(e), "question": question, "timestamp": datetime.now().isoformat()}
          mlflow.log_dict(error_info, "artifacts/pipeline_errors.json")
          return f"Error: {str(e)}"

- # ----------- 7. DeepEval Wrappers (Gemini evaluator only) -----------
- class GoogleVertexAI(DeepEvalBaseLLM):
-     def __init__(self, model):
-         self.model = model
-
-     def load_model(self):
-         return self.model
-
-     def generate(self, prompt: str) -> str:
-         res = self.model.invoke(prompt)
-         if hasattr(res, 'content'):
-             return res.content
-         if isinstance(res, dict):
-             return res.get('content') or res.get('text') or str(res)
-         return str(res)
-
-     async def a_generate(self, prompt: str) -> str:
-         res = await self.model.ainvoke(prompt)
-         return getattr(res, 'content', str(res))
-
-     def get_model_name(self):
-         return "Gemini Evaluator"
-
- # Length-based utility metric
- class LengthMetric(BaseMetric):
-     def __init__(self, min_tokens: int = 1, max_tokens: int = 200):
-         self.min_tokens = min_tokens
-         self.max_tokens = max_tokens
-         self.score = 0.0
-         self.success = False
-
-     def measure(self, test_case: LLMTestCase):
-         text = (test_case.actual_output or "")
-         tokens = len(text.split())
-         mid = (self.min_tokens + self.max_tokens) / 2
-         dist = abs(tokens - mid)
-         max_dist = max(mid - self.min_tokens, self.max_tokens - mid)
-         self.score = max(0.0, 1.0 - (dist / max_dist))
-         self.success = (self.min_tokens <= tokens <= self.max_tokens)
-         return self.score
-
-     async def a_measure(self, test_case: LLMTestCase):
-         return self.measure(test_case)
-
-     def is_successful(self):
-         return self.success
-
-     @property
-     def name(self):
-         return "Length Metric"
-
- # ----------- 8. Run DeepEval Tests (Gemini only) -----------
- def run_deepeval_tests(test_cases: List[LLMTestCase]):
-     if ChatGoogleGenerativeAI is None or not GOOGLE_API_KEY:
-         raise RuntimeError("Gemini API not available — set GOOGLE_API_KEY")
-
-     genai.configure(api_key=GOOGLE_API_KEY)
-     chat_model = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY)
-     model_wrapper = GoogleVertexAI(model=chat_model)
-
-     answer_relevancy_metric = AnswerRelevancyMetric(threshold=0.5, model=model_wrapper)
-     hallucination_metric = HallucinationMetric(threshold=0.5, model=model_wrapper)
-     length_metric = LengthMetric(min_tokens=3, max_tokens=200)
-
-     results = []
-     with mlflow.start_run(run_name=f"DeepEval-{datetime.now().strftime('%H%M%S')}", nested=True):
-         for i, tc in enumerate(test_cases):
-             mlflow.log_param(f"tc_{i}_input", tc.input)
-             mlflow.log_param(f"tc_{i}_actual", tc.actual_output)
-             if tc.context:
-                 mlflow.log_text("\n".join(tc.context), f"artifacts/tc_{i}_context.txt")
-
-             answer_relevancy_metric.measure(tc)
-             hallucination_metric.measure(tc)
-             length_metric.measure(tc)
-
-             entry = {
-                 "input": tc.input,
-                 "actual_output": tc.actual_output,
-                 "context": tc.context,
-                 "answer_relevancy_score": answer_relevancy_metric.score,
-                 "hallucination_score": hallucination_metric.score,
-                 "length_score": length_metric.score
-             }
-
-             mlflow.log_metric(f"tc_{i}_answer_relevancy", answer_relevancy_metric.score)
-             mlflow.log_metric(f"tc_{i}_hallucination", hallucination_metric.score)
-             mlflow.log_metric(f"tc_{i}_length", length_metric.score)
-
-             results.append(entry)
-     return results
-
- # ----------- 9. Gradio UI -----------
  with gr.Blocks() as demo:
-     gr.Markdown("# 🛠️ Maintenance AI Assistant (Lightning AI Generator + Gemini Evaluator)")
-
-     with gr.Tabs():
-         with gr.TabItem("Chat (RAG)"):
-             usage_counter = gr.State(value=0)
-             session_start = gr.State(value=datetime.now().isoformat())
-
-             question_input = gr.Textbox(label="Ask your maintenance question")
-             answer_output = gr.Textbox(label="AI Response")
-             ask_button = gr.Button("Get Answer")
-
-             def track_usage(question, count, session_start):
-                 count += 1
-                 with mlflow.start_run(run_name=f"User-Interaction-{count}", nested=True):
-                     mlflow.log_param("question", question)
-                     mlflow.log_param("session_start", session_start)
-                     response = rag_pipeline(question)
-                     mlflow.log_metric("total_queries", count)
-                 return response, count, session_start
-
-             ask_button.click(
-                 track_usage,
-                 inputs=[question_input, usage_counter, session_start],
-                 outputs=[answer_output, usage_counter, session_start]
-             )
-
-         with gr.TabItem("DeepEval Model Tests"):
-             gr.Markdown("### Evaluate with Gemini (no expected output needed)")
-
-             tc_input = gr.Textbox(label="Test Input (prompt)")
-             tc_actual = gr.Textbox(label="Actual Output (leave empty to auto-generate via Lightning AI)")
-             tc_context = gr.Textbox(label="Context (optional)")
-
-             auto_generate = gr.Checkbox(label="Auto-generate actual output from RAG", value=True)
-             run_button = gr.Button("Run DeepEval")
-             eval_output = gr.JSON(label="Evaluation Results")
-
-             def run_single_eval(inp, actual, context, autogen):
-                 if autogen or not actual.strip():
-                     actual_output = rag_pipeline(inp)
-                 else:
-                     actual_output = actual
-
-                 tc = LLMTestCase(
-                     input=inp,
-                     actual_output=actual_output,
-                     expected_output=None,
-                     context=[context] if context else None
-                 )
-                 results = run_deepeval_tests([tc])
-                 return results
-
-             run_button.click(
-                 run_single_eval,
-                 inputs=[tc_input, tc_actual, tc_context, auto_generate],
-                 outputs=[eval_output]
-             )
-
- if __name__ == "__main__":
      with mlflow.start_run(run_name="Deployment-Info"):
          mlflow.log_params({
-             "app_version": "1.4.0",
-             "deployment_platform": "Lightning AI / HuggingFace Space",
              "deployment_time": datetime.now().isoformat(),
              "code_version": os.getenv("GIT_COMMIT", "dev")
          })

      demo.launch()
+ '''
+ import os
+ import gradio as gr
+ import requests
+ from pinecone import Pinecone
+ from langchain.prompts import PromptTemplate
+ from langchain.chains.llm import LLMChain
+ from langchain.llms.base import LLM
+ from typing import Optional, List, Mapping, Any
+ from langchain.embeddings import HuggingFaceEmbeddings
+
+ # ----------- 1. Custom LLM to call your LitServe endpoint -----------
+ class LitServeLLM(LLM):
+     endpoint_url: str
+
+     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+         payload = {"prompt": prompt}
+         response = requests.post(self.endpoint_url, json=payload)
+         if response.status_code == 200:
+             data = response.json()
+             return data.get("response", "").strip()
+         else:
+             raise ValueError(f"Request failed: {response.status_code} {response.text}")
+
+     @property
+     def _identifying_params(self) -> Mapping[str, Any]:
+         return {"endpoint_url": self.endpoint_url}
+
+     @property
+     def _llm_type(self) -> str:
+         return "litserve_llm"
+
+
+ # ----------- 2. Connect to Pinecone -----------
+ PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
+ pc = Pinecone(api_key=PINECONE_API_KEY)
+ index = pc.Index("rag-granite-index")
+
+ # ----------- 3. Load embedding model -----------
+ embeddings_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+
+ # ----------- 4. Function to get top context from Pinecone -----------
+ def get_retrieved_context(query: str, top_k=3):
+     query_embedding = embeddings_model.embed_query(query)
+     results = index.query(
+         namespace="rag-ns",
+         vector=query_embedding,
+         top_k=top_k,
+         include_metadata=True
+     )
+     context_parts = [match['metadata']['text'] for match in results['matches']]
+     return "\n".join(context_parts)
+
+ # ----------- 5. Create LLMChain with your model -----------
+ model = LitServeLLM(
+     endpoint_url="https://8001-01k2h9d9mervcmgfn66ybkpwvq.cloudspaces.litng.ai/predict"
+ )
+
+ prompt = PromptTemplate(
+     input_variables=["context", "question"],
+     template="""
+ You are a smart assistant. Based on the provided context, answer the question in 1–2 lines only.
+ If the context has more details, summarize it concisely.
+
+ Context:
+ {context}
+
+ Question: {question}
+
+ Answer:
+ """
+ )
+
+ llm_chain = LLMChain(llm=model, prompt=prompt)
+
+ # ----------- 6. Main RAG Function -----------
+ def rag_pipeline(question):
+     try:
+         retrieved_context = get_retrieved_context(question)
+         response = llm_chain.invoke({
+             "context": retrieved_context,
+             "question": question
+         })["text"].strip()
+
+         # Only keep what's after "Answer:"
+         if "Answer:" in response:
+             response = response.split("Answer:", 1)[-1].strip()
+
+         return response
+     except Exception as e:
+         return f"Error: {str(e)}"
+
+
+ # ----------- 7. Gradio UI -----------
+ with gr.Blocks() as demo:
+     gr.Markdown("# 🧠 RAG Chatbot (Pinecone + LitServe)")
+     question_input = gr.Textbox(label="Ask your question here")
+     answer_output = gr.Textbox(label="Answer")
+     ask_button = gr.Button("Get Answer")
+     ask_button.click(rag_pipeline, inputs=question_input, outputs=answer_output)
+
+ if __name__ == "__main__":
+     demo.launch()
+ '''
+
+
+
+
  import os
  import gradio as gr
  import requests
  from langchain.chains.llm import LLMChain
  from langchain.llms.base import LLM
  from typing import Optional, List, Mapping, Any
+ import time
  from langchain_community.embeddings import HuggingFaceEmbeddings
  from dotenv import load_dotenv
  from datetime import datetime

+ # Load environment variables
+ pinecone_api_key = os.environ["PINECONE_API_KEY"]

+ mlflow_tracking_uri = os.environ["MLFLOW_TRACKING_URI"]
+
+ # ----------- DagsHub & MLflow Setup -----------

+ dagshub.init(
+     repo_owner='prathamesh.khade20',
+     repo_name='Maintenance_AI_website',
+     mlflow=True
+ )
+
+ mlflow.set_tracking_uri(mlflow_tracking_uri)
+ mlflow.set_experiment("Maintenance-RAG-Chatbot")
+ mlflow.langchain.autolog()
+
+ # Initialize MLflow run for app configuration
+ with mlflow.start_run(run_name=f"App-Config-{datetime.now().strftime('%Y%m%d-%H%M%S')}") as setup_run:
+     # Log environment configuration
+     mlflow.log_params({
+         "pinecone_index": "rag-granite-index",
+         "embedding_model": "all-MiniLM-L6-v2",
+         "namespace": "rag-ns",
+         "top_k": 3,
+         "llm_endpoint": "https://8001-01k2h9d9mervcmgfn66ybkpwvq.cloudspaces.litng.ai/predict"
+     })
+
+     # Log important files as artifacts
+     mlflow.log_text("""
  You are a smart assistant. Based on the provided context, answer the question in 1–2 lines only.
  If the context has more details, summarize it concisely.
  Context:
  {context}
  Question: {question}
  Answer:
+ """, "artifacts/prompt_template.txt")

+ # ----------- 1. Custom LLM for LitServe endpoint -----------
  class LitServeLLM(LLM):
      endpoint_url: str

+     @mlflow.trace
      def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
          payload = {"prompt": prompt}
+
+         with mlflow.start_span("lit_serve_request"):
+             start_time = time.time()
+             response = requests.post(self.endpoint_url, json=payload)
+             latency = time.time() - start_time
+
+             mlflow.log_metric("lit_serve_latency", latency)
+
+             if response.status_code == 200:
+                 data = response.json()
+                 mlflow.log_metric("response_tokens", len(data.get("response", "").split()))
+                 return data.get("response", "").strip()
+             else:
+                 mlflow.log_metric("request_errors", 1)
+                 error_info = {
+                     "status_code": response.status_code,
+                     "error": response.text,
+                     "timestamp": datetime.now().isoformat()
+                 }
+                 mlflow.log_dict(error_info, "artifacts/error_log.json")
+                 raise ValueError(f"Request failed: {response.status_code}")

      @property
      def _identifying_params(self) -> Mapping[str, Any]:
          return "litserve_llm"

  # ----------- 2. Pinecone Connection -----------
+ @mlflow.trace
  def init_pinecone():
+     PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
      pc = Pinecone(api_key=PINECONE_API_KEY)
      return pc.Index("rag-granite-index")

+ index = init_pinecone()

  # ----------- 3. Embedding Model -----------
  embeddings_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

+ # ----------- 4. Context Retrieval with Tracing -----------
+ @mlflow.trace
  def get_retrieved_context(query: str, top_k=3):
+     """Retrieve context from Pinecone with performance tracing"""
+     with mlflow.start_span("embedding_generation"):
+         start_time = time.time()
+         query_embedding = embeddings_model.embed_query(query)
+         mlflow.log_metric("embedding_latency", time.time() - start_time)
+
+     with mlflow.start_span("pinecone_query"):
+         start_time = time.time()
+         results = index.query(
+             namespace="rag-ns",
+             vector=query_embedding,
+             top_k=top_k,
+             include_metadata=True
+         )
+         mlflow.log_metric("pinecone_latency", time.time() - start_time)
+         mlflow.log_metric("retrieved_chunks", len(results['matches']))
+
      context_parts = [match['metadata']['text'] for match in results['matches']]
      return "\n".join(context_parts)

  # ----------- 5. LLM Chain Setup -----------
+ model = LitServeLLM(
+     endpoint_url="https://8001-01k2h9d9mervcmgfn66ybkpwvq.cloudspaces.litng.ai/predict"
+ )
+
+ prompt = PromptTemplate(
+     input_variables=["context", "question"],
+     template="""
+ You are a smart assistant. Based on the provided context, answer the question in 1–2 lines only.
+ If the context has more details, summarize it concisely.
+ Context:
+ {context}
+ Question: {question}
+ Answer:
+ """
+ )
+
  llm_chain = LLMChain(llm=model, prompt=prompt)

+ # ----------- 6. RAG Pipeline with Full Tracing -----------
+ @mlflow.trace
  def rag_pipeline(question):
+     """End-to-end RAG pipeline with MLflow tracing"""
      try:
+         # Start a new nested run for each query
          with mlflow.start_run(run_name=f"Query-{datetime.now().strftime('%H%M%S')}", nested=True):
              mlflow.log_param("user_question", question)
+
+             # Retrieve context
              retrieved_context = get_retrieved_context(question)
              mlflow.log_text(retrieved_context, "artifacts/retrieved_context.txt")
+
+             # Generate response
              start_time = time.time()
+             response = llm_chain.invoke({
+                 "context": retrieved_context,
+                 "question": question
+             })["text"].strip()
+
+             # Clean response
              if "Answer:" in response:
                  response = response.split("Answer:", 1)[-1].strip()
+
+             # Log metrics
              mlflow.log_metric("response_latency", time.time() - start_time)
              mlflow.log_metric("response_length", len(response))
              mlflow.log_text(response, "artifacts/response.txt")
+
              return response
+
      except Exception as e:
          mlflow.log_metric("pipeline_errors", 1)
+         error_info = {
+             "error": str(e),
+             "question": question,
+             "timestamp": datetime.now().isoformat()
+         }
          mlflow.log_dict(error_info, "artifacts/pipeline_errors.json")
          return f"Error: {str(e)}"

+ # ----------- 7. Gradio UI with Enhanced Tracking -----------
  with gr.Blocks() as demo:
+     gr.Markdown("# 🛠 Maintenance AI Assistant")
+
+     # Track additional UI metrics
+     usage_counter = gr.State(value=0)
+     session_start = gr.State(value=datetime.now().isoformat())
+
+     question_input = gr.Textbox(label="Ask your maintenance question")
+     answer_output = gr.Textbox(label="AI Response")
+     ask_button = gr.Button("Get Answer")
+     feedback = gr.Radio(["Helpful", "Not Helpful"], label="Was this response helpful?")
+
+     def track_usage(question, count, session_start, feedback=None):
+         """Wrapper to track usage metrics with feedback"""
+         count += 1
+
+         # Start tracking context
+         with mlflow.start_run(run_name=f"User-Interaction-{count}", nested=True):
+             mlflow.log_param("question", question)
+             mlflow.log_param("session_start", session_start)
+
+             # Get response
+             response = rag_pipeline(question)
+
+             # Log feedback if provided
+             if feedback:
+                 mlflow.log_param("user_feedback", feedback)
+                 mlflow.log_metric("helpful_responses", 1 if feedback == "Helpful" else 0)
+
+             # Update metrics
+             mlflow.log_metric("total_queries", count)
+
+         return response, count, session_start
+
+     ask_button.click(
+         track_usage,
+         inputs=[question_input, usage_counter, session_start],
+         outputs=[answer_output, usage_counter, session_start]
+     )
+
+     feedback.change(
+         track_usage,
+         inputs=[question_input, usage_counter, session_start, feedback],
+         outputs=[answer_output, usage_counter, session_start]
+     )
+
+ if __name__ == "__main__":
+     # Log deployment information
      with mlflow.start_run(run_name="Deployment-Info"):
          mlflow.log_params({
+             "app_version": "1.0.0",
+             "deployment_platform": "Lightning AI",
              "deployment_time": datetime.now().isoformat(),
              "code_version": os.getenv("GIT_COMMIT", "dev")
          })
+
+     # Start Gradio app
      demo.launch()
+
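
Note on the tracing API used in the new version: @mlflow.trace and mlflow.start_span come from MLflow's tracing support (MLflow 2.13 and later). Each decorated call is recorded as a trace, and each start_span block becomes a child span, which is how the embedding_generation, pinecone_query, and lit_serve_request spans above should nest under a single request. A minimal, self-contained sketch of the same pattern, separate from the committed code (the span name, attribute key, and placeholder lookup are illustrative assumptions, not part of app.py):

    import time
    import mlflow

    @mlflow.trace
    def retrieve(query: str) -> str:
        # The decorator records this call as a trace in the active MLflow experiment.
        with mlflow.start_span("vector_search") as span:  # illustrative span name
            start = time.time()
            result = f"context for {query!r}"  # placeholder for a real vector-store query
            span.set_attribute("latency_s", time.time() - start)  # hypothetical attribute key
        return result

    print(retrieve("bearing noise"))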