galbendavids committed on
Commit
80f29b9
·
1 Parent(s): 00553ad

Add better error handling and logging for debugging HF Spaces issues

Browse files
2_backend_llm/app/data_loader.py CHANGED
@@ -16,11 +16,34 @@ def load_feedback(csv_path: str | None = None) -> pd.DataFrame:
16
  path_str = csv_path or settings.csv_path
17
  # Resolve path relative to project root if it's a relative path
18
  if Path(path_str).is_absolute():
19
- path = path_str
20
  else:
21
  # Calculate project root: 2_backend_llm/app/data_loader.py -> root/
22
  project_root = Path(__file__).resolve().parent.parent.parent
23
  path = project_root / path_str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  df = pd.read_csv(path)
25
  # Basic normalization of expected columns if present
26
  expected = ["ID", "ServiceName", "Level", "Text"]
 
16
  path_str = csv_path or settings.csv_path
17
  # Resolve path relative to project root if it's a relative path
18
  if Path(path_str).is_absolute():
19
+ path = Path(path_str)
20
  else:
21
  # Calculate project root: 2_backend_llm/app/data_loader.py -> root/
22
  project_root = Path(__file__).resolve().parent.parent.parent
23
  path = project_root / path_str
24
+
25
+ # Debug: print path information
26
+ print(f"🔍 Looking for CSV file at: {path}", flush=True)
27
+ print(f"🔍 Path exists: {path.exists()}", flush=True)
28
+ print(f"🔍 Current working directory: {Path.cwd()}", flush=True)
29
+ print(f"🔍 Project root: {project_root}", flush=True)
30
+
31
+ if not path.exists():
32
+ # Try alternative paths
33
+ alt_paths = [
34
+ project_root / "0_preprocessing" / "feedback_transformed_2.csv",
35
+ Path("0_preprocessing") / "feedback_transformed_2.csv",
36
+ Path("feedback_transformed_2.csv"),
37
+ ]
38
+ for alt_path in alt_paths:
39
+ if alt_path.exists():
40
+ print(f"✅ Found CSV at alternative path: {alt_path}", flush=True)
41
+ path = alt_path
42
+ break
43
+ else:
44
+ raise FileNotFoundError(f"CSV file not found at {path}. Tried: {[str(p) for p in [path] + alt_paths]}")
45
+
46
+ print(f"✅ Loading CSV from: {path}", flush=True)
47
  df = pd.read_csv(path)
48
  # Basic normalization of expected columns if present
49
  expected = ["ID", "ServiceName", "Level", "Text"]
2_backend_llm/app/sql_service.py CHANGED
@@ -327,14 +327,47 @@ class SQLFeedbackService:
327
  3. Execute SQL queries
328
  4. Synthesize answer
329
  """
 
 
330
  if self.df is None:
331
- raise ValueError("No feedback data available. Please ensure feedback_transformed_2.csv exists in 0_preprocessing/ directory.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
 
333
  # Step 1: Generate SQL queries (with gibberish validation)
334
  try:
 
335
  sql_queries = self._generate_sql_queries(query)
 
 
 
 
 
 
 
 
 
 
 
336
  except ValueError as e:
337
  # If query is gibberish, return a friendly error message
 
338
  return AnalysisResult(
339
  user_query=query,
340
  sql_queries=[],
@@ -342,11 +375,27 @@ class SQLFeedbackService:
342
  summary=str(e),
343
  visualizations=None
344
  )
 
 
 
 
 
 
 
 
 
 
 
 
345
 
346
  # Step 2: Execute SQL queries
 
347
  query_results = self._execute_sql_queries(sql_queries)
 
 
348
 
349
  # Step 3: Synthesize answer
 
350
  summary = self._synthesize_answer(query, sql_queries, query_results)
351
 
352
  # Step 4: (Optional) Generate visualizations
@@ -425,18 +474,27 @@ class SQLFeedbackService:
425
  # Try Gemini first
426
  if settings.gemini_api_key and genai is not None:
427
  try:
 
428
  genai.configure(api_key=settings.gemini_api_key)
429
  model = genai.GenerativeModel("gemini-2.0-flash")
430
  response = model.generate_content(prompt)
431
  text = getattr(response, "text", None)
432
  if text:
433
- return self._parse_sql_queries(text)
 
 
 
 
 
434
  except Exception as e:
435
- print(f"Gemini error in SQL generation: {e}", flush=True)
 
 
436
 
437
  # Fallback to OpenAI
438
  if settings.openai_api_key and OpenAI is not None:
439
  try:
 
440
  client = OpenAI(api_key=settings.openai_api_key)
441
  response = client.chat.completions.create(
442
  model="gpt-4o-mini",
@@ -445,11 +503,19 @@ class SQLFeedbackService:
445
  )
446
  text = response.choices[0].message.content
447
  if text:
448
- return self._parse_sql_queries(text)
 
 
 
 
 
449
  except Exception as e:
450
- print(f"OpenAI error in SQL generation: {e}", flush=True)
 
 
451
 
452
  # Fallback: return empty list
 
453
  return []
454
 
455
  def _parse_sql_queries(self, text: str) -> List[str]:
@@ -870,9 +936,17 @@ class SQLFeedbackService:
870
  # Fallback: generate a detailed answer from query results even if LLM failed
871
  # This ensures we always return a meaningful answer, not just a status message
872
  successful_results = [r for r in query_results if not r.error and len(r.result) > 0]
 
 
 
 
 
873
  if successful_results:
874
  fallback_text = f"ืกื™ื›ื•ื ืžืคื•ืจื˜ ืฉืœ ื”ืžืžืฆืื™ื:\n\n"
875
- fallback_text += f"ื‘ื•ืฆืขื• {len(sql_queries)} ืฉืื™ืœืชื•ืช, ืžืชื•ื›ืŸ {len(successful_results)} ื”ืฆืœื™ื—ื• ื•ื”ื—ื–ื™ืจื• ืชื•ืฆืื•ืช.\n\n"
 
 
 
876
 
877
  # Analyze and summarize each result
878
  for i, qr in enumerate(successful_results, 1):
@@ -896,11 +970,24 @@ class SQLFeedbackService:
896
  fallback_text += qr.result.head(5).to_string(index=False)
897
  fallback_text += "\n\n"
898
 
 
 
 
 
 
 
899
  fallback_text += "ื”ืขืจื”: ืชืฉื•ื‘ื” ื–ื• ื ื•ืฆืจื” ืื•ื˜ื•ืžื˜ื™ืช ืžื”ืชื•ืฆืื•ืช. ืœื ื™ืชื•ื— ืžืคื•ืจื˜ ื™ื•ืชืจ, ื ืกื” ืœืฉืื•ืœ ืฉืืœื” ืกืคืฆื™ืคื™ืช ื™ื•ืชืจ."
900
  return fallback_text
901
  else:
902
- # If no successful results, still provide a helpful message
903
- return f"ื‘ื•ืฆืขื• {len(sql_queries)} ืฉืื™ืœืชื•ืช, ืืš ืœื ื”ืชืงื‘ืœื• ืชื•ืฆืื•ืช ืžื”ื ืชื•ื ื™ื.\n\nื™ื™ืชื›ืŸ ืฉื”ื ืชื•ื ื™ื ืœื ืžื›ื™ืœื™ื ืžื™ื“ืข ื”ืชื•ืื ืœืฉืืœื” ืฉื ืฉืืœื”. ื ืกื” ืœืฉืื•ืœ ืฉืืœื” ืื—ืจืช ืื• ืœื‘ื“ื•ืง ืืช ื”ื ืชื•ื ื™ื ื”ื–ืžื™ื ื™ื."
 
 
 
 
 
 
 
904
 
905
  def _generate_visualizations(self, query_results: List[SQLQueryResult]) -> Optional[List[Dict[str, Any]]]:
906
  """
 
327
  3. Execute SQL queries
328
  4. Synthesize answer
329
  """
330
+ print(f"🔍 Analyzing query: {query}", flush=True)
331
+
332
  if self.df is None:
333
+ error_msg = "No feedback data available. Please ensure feedback_transformed_2.csv exists in 0_preprocessing/ directory."
334
+ print(f"❌ {error_msg}", flush=True)
335
+ raise ValueError(error_msg)
336
+
337
+ print(f"✅ Data loaded: {len(self.df)} rows", flush=True)
338
+
339
+ # Check API keys
340
+ if not settings.gemini_api_key and not settings.openai_api_key:
341
+ error_msg = "❌ No API keys configured! Please set GEMINI_API_KEY or OPENAI_API_KEY in Repository secrets."
342
+ print(error_msg, flush=True)
343
+ return AnalysisResult(
344
+ user_query=query,
345
+ sql_queries=[],
346
+ query_results=[],
347
+ summary=error_msg,
348
+ visualizations=None
349
+ )
350
+
351
+ print(f"✅ API keys available: Gemini={bool(settings.gemini_api_key)}, OpenAI={bool(settings.openai_api_key)}", flush=True)
352
 
353
  # Step 1: Generate SQL queries (with gibberish validation)
354
  try:
355
+ print("🔍 Generating SQL queries...", flush=True)
356
  sql_queries = self._generate_sql_queries(query)
357
+ print(f"✅ Generated {len(sql_queries)} SQL queries", flush=True)
358
+ if len(sql_queries) == 0:
359
+ error_msg = "ืœื ื ื•ืฆืจื• ืฉืื™ืœืชื•ืช SQL. ื™ื™ืชื›ืŸ ืฉื”ืฉืืœื” ืœื ื‘ืจื•ืจื” ืื• ืฉื™ืฉ ื‘ืขื™ื” ืขื ื”-API. ื ืกื” ืœืฉืื•ืœ ืฉืืœื” ืื—ืจืช ืื• ื‘ื“ื•ืง ืืช ื”-API keys."
360
+ print(f"❌ {error_msg}", flush=True)
361
+ return AnalysisResult(
362
+ user_query=query,
363
+ sql_queries=[],
364
+ query_results=[],
365
+ summary=error_msg,
366
+ visualizations=None
367
+ )
368
  except ValueError as e:
369
  # If query is gibberish, return a friendly error message
370
+ print(f"❌ Query validation error: {e}", flush=True)
371
  return AnalysisResult(
372
  user_query=query,
373
  sql_queries=[],
 
375
  summary=str(e),
376
  visualizations=None
377
  )
378
+ except Exception as e:
379
+ error_msg = f"ืฉื’ื™ืื” ื‘ื™ืฆื™ืจืช ืฉืื™ืœืชื•ืช SQL: {str(e)}. ื‘ื“ื•ืง ืืช ื”-API keys ื•ื”ื—ื™ื‘ื•ืจ ืœืื™ื ื˜ืจื ื˜."
380
+ print(f"❌ {error_msg}", flush=True)
381
+ import traceback
382
+ traceback.print_exc()
383
+ return AnalysisResult(
384
+ user_query=query,
385
+ sql_queries=[],
386
+ query_results=[],
387
+ summary=error_msg,
388
+ visualizations=None
389
+ )
390
 
391
  # Step 2: Execute SQL queries
392
+ print("🔍 Executing SQL queries...", flush=True)
393
  query_results = self._execute_sql_queries(sql_queries)
394
+ successful_results = [r for r in query_results if not r.error and len(r.result) > 0]
395
+ print(f"✅ Executed {len(query_results)} queries, {len(successful_results)} successful", flush=True)
396
 
397
  # Step 3: Synthesize answer
398
+ print("🔍 Synthesizing answer...", flush=True)
399
  summary = self._synthesize_answer(query, sql_queries, query_results)
400
 
401
  # Step 4: (Optional) Generate visualizations
 
474
  # Try Gemini first
475
  if settings.gemini_api_key and genai is not None:
476
  try:
477
+ print("🔍 Using Gemini API for SQL generation...", flush=True)
478
  genai.configure(api_key=settings.gemini_api_key)
479
  model = genai.GenerativeModel("gemini-2.0-flash")
480
  response = model.generate_content(prompt)
481
  text = getattr(response, "text", None)
482
  if text:
483
+ print(f"✅ Received response from Gemini: {text[:200]}...", flush=True)
484
+ queries = self._parse_sql_queries(text)
485
+ print(f"✅ Parsed {len(queries)} SQL queries from Gemini response", flush=True)
486
+ return queries
487
+ else:
488
+ print("❌ Gemini returned empty response", flush=True)
489
  except Exception as e:
490
+ print(f"❌ Gemini error in SQL generation: {e}", flush=True)
491
+ import traceback
492
+ traceback.print_exc()
493
 
494
  # Fallback to OpenAI
495
  if settings.openai_api_key and OpenAI is not None:
496
  try:
497
+ print("🔍 Using OpenAI API for SQL generation...", flush=True)
498
  client = OpenAI(api_key=settings.openai_api_key)
499
  response = client.chat.completions.create(
500
  model="gpt-4o-mini",
 
503
  )
504
  text = response.choices[0].message.content
505
  if text:
506
+ print(f"✅ Received response from OpenAI: {text[:200]}...", flush=True)
507
+ queries = self._parse_sql_queries(text)
508
+ print(f"✅ Parsed {len(queries)} SQL queries from OpenAI response", flush=True)
509
+ return queries
510
+ else:
511
+ print("❌ OpenAI returned empty response", flush=True)
512
  except Exception as e:
513
+ print(f"❌ OpenAI error in SQL generation: {e}", flush=True)
514
+ import traceback
515
+ traceback.print_exc()
516
 
517
  # Fallback: return empty list
518
+ print("❌ No API available or all APIs failed. Returning empty query list.", flush=True)
519
  return []
520
 
521
  def _parse_sql_queries(self, text: str) -> List[str]:
 
936
  # Fallback: generate a detailed answer from query results even if LLM failed
937
  # This ensures we always return a meaningful answer, not just a status message
938
  successful_results = [r for r in query_results if not r.error and len(r.result) > 0]
939
+ failed_results = [r for r in query_results if r.error]
940
+
941
+ if len(sql_queries) == 0:
942
+ return "ืœื ื ื•ืฆืจื• ืฉืื™ืœืชื•ืช SQL. ื™ื™ืชื›ืŸ ืฉื”ืฉืืœื” ืœื ื‘ืจื•ืจื” ืื• ืฉื™ืฉ ื‘ืขื™ื” ืขื ื”-API. ื ืกื” ืœืฉืื•ืœ ืฉืืœื” ืื—ืจืช ืื• ื‘ื“ื•ืง ืืช ื”-API keys ื‘-Repository secrets."
943
+
944
  if successful_results:
945
  fallback_text = f"ืกื™ื›ื•ื ืžืคื•ืจื˜ ืฉืœ ื”ืžืžืฆืื™ื:\n\n"
946
+ fallback_text += f"ื‘ื•ืฆืขื• {len(sql_queries)} ืฉืื™ืœืชื•ืช, ืžืชื•ื›ืŸ {len(successful_results)} ื”ืฆืœื™ื—ื• ื•ื”ื—ื–ื™ืจื• ืชื•ืฆืื•ืช.\n"
947
+ if failed_results:
948
+ fallback_text += f"โš ๏ธ {len(failed_results)} ืฉืื™ืœืชื•ืช ื ื›ืฉืœื•.\n"
949
+ fallback_text += "\n"
950
 
951
  # Analyze and summarize each result
952
  for i, qr in enumerate(successful_results, 1):
 
970
  fallback_text += qr.result.head(5).to_string(index=False)
971
  fallback_text += "\n\n"
972
 
973
+ if failed_results:
974
+ fallback_text += "\nืฉื’ื™ืื•ืช ื‘ืฉืื™ืœืชื•ืช:\n"
975
+ for i, qr in enumerate(failed_results, 1):
976
+ fallback_text += f"ืฉืื™ืœืชื” {i}: {qr.error}\n"
977
+ fallback_text += "\n"
978
+
979
  fallback_text += "ื”ืขืจื”: ืชืฉื•ื‘ื” ื–ื• ื ื•ืฆืจื” ืื•ื˜ื•ืžื˜ื™ืช ืžื”ืชื•ืฆืื•ืช. ืœื ื™ืชื•ื— ืžืคื•ืจื˜ ื™ื•ืชืจ, ื ืกื” ืœืฉืื•ืœ ืฉืืœื” ืกืคืฆื™ืคื™ืช ื™ื•ืชืจ."
980
  return fallback_text
981
  else:
982
+ # If no successful results, provide detailed error information
983
+ error_details = ""
984
+ if failed_results:
985
+ error_details = "\n\nืฉื’ื™ืื•ืช ื‘ืฉืื™ืœืชื•ืช:\n"
986
+ for i, qr in enumerate(failed_results, 1):
987
+ error_details += f"ืฉืื™ืœืชื” {i}: {qr.query}\n"
988
+ error_details += f"ืฉื’ื™ืื”: {qr.error}\n\n"
989
+
990
+ return f"ื‘ื•ืฆืขื• {len(sql_queries)} ืฉืื™ืœืชื•ืช, ืืš ืœื ื”ืชืงื‘ืœื• ืชื•ืฆืื•ืช ืžื”ื ืชื•ื ื™ื.{error_details}\nื™ื™ืชื›ืŸ ืฉื”ื ืชื•ื ื™ื ืœื ืžื›ื™ืœื™ื ืžื™ื“ืข ื”ืชื•ืื ืœืฉืืœื” ืฉื ืฉืืœื”. ื ืกื” ืœืฉืื•ืœ ืฉืืœื” ืื—ืจืช ืื• ืœื‘ื“ื•ืง ืืช ื”ื ืชื•ื ื™ื ื”ื–ืžื™ื ื™ื."
991
 
992
  def _generate_visualizations(self, query_results: List[SQLQueryResult]) -> Optional[List[Dict[str, Any]]]:
993
  """