Commit
·
80f29b9
1
Parent(s):
00553ad
Add better error handling and logging for debugging HF Spaces issues
Browse files
2_backend_llm/app/data_loader.py
CHANGED
|
@@ -16,11 +16,34 @@ def load_feedback(csv_path: str | None = None) -> pd.DataFrame:
|
|
| 16 |
path_str = csv_path or settings.csv_path
|
| 17 |
# Resolve path relative to project root if it's a relative path
|
| 18 |
if Path(path_str).is_absolute():
|
| 19 |
-
path = path_str
|
| 20 |
else:
|
| 21 |
# Calculate project root: 2_backend_llm/app/data_loader.py -> root/
|
| 22 |
project_root = Path(__file__).resolve().parent.parent.parent
|
| 23 |
path = project_root / path_str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
df = pd.read_csv(path)
|
| 25 |
# Basic normalization of expected columns if present
|
| 26 |
expected = ["ID", "ServiceName", "Level", "Text"]
|
|
|
|
| 16 |
path_str = csv_path or settings.csv_path
|
| 17 |
# Resolve path relative to project root if it's a relative path
|
| 18 |
if Path(path_str).is_absolute():
|
| 19 |
+
path = Path(path_str)
|
| 20 |
else:
|
| 21 |
# Calculate project root: 2_backend_llm/app/data_loader.py -> root/
|
| 22 |
project_root = Path(__file__).resolve().parent.parent.parent
|
| 23 |
path = project_root / path_str
|
| 24 |
+
|
| 25 |
+
# Debug: print path information
|
| 26 |
+
print(f"๐ Looking for CSV file at: {path}", flush=True)
|
| 27 |
+
print(f"๐ Path exists: {path.exists()}", flush=True)
|
| 28 |
+
print(f"๐ Current working directory: {Path.cwd()}", flush=True)
|
| 29 |
+
print(f"๐ Project root: {project_root}", flush=True)
|
| 30 |
+
|
| 31 |
+
if not path.exists():
|
| 32 |
+
# Try alternative paths
|
| 33 |
+
alt_paths = [
|
| 34 |
+
project_root / "0_preprocessing" / "feedback_transformed_2.csv",
|
| 35 |
+
Path("0_preprocessing") / "feedback_transformed_2.csv",
|
| 36 |
+
Path("feedback_transformed_2.csv"),
|
| 37 |
+
]
|
| 38 |
+
for alt_path in alt_paths:
|
| 39 |
+
if alt_path.exists():
|
| 40 |
+
print(f"✅ Found CSV at alternative path: {alt_path}", flush=True)
|
| 41 |
+
path = alt_path
|
| 42 |
+
break
|
| 43 |
+
else:
|
| 44 |
+
raise FileNotFoundError(f"CSV file not found at {path}. Tried: {[str(p) for p in [path] + alt_paths]}")
|
| 45 |
+
|
| 46 |
+
print(f"✅ Loading CSV from: {path}", flush=True)
|
| 47 |
df = pd.read_csv(path)
|
| 48 |
# Basic normalization of expected columns if present
|
| 49 |
expected = ["ID", "ServiceName", "Level", "Text"]
|
2_backend_llm/app/sql_service.py
CHANGED
|
@@ -327,14 +327,47 @@ class SQLFeedbackService:
|
|
| 327 |
3. Execute SQL queries
|
| 328 |
4. Synthesize answer
|
| 329 |
"""
|
|
|
|
|
|
|
| 330 |
if self.df is None:
|
| 331 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
|
| 333 |
# Step 1: Generate SQL queries (with gibberish validation)
|
| 334 |
try:
|
|
|
|
| 335 |
sql_queries = self._generate_sql_queries(query)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
except ValueError as e:
|
| 337 |
# If query is gibberish, return a friendly error message
|
|
|
|
| 338 |
return AnalysisResult(
|
| 339 |
user_query=query,
|
| 340 |
sql_queries=[],
|
|
@@ -342,11 +375,27 @@ class SQLFeedbackService:
|
|
| 342 |
summary=str(e),
|
| 343 |
visualizations=None
|
| 344 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
|
| 346 |
# Step 2: Execute SQL queries
|
|
|
|
| 347 |
query_results = self._execute_sql_queries(sql_queries)
|
|
|
|
|
|
|
| 348 |
|
| 349 |
# Step 3: Synthesize answer
|
|
|
|
| 350 |
summary = self._synthesize_answer(query, sql_queries, query_results)
|
| 351 |
|
| 352 |
# Step 4: (Optional) Generate visualizations
|
|
@@ -425,18 +474,27 @@ class SQLFeedbackService:
|
|
| 425 |
# Try Gemini first
|
| 426 |
if settings.gemini_api_key and genai is not None:
|
| 427 |
try:
|
|
|
|
| 428 |
genai.configure(api_key=settings.gemini_api_key)
|
| 429 |
model = genai.GenerativeModel("gemini-2.0-flash")
|
| 430 |
response = model.generate_content(prompt)
|
| 431 |
text = getattr(response, "text", None)
|
| 432 |
if text:
|
| 433 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
except Exception as e:
|
| 435 |
-
print(f"Gemini error in SQL generation: {e}", flush=True)
|
|
|
|
|
|
|
| 436 |
|
| 437 |
# Fallback to OpenAI
|
| 438 |
if settings.openai_api_key and OpenAI is not None:
|
| 439 |
try:
|
|
|
|
| 440 |
client = OpenAI(api_key=settings.openai_api_key)
|
| 441 |
response = client.chat.completions.create(
|
| 442 |
model="gpt-4o-mini",
|
|
@@ -445,11 +503,19 @@ class SQLFeedbackService:
|
|
| 445 |
)
|
| 446 |
text = response.choices[0].message.content
|
| 447 |
if text:
|
| 448 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 449 |
except Exception as e:
|
| 450 |
-
print(f"OpenAI error in SQL generation: {e}", flush=True)
|
|
|
|
|
|
|
| 451 |
|
| 452 |
# Fallback: return empty list
|
|
|
|
| 453 |
return []
|
| 454 |
|
| 455 |
def _parse_sql_queries(self, text: str) -> List[str]:
|
|
@@ -870,9 +936,17 @@ class SQLFeedbackService:
|
|
| 870 |
# Fallback: generate a detailed answer from query results even if LLM failed
|
| 871 |
# This ensures we always return a meaningful answer, not just a status message
|
| 872 |
successful_results = [r for r in query_results if not r.error and len(r.result) > 0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 873 |
if successful_results:
|
| 874 |
fallback_text = f"ืกืืืื ืืคืืจื ืฉื ืืืืฆืืื:\n\n"
|
| 875 |
-
fallback_text += f"ืืืฆืขื {len(sql_queries)} ืฉืืืืชืืช, ืืชืืื {len(successful_results)} ืืฆืืืื ืืืืืืจื ืชืืฆืืืช.\n
|
|
|
|
|
|
|
|
|
|
| 876 |
|
| 877 |
# Analyze and summarize each result
|
| 878 |
for i, qr in enumerate(successful_results, 1):
|
|
@@ -896,11 +970,24 @@ class SQLFeedbackService:
|
|
| 896 |
fallback_text += qr.result.head(5).to_string(index=False)
|
| 897 |
fallback_text += "\n\n"
|
| 898 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 899 |
fallback_text += "ืืขืจื: ืชืฉืืื ืื ื ืืฆืจื ืืืืืืืืช ืืืชืืฆืืืช. ืื ืืชืื ืืคืืจื ืืืชืจ, ื ืกื ืืฉืืื ืฉืืื ืกืคืฆืืคืืช ืืืชืจ."
|
| 900 |
return fallback_text
|
| 901 |
else:
|
| 902 |
-
# If no successful results,
|
| 903 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 904 |
|
| 905 |
def _generate_visualizations(self, query_results: List[SQLQueryResult]) -> Optional[List[Dict[str, Any]]]:
|
| 906 |
"""
|
|
|
|
| 327 |
3. Execute SQL queries
|
| 328 |
4. Synthesize answer
|
| 329 |
"""
|
| 330 |
+
print(f"๐ Analyzing query: {query}", flush=True)
|
| 331 |
+
|
| 332 |
if self.df is None:
|
| 333 |
+
error_msg = "No feedback data available. Please ensure feedback_transformed_2.csv exists in 0_preprocessing/ directory."
|
| 334 |
+
print(f"โ {error_msg}", flush=True)
|
| 335 |
+
raise ValueError(error_msg)
|
| 336 |
+
|
| 337 |
+
print(f"✅ Data loaded: {len(self.df)} rows", flush=True)
|
| 338 |
+
|
| 339 |
+
# Check API keys
|
| 340 |
+
if not settings.gemini_api_key and not settings.openai_api_key:
|
| 341 |
+
error_msg = "โ No API keys configured! Please set GEMINI_API_KEY or OPENAI_API_KEY in Repository secrets."
|
| 342 |
+
print(error_msg, flush=True)
|
| 343 |
+
return AnalysisResult(
|
| 344 |
+
user_query=query,
|
| 345 |
+
sql_queries=[],
|
| 346 |
+
query_results=[],
|
| 347 |
+
summary=error_msg,
|
| 348 |
+
visualizations=None
|
| 349 |
+
)
|
| 350 |
+
|
| 351 |
+
print(f"✅ API keys available: Gemini={bool(settings.gemini_api_key)}, OpenAI={bool(settings.openai_api_key)}", flush=True)
|
| 352 |
|
| 353 |
# Step 1: Generate SQL queries (with gibberish validation)
|
| 354 |
try:
|
| 355 |
+
print("๐ Generating SQL queries...", flush=True)
|
| 356 |
sql_queries = self._generate_sql_queries(query)
|
| 357 |
+
print(f"✅ Generated {len(sql_queries)} SQL queries", flush=True)
|
| 358 |
+
if len(sql_queries) == 0:
|
| 359 |
+
error_msg = "ืื ื ืืฆืจื ืฉืืืืชืืช SQL. ืืืชืื ืฉืืฉืืื ืื ืืจืืจื ืื ืฉืืฉ ืืขืื ืขื ื-API. ื ืกื ืืฉืืื ืฉืืื ืืืจืช ืื ืืืืง ืืช ื-API keys."
|
| 360 |
+
print(f"โ {error_msg}", flush=True)
|
| 361 |
+
return AnalysisResult(
|
| 362 |
+
user_query=query,
|
| 363 |
+
sql_queries=[],
|
| 364 |
+
query_results=[],
|
| 365 |
+
summary=error_msg,
|
| 366 |
+
visualizations=None
|
| 367 |
+
)
|
| 368 |
except ValueError as e:
|
| 369 |
# If query is gibberish, return a friendly error message
|
| 370 |
+
print(f"โ Query validation error: {e}", flush=True)
|
| 371 |
return AnalysisResult(
|
| 372 |
user_query=query,
|
| 373 |
sql_queries=[],
|
|
|
|
| 375 |
summary=str(e),
|
| 376 |
visualizations=None
|
| 377 |
)
|
| 378 |
+
except Exception as e:
|
| 379 |
+
error_msg = f"ืฉืืืื ืืืฆืืจืช ืฉืืืืชืืช SQL: {str(e)}. ืืืืง ืืช ื-API keys ืืืืืืืจ ืืืื ืืจื ื."
|
| 380 |
+
print(f"โ {error_msg}", flush=True)
|
| 381 |
+
import traceback
|
| 382 |
+
traceback.print_exc()
|
| 383 |
+
return AnalysisResult(
|
| 384 |
+
user_query=query,
|
| 385 |
+
sql_queries=[],
|
| 386 |
+
query_results=[],
|
| 387 |
+
summary=error_msg,
|
| 388 |
+
visualizations=None
|
| 389 |
+
)
|
| 390 |
|
| 391 |
# Step 2: Execute SQL queries
|
| 392 |
+
print("๐ Executing SQL queries...", flush=True)
|
| 393 |
query_results = self._execute_sql_queries(sql_queries)
|
| 394 |
+
successful_results = [r for r in query_results if not r.error and len(r.result) > 0]
|
| 395 |
+
print(f"✅ Executed {len(query_results)} queries, {len(successful_results)} successful", flush=True)
|
| 396 |
|
| 397 |
# Step 3: Synthesize answer
|
| 398 |
+
print("๐ Synthesizing answer...", flush=True)
|
| 399 |
summary = self._synthesize_answer(query, sql_queries, query_results)
|
| 400 |
|
| 401 |
# Step 4: (Optional) Generate visualizations
|
|
|
|
| 474 |
# Try Gemini first
|
| 475 |
if settings.gemini_api_key and genai is not None:
|
| 476 |
try:
|
| 477 |
+
print("๐ Using Gemini API for SQL generation...", flush=True)
|
| 478 |
genai.configure(api_key=settings.gemini_api_key)
|
| 479 |
model = genai.GenerativeModel("gemini-2.0-flash")
|
| 480 |
response = model.generate_content(prompt)
|
| 481 |
text = getattr(response, "text", None)
|
| 482 |
if text:
|
| 483 |
+
print(f"✅ Received response from Gemini: {text[:200]}...", flush=True)
|
| 484 |
+
queries = self._parse_sql_queries(text)
|
| 485 |
+
print(f"✅ Parsed {len(queries)} SQL queries from Gemini response", flush=True)
|
| 486 |
+
return queries
|
| 487 |
+
else:
|
| 488 |
+
print("โ Gemini returned empty response", flush=True)
|
| 489 |
except Exception as e:
|
| 490 |
+
print(f"โ Gemini error in SQL generation: {e}", flush=True)
|
| 491 |
+
import traceback
|
| 492 |
+
traceback.print_exc()
|
| 493 |
|
| 494 |
# Fallback to OpenAI
|
| 495 |
if settings.openai_api_key and OpenAI is not None:
|
| 496 |
try:
|
| 497 |
+
print("๐ Using OpenAI API for SQL generation...", flush=True)
|
| 498 |
client = OpenAI(api_key=settings.openai_api_key)
|
| 499 |
response = client.chat.completions.create(
|
| 500 |
model="gpt-4o-mini",
|
|
|
|
| 503 |
)
|
| 504 |
text = response.choices[0].message.content
|
| 505 |
if text:
|
| 506 |
+
print(f"✅ Received response from OpenAI: {text[:200]}...", flush=True)
|
| 507 |
+
queries = self._parse_sql_queries(text)
|
| 508 |
+
print(f"✅ Parsed {len(queries)} SQL queries from OpenAI response", flush=True)
|
| 509 |
+
return queries
|
| 510 |
+
else:
|
| 511 |
+
print("โ OpenAI returned empty response", flush=True)
|
| 512 |
except Exception as e:
|
| 513 |
+
print(f"โ OpenAI error in SQL generation: {e}", flush=True)
|
| 514 |
+
import traceback
|
| 515 |
+
traceback.print_exc()
|
| 516 |
|
| 517 |
# Fallback: return empty list
|
| 518 |
+
print("โ No API available or all APIs failed. Returning empty query list.", flush=True)
|
| 519 |
return []
|
| 520 |
|
| 521 |
def _parse_sql_queries(self, text: str) -> List[str]:
|
|
|
|
| 936 |
# Fallback: generate a detailed answer from query results even if LLM failed
|
| 937 |
# This ensures we always return a meaningful answer, not just a status message
|
| 938 |
successful_results = [r for r in query_results if not r.error and len(r.result) > 0]
|
| 939 |
+
failed_results = [r for r in query_results if r.error]
|
| 940 |
+
|
| 941 |
+
if len(sql_queries) == 0:
|
| 942 |
+
return "ืื ื ืืฆืจื ืฉืืืืชืืช SQL. ืืืชืื ืฉืืฉืืื ืื ืืจืืจื ืื ืฉืืฉ ืืขืื ืขื ื-API. ื ืกื ืืฉืืื ืฉืืื ืืืจืช ืื ืืืืง ืืช ื-API keys ื-Repository secrets."
|
| 943 |
+
|
| 944 |
if successful_results:
|
| 945 |
fallback_text = f"ืกืืืื ืืคืืจื ืฉื ืืืืฆืืื:\n\n"
|
| 946 |
+
fallback_text += f"ืืืฆืขื {len(sql_queries)} ืฉืืืืชืืช, ืืชืืื {len(successful_results)} ืืฆืืืื ืืืืืืจื ืชืืฆืืืช.\n"
|
| 947 |
+
if failed_results:
|
| 948 |
+
fallback_text += f"โ ๏ธ {len(failed_results)} ืฉืืืืชืืช ื ืืฉืื.\n"
|
| 949 |
+
fallback_text += "\n"
|
| 950 |
|
| 951 |
# Analyze and summarize each result
|
| 952 |
for i, qr in enumerate(successful_results, 1):
|
|
|
|
| 970 |
fallback_text += qr.result.head(5).to_string(index=False)
|
| 971 |
fallback_text += "\n\n"
|
| 972 |
|
| 973 |
+
if failed_results:
|
| 974 |
+
fallback_text += "\nืฉืืืืืช ืืฉืืืืชืืช:\n"
|
| 975 |
+
for i, qr in enumerate(failed_results, 1):
|
| 976 |
+
fallback_text += f"ืฉืืืืชื {i}: {qr.error}\n"
|
| 977 |
+
fallback_text += "\n"
|
| 978 |
+
|
| 979 |
fallback_text += "ืืขืจื: ืชืฉืืื ืื ื ืืฆืจื ืืืืืืืืช ืืืชืืฆืืืช. ืื ืืชืื ืืคืืจื ืืืชืจ, ื ืกื ืืฉืืื ืฉืืื ืกืคืฆืืคืืช ืืืชืจ."
|
| 980 |
return fallback_text
|
| 981 |
else:
|
| 982 |
+
# If no successful results, provide detailed error information
|
| 983 |
+
error_details = ""
|
| 984 |
+
if failed_results:
|
| 985 |
+
error_details = "\n\nืฉืืืืืช ืืฉืืืืชืืช:\n"
|
| 986 |
+
for i, qr in enumerate(failed_results, 1):
|
| 987 |
+
error_details += f"ืฉืืืืชื {i}: {qr.query}\n"
|
| 988 |
+
error_details += f"ืฉืืืื: {qr.error}\n\n"
|
| 989 |
+
|
| 990 |
+
return f"ืืืฆืขื {len(sql_queries)} ืฉืืืืชืืช, ืื ืื ืืชืงืืื ืชืืฆืืืช ืืื ืชืื ืื.{error_details}\nืืืชืื ืฉืื ืชืื ืื ืื ืืืืืื ืืืืข ืืชืืื ืืฉืืื ืฉื ืฉืืื. ื ืกื ืืฉืืื ืฉืืื ืืืจืช ืื ืืืืืง ืืช ืื ืชืื ืื ืืืืื ืื."
|
| 991 |
|
| 992 |
def _generate_visualizations(self, query_results: List[SQLQueryResult]) -> Optional[List[Dict[str, Any]]]:
|
| 993 |
"""
|