Chris committed on
Commit
6dce4fa
·
1 Parent(s): f477d08

Final 5.10.3

Browse files
src/__pycache__/app.cpython-310.pyc CHANGED
Binary files a/src/__pycache__/app.cpython-310.pyc and b/src/__pycache__/app.cpython-310.pyc differ
 
src/app.py CHANGED
@@ -1148,9 +1148,10 @@ Please log in to access GAIA evaluation features with full inference access.
1148
  refresh_auth_button = gr.Button("🔄 Refresh Auth Status", variant="secondary", scale=1)
1149
 
1150
  unit4_run_button = gr.Button(
1151
- "🚀 Run GAIA Evaluation & Submit All Answers",
1152
  variant="primary",
1153
- scale=2
 
1154
  )
1155
 
1156
  unit4_status_output = gr.Textbox(
@@ -1269,6 +1270,35 @@ Please log in to access GAIA evaluation features with full inference access.
1269
 
1270
  return status, table, auth_status, csv_update, json_update, summary_update
1271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1272
  unit4_run_button.click(
1273
  fn=handle_evaluation_results,
1274
  inputs=[], # No inputs needed - profile comes from session
@@ -1276,12 +1306,7 @@ Please log in to access GAIA evaluation features with full inference access.
1276
  csv_download, json_download, summary_download]
1277
  )
1278
 
1279
- # Refresh authentication status
1280
- def refresh_auth_status(request: gr.Request):
1281
- """Refresh authentication status display"""
1282
- profile = getattr(request, 'oauth_profile', None)
1283
- return format_auth_status(profile)
1284
-
1285
  refresh_auth_button.click(
1286
  fn=refresh_auth_status,
1287
  outputs=[auth_status_display]
 
1148
  refresh_auth_button = gr.Button("🔄 Refresh Auth Status", variant="secondary", scale=1)
1149
 
1150
  unit4_run_button = gr.Button(
1151
+ "🔒 Login Required for GAIA Evaluation",
1152
  variant="primary",
1153
+ scale=2,
1154
+ interactive=False # Disabled until login
1155
  )
1156
 
1157
  unit4_status_output = gr.Textbox(
 
1270
 
1271
  return status, table, auth_status, csv_update, json_update, summary_update
1272
 
1273
+ def refresh_auth_status(request: gr.Request):
1274
+ """Refresh authentication status display"""
1275
+ profile = getattr(request, 'oauth_profile', None)
1276
+ return format_auth_status(profile)
1277
+
1278
+ def check_login_state(request: gr.Request):
1279
+ """Check if user is logged in and update UI accordingly"""
1280
+ profile = getattr(request, 'oauth_profile', None)
1281
+
1282
+ if profile:
1283
+ # User is logged in - return updated auth status
1284
+ auth_status = format_auth_status(profile)
1285
+ # Enable the run button
1286
+ button_update = gr.update(interactive=True, value="🚀 Run GAIA Evaluation & Submit All Answers")
1287
+ return auth_status, button_update
1288
+ else:
1289
+ # User not logged in - show login required message
1290
+ auth_status = format_auth_status(None)
1291
+ # Disable the run button
1292
+ button_update = gr.update(interactive=False, value="🔒 Login Required for GAIA Evaluation")
1293
+ return auth_status, button_update
1294
+
1295
+ # Set up automatic login state checking
1296
+ interface.load(
1297
+ fn=check_login_state,
1298
+ outputs=[auth_status_display, unit4_run_button],
1299
+ every=2 # Check every 2 seconds for login state changes
1300
+ )
1301
+
1302
  unit4_run_button.click(
1303
  fn=handle_evaluation_results,
1304
  inputs=[], # No inputs needed - profile comes from session
 
1306
  csv_download, json_download, summary_download]
1307
  )
1308
 
1309
+ # Refresh authentication status manually
 
 
 
 
 
1310
  refresh_auth_button.click(
1311
  fn=refresh_auth_status,
1312
  outputs=[auth_status_display]
src/models/__pycache__/qwen_client.cpython-310.pyc CHANGED
Binary files a/src/models/__pycache__/qwen_client.cpython-310.pyc and b/src/models/__pycache__/qwen_client.cpython-310.pyc differ
 
src/models/qwen_client.py CHANGED
@@ -55,6 +55,11 @@ class QwenClient:
55
  self.hf_token = hf_token or os.getenv("HUGGINGFACE_TOKEN") or os.getenv("HF_TOKEN")
56
  if not self.hf_token:
57
  logger.warning("No HuggingFace token provided. API access may be limited.")
 
 
 
 
 
58
 
59
  # Define model configurations with fallbacks
60
  self.models = {
@@ -123,11 +128,6 @@ class QwenClient:
123
  self.langchain_clients = {}
124
  self._initialize_clients()
125
 
126
- # Cost tracking
127
- self.total_cost = 0.0
128
- self.request_count = 0
129
- self.budget_limit = 0.10 # $0.10 total budget
130
-
131
  def _initialize_clients(self):
132
  """Initialize HuggingFace clients with fallback support"""
133
 
@@ -351,6 +351,8 @@ class QwenClient:
351
  formatted_prompt = f"Question: {prompt}\nAnswer:"
352
 
353
  try:
 
 
354
  response_text = client.text_generation(
355
  formatted_prompt,
356
  max_new_tokens=tokens,
@@ -361,33 +363,59 @@ class QwenClient:
361
 
362
  if not response_text or not response_text.strip():
363
  # Try alternative generation method if first fails
364
- logger.warning(f"Empty response from {config.name} attempt 1, trying alternative...")
365
  response_text = client.text_generation(
366
  prompt,
367
- max_new_tokens=min(tokens, 100), # Smaller token limit
368
- temperature=0.7, # Higher temperature for more response
369
- return_full_text=False
 
370
  )
371
 
372
  if not response_text or not response_text.strip():
373
  logger.warning(f"Empty response from {config.name} attempt 2, trying simple format...")
374
  # Try even simpler format
375
  response_text = client.text_generation(
376
- f"Answer: {prompt}",
377
- max_new_tokens=50,
378
- temperature=0.5,
 
 
 
 
 
 
 
 
 
 
379
  return_full_text=False
380
  )
381
 
382
  if not response_text or not response_text.strip():
383
- raise ValueError(f"No response received from {config.name} after 3 attempts. Response was: '{response_text}'")
 
 
384
 
385
  except Exception as gen_error:
386
- logger.error(f"❌ Text generation failed for {config.name}: {gen_error}")
387
- raise ValueError(f"Text generation error: {gen_error}")
 
 
 
 
 
 
 
 
 
 
388
 
 
389
  if not response_text or not response_text.strip():
390
- raise ValueError(f"Final response check failed for {config.name}. Response: '{response_text}'")
 
 
391
 
392
  response_time = time.time() - start_time
393
 
 
55
  self.hf_token = hf_token or os.getenv("HUGGINGFACE_TOKEN") or os.getenv("HF_TOKEN")
56
  if not self.hf_token:
57
  logger.warning("No HuggingFace token provided. API access may be limited.")
58
+
59
+ # Initialize cost tracking first
60
+ self.total_cost = 0.0
61
+ self.request_count = 0
62
+ self.budget_limit = 0.10 # $0.10 total budget
63
 
64
  # Define model configurations with fallbacks
65
  self.models = {
 
128
  self.langchain_clients = {}
129
  self._initialize_clients()
130
 
 
 
 
 
 
131
  def _initialize_clients(self):
132
  """Initialize HuggingFace clients with fallback support"""
133
 
 
351
  formatted_prompt = f"Question: {prompt}\nAnswer:"
352
 
353
  try:
354
+ # First attempt: Standard formatted prompt
355
+ logger.info(f"Attempting generation with {config.name}...")
356
  response_text = client.text_generation(
357
  formatted_prompt,
358
  max_new_tokens=tokens,
 
363
 
364
  if not response_text or not response_text.strip():
365
  # Try alternative generation method if first fails
366
+ logger.warning(f"Empty response from {config.name} attempt 1, trying direct prompt...")
367
  response_text = client.text_generation(
368
  prompt,
369
+ max_new_tokens=min(tokens, 50), # Smaller token limit
370
+ temperature=0.7, # Higher temperature
371
+ return_full_text=False,
372
+ do_sample=True
373
  )
374
 
375
  if not response_text or not response_text.strip():
376
  logger.warning(f"Empty response from {config.name} attempt 2, trying simple format...")
377
  # Try even simpler format
378
  response_text = client.text_generation(
379
+ f"Answer this: {prompt}",
380
+ max_new_tokens=30,
381
+ temperature=0.8,
382
+ return_full_text=False,
383
+ do_sample=True
384
+ )
385
+
386
+ if not response_text or not response_text.strip():
387
+ # Final attempt with minimal parameters
388
+ logger.warning(f"Empty response from {config.name} attempt 3, trying minimal config...")
389
+ response_text = client.text_generation(
390
+ prompt[:100], # Truncate prompt
391
+ max_new_tokens=20,
392
  return_full_text=False
393
  )
394
 
395
  if not response_text or not response_text.strip():
396
+ error_msg = f"No response received from {config.name} after 4 attempts. Last response: '{response_text}'"
397
+ logger.error(f"❌ {error_msg}")
398
+ raise ValueError(error_msg)
399
 
400
  except Exception as gen_error:
401
+ error_details = str(gen_error)
402
+ logger.error(f"❌ Text generation failed for {config.name}: {error_details}")
403
+
404
+ # Check for specific error types
405
+ if "timeout" in error_details.lower():
406
+ raise ValueError(f"Timeout error with {config.name}: {error_details}")
407
+ elif "rate limit" in error_details.lower() or "429" in error_details:
408
+ raise ValueError(f"Rate limit error with {config.name}: {error_details}")
409
+ elif "auth" in error_details.lower() or "401" in error_details:
410
+ raise ValueError(f"Authentication error with {config.name}: {error_details}")
411
+ else:
412
+ raise ValueError(f"Generation error with {config.name}: {error_details}")
413
 
414
+ # Final validation
415
  if not response_text or not response_text.strip():
416
+ error_msg = f"Final validation failed for {config.name}. Response: '{response_text}'"
417
+ logger.error(f"❌ {error_msg}")
418
+ raise ValueError(error_msg)
419
 
420
  response_time = time.time() - start_time
421
 
src/production_deployment_guide.md CHANGED
@@ -90,7 +90,44 @@ After successful deployment with enhanced fallback system:
90
  - **Smart Error Recovery**: Advanced retry logic with multiple generation attempts
91
  - **Enhanced Debugging**: Detailed error reporting for troubleshooting
92
 
93
- ### **📊 Latest Production Fixes (v2.1):**
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
  #### OAuth Scope Request Configuration ✅
96
  ```python
@@ -108,32 +145,6 @@ login_button = gr.LoginButton(
108
  )
109
  ```
110
 
111
- #### HF_TOKEN Priority System ✅
112
- ```python
113
- # Authentication priority order:
114
- 1. HF_TOKEN environment variable (highest priority)
115
- 2. OAuth token from user login (now requests inference access)
116
- 3. SimpleClient fallback (guaranteed to work)
117
- ```
118
-
119
- #### Enhanced Error Handling ✅
120
- ```python
121
- # Multi-attempt generation with detailed logging:
122
- 1. Standard generation with formatted prompt
123
- 2. Alternative generation with different parameters
124
- 3. Simple format generation as last resort
125
- 4. Graceful fallback to SimpleClient if all fail
126
- ```
127
-
128
- #### Guaranteed Fallback Logic ✅
129
- ```python
130
- # Even with HF_TOKEN, if models fail:
131
- if main_client_fails:
132
- log_detailed_error()
133
- fallback_to_simple_client() # Always works
134
- ensure_user_gets_responses()
135
- ```
136
-
137
  ## 🎯 Deployment Steps
138
 
139
  ### 1. Pre-Deployment Checklist
 
90
  - **Smart Error Recovery**: Advanced retry logic with multiple generation attempts
91
  - **Enhanced Debugging**: Detailed error reporting for troubleshooting
92
 
93
+ ### **📊 Latest Production Fixes (v2.2):**
94
+
95
+ #### Dynamic Authentication Detection ✅
96
+ ```python
97
+ # Real-time login state monitoring:
98
+ interface.load(
99
+ fn=check_login_state,
100
+ outputs=[auth_status_display, unit4_run_button],
101
+ every=2 # Check every 2 seconds for login state changes
102
+ )
103
+
104
+ # Button state updates based on login:
105
+ if profile:
106
+ button_update = gr.update(interactive=True, value="🚀 Run GAIA Evaluation & Submit All Answers")
107
+ else:
108
+ button_update = gr.update(interactive=False, value="🔒 Login Required for GAIA Evaluation")
109
+ ```
110
+
111
+ #### Model Initialization Bug Fixes ✅
112
+ ```python
113
+ # Fixed QwenClient total_cost attribute error:
114
+ def __init__(self, hf_token: Optional[str] = None):
115
+ # Initialize cost tracking FIRST
116
+ self.total_cost = 0.0
117
+ self.request_count = 0
118
+ self.budget_limit = 0.10
119
+ # Then initialize models...
120
+ ```
121
+
122
+ #### Enhanced FLAN-T5 Generation ✅
123
+ ```python
124
+ # 4-attempt generation strategy:
125
+ 1. Standard formatted prompt: "Question: {prompt}\nAnswer:"
126
+ 2. Direct prompt with higher temperature
127
+ 3. Simple format: "Answer this: {prompt}"
128
+ 4. Minimal config with truncated prompt
129
+ # Each with detailed error logging and specific error type detection
130
+ ```
131
 
132
  #### OAuth Scope Request Configuration ✅
133
  ```python
 
145
  )
146
  ```
147
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  ## 🎯 Deployment Steps
149
 
150
  ### 1. Pre-Deployment Checklist