Spaces:
Sleeping
Sleeping
Chris
committed on
Commit
·
6dce4fa
1
Parent(s):
f477d08
Final 5.10.3
Browse files
- src/__pycache__/app.cpython-310.pyc +0 -0
- src/app.py +33 -8
- src/models/__pycache__/qwen_client.cpython-310.pyc +0 -0
- src/models/qwen_client.py +44 -16
- src/production_deployment_guide.md +38 -27
src/__pycache__/app.cpython-310.pyc
CHANGED
|
Binary files a/src/__pycache__/app.cpython-310.pyc and b/src/__pycache__/app.cpython-310.pyc differ
|
|
|
src/app.py
CHANGED
|
@@ -1148,9 +1148,10 @@ Please log in to access GAIA evaluation features with full inference access.
|
|
| 1148 |
refresh_auth_button = gr.Button("π Refresh Auth Status", variant="secondary", scale=1)
|
| 1149 |
|
| 1150 |
unit4_run_button = gr.Button(
|
| 1151 |
-
"
|
| 1152 |
variant="primary",
|
| 1153 |
-
scale=2
|
|
|
|
| 1154 |
)
|
| 1155 |
|
| 1156 |
unit4_status_output = gr.Textbox(
|
|
@@ -1269,6 +1270,35 @@ Please log in to access GAIA evaluation features with full inference access.
|
|
| 1269 |
|
| 1270 |
return status, table, auth_status, csv_update, json_update, summary_update
|
| 1271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1272 |
unit4_run_button.click(
|
| 1273 |
fn=handle_evaluation_results,
|
| 1274 |
inputs=[], # No inputs needed - profile comes from session
|
|
@@ -1276,12 +1306,7 @@ Please log in to access GAIA evaluation features with full inference access.
|
|
| 1276 |
csv_download, json_download, summary_download]
|
| 1277 |
)
|
| 1278 |
|
| 1279 |
-
# Refresh authentication status
|
| 1280 |
-
def refresh_auth_status(request: gr.Request):
|
| 1281 |
-
"""Refresh authentication status display"""
|
| 1282 |
-
profile = getattr(request, 'oauth_profile', None)
|
| 1283 |
-
return format_auth_status(profile)
|
| 1284 |
-
|
| 1285 |
refresh_auth_button.click(
|
| 1286 |
fn=refresh_auth_status,
|
| 1287 |
outputs=[auth_status_display]
|
|
|
|
| 1148 |
refresh_auth_button = gr.Button("π Refresh Auth Status", variant="secondary", scale=1)
|
| 1149 |
|
| 1150 |
unit4_run_button = gr.Button(
|
| 1151 |
+
"π Login Required for GAIA Evaluation",
|
| 1152 |
variant="primary",
|
| 1153 |
+
scale=2,
|
| 1154 |
+
interactive=False # Disabled until login
|
| 1155 |
)
|
| 1156 |
|
| 1157 |
unit4_status_output = gr.Textbox(
|
|
|
|
| 1270 |
|
| 1271 |
return status, table, auth_status, csv_update, json_update, summary_update
|
| 1272 |
|
| 1273 |
+
def refresh_auth_status(request: gr.Request):
|
| 1274 |
+
"""Refresh authentication status display"""
|
| 1275 |
+
profile = getattr(request, 'oauth_profile', None)
|
| 1276 |
+
return format_auth_status(profile)
|
| 1277 |
+
|
| 1278 |
+
def check_login_state(request: gr.Request):
|
| 1279 |
+
"""Check if user is logged in and update UI accordingly"""
|
| 1280 |
+
profile = getattr(request, 'oauth_profile', None)
|
| 1281 |
+
|
| 1282 |
+
if profile:
|
| 1283 |
+
# User is logged in - return updated auth status
|
| 1284 |
+
auth_status = format_auth_status(profile)
|
| 1285 |
+
# Enable the run button
|
| 1286 |
+
button_update = gr.update(interactive=True, value="π Run GAIA Evaluation & Submit All Answers")
|
| 1287 |
+
return auth_status, button_update
|
| 1288 |
+
else:
|
| 1289 |
+
# User not logged in - show login required message
|
| 1290 |
+
auth_status = format_auth_status(None)
|
| 1291 |
+
# Disable the run button
|
| 1292 |
+
button_update = gr.update(interactive=False, value="π Login Required for GAIA Evaluation")
|
| 1293 |
+
return auth_status, button_update
|
| 1294 |
+
|
| 1295 |
+
# Set up automatic login state checking
|
| 1296 |
+
interface.load(
|
| 1297 |
+
fn=check_login_state,
|
| 1298 |
+
outputs=[auth_status_display, unit4_run_button],
|
| 1299 |
+
every=2 # Check every 2 seconds for login state changes
|
| 1300 |
+
)
|
| 1301 |
+
|
| 1302 |
unit4_run_button.click(
|
| 1303 |
fn=handle_evaluation_results,
|
| 1304 |
inputs=[], # No inputs needed - profile comes from session
|
|
|
|
| 1306 |
csv_download, json_download, summary_download]
|
| 1307 |
)
|
| 1308 |
|
| 1309 |
+
# Refresh authentication status manually
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1310 |
refresh_auth_button.click(
|
| 1311 |
fn=refresh_auth_status,
|
| 1312 |
outputs=[auth_status_display]
|
src/models/__pycache__/qwen_client.cpython-310.pyc
CHANGED
|
Binary files a/src/models/__pycache__/qwen_client.cpython-310.pyc and b/src/models/__pycache__/qwen_client.cpython-310.pyc differ
|
|
|
src/models/qwen_client.py
CHANGED
|
@@ -55,6 +55,11 @@ class QwenClient:
|
|
| 55 |
self.hf_token = hf_token or os.getenv("HUGGINGFACE_TOKEN") or os.getenv("HF_TOKEN")
|
| 56 |
if not self.hf_token:
|
| 57 |
logger.warning("No HuggingFace token provided. API access may be limited.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
# Define model configurations with fallbacks
|
| 60 |
self.models = {
|
|
@@ -123,11 +128,6 @@ class QwenClient:
|
|
| 123 |
self.langchain_clients = {}
|
| 124 |
self._initialize_clients()
|
| 125 |
|
| 126 |
-
# Cost tracking
|
| 127 |
-
self.total_cost = 0.0
|
| 128 |
-
self.request_count = 0
|
| 129 |
-
self.budget_limit = 0.10 # $0.10 total budget
|
| 130 |
-
|
| 131 |
def _initialize_clients(self):
|
| 132 |
"""Initialize HuggingFace clients with fallback support"""
|
| 133 |
|
|
@@ -351,6 +351,8 @@ class QwenClient:
|
|
| 351 |
formatted_prompt = f"Question: {prompt}\nAnswer:"
|
| 352 |
|
| 353 |
try:
|
|
|
|
|
|
|
| 354 |
response_text = client.text_generation(
|
| 355 |
formatted_prompt,
|
| 356 |
max_new_tokens=tokens,
|
|
@@ -361,33 +363,59 @@ class QwenClient:
|
|
| 361 |
|
| 362 |
if not response_text or not response_text.strip():
|
| 363 |
# Try alternative generation method if first fails
|
| 364 |
-
logger.warning(f"Empty response from {config.name} attempt 1, trying
|
| 365 |
response_text = client.text_generation(
|
| 366 |
prompt,
|
| 367 |
-
max_new_tokens=min(tokens,
|
| 368 |
-
temperature=0.7, # Higher temperature
|
| 369 |
-
return_full_text=False
|
|
|
|
| 370 |
)
|
| 371 |
|
| 372 |
if not response_text or not response_text.strip():
|
| 373 |
logger.warning(f"Empty response from {config.name} attempt 2, trying simple format...")
|
| 374 |
# Try even simpler format
|
| 375 |
response_text = client.text_generation(
|
| 376 |
-
f"Answer: {prompt}",
|
| 377 |
-
max_new_tokens=
|
| 378 |
-
temperature=0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
return_full_text=False
|
| 380 |
)
|
| 381 |
|
| 382 |
if not response_text or not response_text.strip():
|
| 383 |
-
|
|
|
|
|
|
|
| 384 |
|
| 385 |
except Exception as gen_error:
|
| 386 |
-
|
| 387 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
|
|
|
|
| 389 |
if not response_text or not response_text.strip():
|
| 390 |
-
|
|
|
|
|
|
|
| 391 |
|
| 392 |
response_time = time.time() - start_time
|
| 393 |
|
|
|
|
| 55 |
self.hf_token = hf_token or os.getenv("HUGGINGFACE_TOKEN") or os.getenv("HF_TOKEN")
|
| 56 |
if not self.hf_token:
|
| 57 |
logger.warning("No HuggingFace token provided. API access may be limited.")
|
| 58 |
+
|
| 59 |
+
# Initialize cost tracking first
|
| 60 |
+
self.total_cost = 0.0
|
| 61 |
+
self.request_count = 0
|
| 62 |
+
self.budget_limit = 0.10 # $0.10 total budget
|
| 63 |
|
| 64 |
# Define model configurations with fallbacks
|
| 65 |
self.models = {
|
|
|
|
| 128 |
self.langchain_clients = {}
|
| 129 |
self._initialize_clients()
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
def _initialize_clients(self):
|
| 132 |
"""Initialize HuggingFace clients with fallback support"""
|
| 133 |
|
|
|
|
| 351 |
formatted_prompt = f"Question: {prompt}\nAnswer:"
|
| 352 |
|
| 353 |
try:
|
| 354 |
+
# First attempt: Standard formatted prompt
|
| 355 |
+
logger.info(f"Attempting generation with {config.name}...")
|
| 356 |
response_text = client.text_generation(
|
| 357 |
formatted_prompt,
|
| 358 |
max_new_tokens=tokens,
|
|
|
|
| 363 |
|
| 364 |
if not response_text or not response_text.strip():
|
| 365 |
# Try alternative generation method if first fails
|
| 366 |
+
logger.warning(f"Empty response from {config.name} attempt 1, trying direct prompt...")
|
| 367 |
response_text = client.text_generation(
|
| 368 |
prompt,
|
| 369 |
+
max_new_tokens=min(tokens, 50), # Smaller token limit
|
| 370 |
+
temperature=0.7, # Higher temperature
|
| 371 |
+
return_full_text=False,
|
| 372 |
+
do_sample=True
|
| 373 |
)
|
| 374 |
|
| 375 |
if not response_text or not response_text.strip():
|
| 376 |
logger.warning(f"Empty response from {config.name} attempt 2, trying simple format...")
|
| 377 |
# Try even simpler format
|
| 378 |
response_text = client.text_generation(
|
| 379 |
+
f"Answer this: {prompt}",
|
| 380 |
+
max_new_tokens=30,
|
| 381 |
+
temperature=0.8,
|
| 382 |
+
return_full_text=False,
|
| 383 |
+
do_sample=True
|
| 384 |
+
)
|
| 385 |
+
|
| 386 |
+
if not response_text or not response_text.strip():
|
| 387 |
+
# Final attempt with minimal parameters
|
| 388 |
+
logger.warning(f"Empty response from {config.name} attempt 3, trying minimal config...")
|
| 389 |
+
response_text = client.text_generation(
|
| 390 |
+
prompt[:100], # Truncate prompt
|
| 391 |
+
max_new_tokens=20,
|
| 392 |
return_full_text=False
|
| 393 |
)
|
| 394 |
|
| 395 |
if not response_text or not response_text.strip():
|
| 396 |
+
error_msg = f"No response received from {config.name} after 4 attempts. Last response: '{response_text}'"
|
| 397 |
+
logger.error(f"β {error_msg}")
|
| 398 |
+
raise ValueError(error_msg)
|
| 399 |
|
| 400 |
except Exception as gen_error:
|
| 401 |
+
error_details = str(gen_error)
|
| 402 |
+
logger.error(f"β Text generation failed for {config.name}: {error_details}")
|
| 403 |
+
|
| 404 |
+
# Check for specific error types
|
| 405 |
+
if "timeout" in error_details.lower():
|
| 406 |
+
raise ValueError(f"Timeout error with {config.name}: {error_details}")
|
| 407 |
+
elif "rate limit" in error_details.lower() or "429" in error_details:
|
| 408 |
+
raise ValueError(f"Rate limit error with {config.name}: {error_details}")
|
| 409 |
+
elif "auth" in error_details.lower() or "401" in error_details:
|
| 410 |
+
raise ValueError(f"Authentication error with {config.name}: {error_details}")
|
| 411 |
+
else:
|
| 412 |
+
raise ValueError(f"Generation error with {config.name}: {error_details}")
|
| 413 |
|
| 414 |
+
# Final validation
|
| 415 |
if not response_text or not response_text.strip():
|
| 416 |
+
error_msg = f"Final validation failed for {config.name}. Response: '{response_text}'"
|
| 417 |
+
logger.error(f"β {error_msg}")
|
| 418 |
+
raise ValueError(error_msg)
|
| 419 |
|
| 420 |
response_time = time.time() - start_time
|
| 421 |
|
src/production_deployment_guide.md
CHANGED
|
@@ -90,7 +90,44 @@ After successful deployment with enhanced fallback system:
|
|
| 90 |
- **Smart Error Recovery**: Advanced retry logic with multiple generation attempts
|
| 91 |
- **Enhanced Debugging**: Detailed error reporting for troubleshooting
|
| 92 |
|
| 93 |
-
### **π Latest Production Fixes (v2.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
#### OAuth Scope Request Configuration ✅
|
| 96 |
```python
|
|
@@ -108,32 +145,6 @@ login_button = gr.LoginButton(
|
|
| 108 |
)
|
| 109 |
```
|
| 110 |
|
| 111 |
-
#### HF_TOKEN Priority System ✅
|
| 112 |
-
```python
|
| 113 |
-
# Authentication priority order:
|
| 114 |
-
1. HF_TOKEN environment variable (highest priority)
|
| 115 |
-
2. OAuth token from user login (now requests inference access)
|
| 116 |
-
3. SimpleClient fallback (guaranteed to work)
|
| 117 |
-
```
|
| 118 |
-
|
| 119 |
-
#### Enhanced Error Handling ✅
|
| 120 |
-
```python
|
| 121 |
-
# Multi-attempt generation with detailed logging:
|
| 122 |
-
1. Standard generation with formatted prompt
|
| 123 |
-
2. Alternative generation with different parameters
|
| 124 |
-
3. Simple format generation as last resort
|
| 125 |
-
4. Graceful fallback to SimpleClient if all fail
|
| 126 |
-
```
|
| 127 |
-
|
| 128 |
-
#### Guaranteed Fallback Logic ✅
|
| 129 |
-
```python
|
| 130 |
-
# Even with HF_TOKEN, if models fail:
|
| 131 |
-
if main_client_fails:
|
| 132 |
-
log_detailed_error()
|
| 133 |
-
fallback_to_simple_client() # Always works
|
| 134 |
-
ensure_user_gets_responses()
|
| 135 |
-
```
|
| 136 |
-
|
| 137 |
## 🎯 Deployment Steps
|
| 138 |
|
| 139 |
### 1. Pre-Deployment Checklist
|
|
|
|
| 90 |
- **Smart Error Recovery**: Advanced retry logic with multiple generation attempts
|
| 91 |
- **Enhanced Debugging**: Detailed error reporting for troubleshooting
|
| 92 |
|
| 93 |
+
### **π Latest Production Fixes (v2.2):**
|
| 94 |
+
|
| 95 |
+
#### Dynamic Authentication Detection ✅
|
| 96 |
+
```python
|
| 97 |
+
# Real-time login state monitoring:
|
| 98 |
+
interface.load(
|
| 99 |
+
fn=check_login_state,
|
| 100 |
+
outputs=[auth_status_display, unit4_run_button],
|
| 101 |
+
every=2 # Check every 2 seconds for login state changes
|
| 102 |
+
)
|
| 103 |
+
|
| 104 |
+
# Button state updates based on login:
|
| 105 |
+
if profile:
|
| 106 |
+
button_update = gr.update(interactive=True, value="π Run GAIA Evaluation & Submit All Answers")
|
| 107 |
+
else:
|
| 108 |
+
button_update = gr.update(interactive=False, value="π Login Required for GAIA Evaluation")
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
#### Model Initialization Bug Fixes ✅
|
| 112 |
+
```python
|
| 113 |
+
# Fixed QwenClient total_cost attribute error:
|
| 114 |
+
def __init__(self, hf_token: Optional[str] = None):
|
| 115 |
+
# Initialize cost tracking FIRST
|
| 116 |
+
self.total_cost = 0.0
|
| 117 |
+
self.request_count = 0
|
| 118 |
+
self.budget_limit = 0.10
|
| 119 |
+
# Then initialize models...
|
| 120 |
+
```
|
| 121 |
+
|
| 122 |
+
#### Enhanced FLAN-T5 Generation ✅
|
| 123 |
+
```python
|
| 124 |
+
# 4-attempt generation strategy:
|
| 125 |
+
1. Standard formatted prompt: "Question: {prompt}\nAnswer:"
|
| 126 |
+
2. Direct prompt with higher temperature
|
| 127 |
+
3. Simple format: "Answer this: {prompt}"
|
| 128 |
+
4. Minimal config with truncated prompt
|
| 129 |
+
# Each with detailed error logging and specific error type detection
|
| 130 |
+
```
|
| 131 |
|
| 132 |
#### OAuth Scope Request Configuration ✅
|
| 133 |
```python
|
|
|
|
| 145 |
)
|
| 146 |
```
|
| 147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
## 🎯 Deployment Steps
|
| 149 |
|
| 150 |
### 1. Pre-Deployment Checklist
|