Spaces:
Sleeping
Sleeping
Fix LLM access error in the app.
Browse files
app.py
CHANGED
|
@@ -35,7 +35,11 @@ def create_client() -> Optional[InferenceClient]:
|
|
| 35 |
if not HF_TOKEN:
|
| 36 |
return None
|
| 37 |
try:
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
except Exception as e:
|
| 40 |
print(f"Error creating client: {e}")
|
| 41 |
return None
|
|
@@ -94,6 +98,7 @@ def generate_response(
|
|
| 94 |
try:
|
| 95 |
response_text = ""
|
| 96 |
stream = client.chat_completion(
|
|
|
|
| 97 |
messages=messages,
|
| 98 |
max_tokens=MAX_NEW_TOKENS,
|
| 99 |
temperature=TEMPERATURE,
|
|
@@ -109,10 +114,13 @@ def generate_response(
|
|
| 109 |
|
| 110 |
except Exception as e:
|
| 111 |
error_msg = str(e)
|
|
|
|
| 112 |
if "401" in error_msg or "unauthorized" in error_msg.lower():
|
| 113 |
-
yield "⚠️ **Authentication Error**:
|
| 114 |
elif "429" in error_msg or "rate" in error_msg.lower():
|
| 115 |
yield "⚠️ **Rate Limit**: Too many requests. Please wait a moment and try again."
|
|
|
|
|
|
|
| 116 |
else:
|
| 117 |
yield f"⚠️ **Error**: {error_msg}"
|
| 118 |
|
|
|
|
| 35 |
if not HF_TOKEN:
|
| 36 |
return None
|
| 37 |
try:
|
| 38 |
+
# Pass the provider explicitly so the client targets the Inference Providers API
|
| 39 |
+
return InferenceClient(
|
| 40 |
+
provider="hf-inference",
|
| 41 |
+
api_key=HF_TOKEN,
|
| 42 |
+
)
|
| 43 |
except Exception as e:
|
| 44 |
print(f"Error creating client: {e}")
|
| 45 |
return None
|
|
|
|
| 98 |
try:
|
| 99 |
response_text = ""
|
| 100 |
stream = client.chat_completion(
|
| 101 |
+
model=MODEL_ID,
|
| 102 |
messages=messages,
|
| 103 |
max_tokens=MAX_NEW_TOKENS,
|
| 104 |
temperature=TEMPERATURE,
|
|
|
|
| 114 |
|
| 115 |
except Exception as e:
|
| 116 |
error_msg = str(e)
|
| 117 |
+
print(f"[DEBUG] Full error: {error_msg}") # Log full error for debugging
|
| 118 |
if "401" in error_msg or "unauthorized" in error_msg.lower():
|
| 119 |
+
yield f"⚠️ **Authentication Error**: The model API returned 401. This could mean:\n\n1. The model `{MODEL_ID}` may require accepting terms at the model page\n2. The model may have been gated or moved\n3. Token permissions issue\n\n**Debug info**: {error_msg[:200]}"
|
| 120 |
elif "429" in error_msg or "rate" in error_msg.lower():
|
| 121 |
yield "⚠️ **Rate Limit**: Too many requests. Please wait a moment and try again."
|
| 122 |
+
elif "503" in error_msg or "loading" in error_msg.lower():
|
| 123 |
+
yield "⚠️ **Model Loading**: The model is currently loading. Please try again in a few seconds."
|
| 124 |
else:
|
| 125 |
yield f"⚠️ **Error**: {error_msg}"
|
| 126 |
|