rifatSDAS committed on
Commit
e12a453
·
1 Parent(s): 07a82e7

Fix LLM Access Error on App.

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -35,7 +35,11 @@ def create_client() -> Optional[InferenceClient]:
35
  if not HF_TOKEN:
36
  return None
37
  try:
38
- return InferenceClient(model=MODEL_ID, token=HF_TOKEN)
 
 
 
 
39
  except Exception as e:
40
  print(f"Error creating client: {e}")
41
  return None
@@ -94,6 +98,7 @@ def generate_response(
94
  try:
95
  response_text = ""
96
  stream = client.chat_completion(
 
97
  messages=messages,
98
  max_tokens=MAX_NEW_TOKENS,
99
  temperature=TEMPERATURE,
@@ -109,10 +114,13 @@ def generate_response(
109
 
110
  except Exception as e:
111
  error_msg = str(e)
 
112
  if "401" in error_msg or "unauthorized" in error_msg.lower():
113
- yield "⚠️ **Authentication Error**: Invalid HuggingFace token. Please check your API token."
114
  elif "429" in error_msg or "rate" in error_msg.lower():
115
  yield "⚠️ **Rate Limit**: Too many requests. Please wait a moment and try again."
 
 
116
  else:
117
  yield f"⚠️ **Error**: {error_msg}"
118
 
 
35
  if not HF_TOKEN:
36
  return None
37
  try:
38
+ # Use provider parameter for Inference Providers API
39
+ return InferenceClient(
40
+ provider="hf-inference",
41
+ api_key=HF_TOKEN,
42
+ )
43
  except Exception as e:
44
  print(f"Error creating client: {e}")
45
  return None
 
98
  try:
99
  response_text = ""
100
  stream = client.chat_completion(
101
+ model=MODEL_ID,
102
  messages=messages,
103
  max_tokens=MAX_NEW_TOKENS,
104
  temperature=TEMPERATURE,
 
114
 
115
  except Exception as e:
116
  error_msg = str(e)
117
+ print(f"[DEBUG] Full error: {error_msg}") # Log full error for debugging
118
  if "401" in error_msg or "unauthorized" in error_msg.lower():
119
+ yield f"⚠️ **Authentication Error**: The model API returned 401. This could mean:\n\n1. The model `{MODEL_ID}` may require accepting terms at the model page\n2. The model may have been gated or moved\n3. Token permissions issue\n\n**Debug info**: {error_msg[:200]}"
120
  elif "429" in error_msg or "rate" in error_msg.lower():
121
  yield "⚠️ **Rate Limit**: Too many requests. Please wait a moment and try again."
122
+ elif "503" in error_msg or "loading" in error_msg.lower():
123
+ yield "⚠️ **Model Loading**: The model is currently loading. Please try again in a few seconds."
124
  else:
125
  yield f"⚠️ **Error**: {error_msg}"
126