PeterPinetree committed on
Commit
f366b93
·
1 Parent(s): 50a236d

Switch to GPT-2 for better serverless inference

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -11,7 +11,7 @@ load_dotenv()
11
 
12
  # Configuration
13
  API_BASE = "https://api-inference.huggingface.co/models/"
14
- MODEL_ID = "Qwen/Qwen3-0.6B"
15
  HF_TOKEN = os.getenv('HF_NEXT_TOKEN_PREDICTOR_TOKEN', '')
16
 
17
  def show_token(token: str) -> str:
@@ -54,13 +54,13 @@ def predict_next_token(text: str, top_k: int = 10, hide_punctuation: bool = Fals
54
  response = requests.post(url, headers=headers, json=payload, timeout=30)
55
 
56
  if not response.ok:
57
- error_msg = f"API Error: {response.status_code}"
58
  try:
59
  error_detail = response.json()
60
  if 'error' in error_detail:
61
  error_msg += f" - {error_detail['error']}"
62
  except:
63
- error_msg += f" - {response.text[:100]}"
64
  return error_msg, ""
65
 
66
  result = response.json()
 
11
 
12
  # Configuration
13
  API_BASE = "https://api-inference.huggingface.co/models/"
14
+ MODEL_ID = "gpt2"
15
  HF_TOKEN = os.getenv('HF_NEXT_TOKEN_PREDICTOR_TOKEN', '')
16
 
17
  def show_token(token: str) -> str:
 
54
  response = requests.post(url, headers=headers, json=payload, timeout=30)
55
 
56
  if not response.ok:
57
+ error_msg = f"API Error: {response.status_code} for model {MODEL_ID}"
58
  try:
59
  error_detail = response.json()
60
  if 'error' in error_detail:
61
  error_msg += f" - {error_detail['error']}"
62
  except:
63
+ error_msg += f" - {response.text[:200]}"
64
  return error_msg, ""
65
 
66
  result = response.json()