Spaces:

PeterPinetree
/

Next-Token-Predictor

Running

App Files Files Community

PeterPinetree committed on Sep 17, 2025

Commit

8f0d448

1 Parent(s): f366b93

Switch to openai-community/gpt2 with gpt2-medium fallback for better serverless inference availability

Browse files

Files changed (1) hide show

app.py +29 -9

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ load_dotenv()
 # Configuration
 API_BASE = "https://api-inference.huggingface.co/models/"
-MODEL_ID = "gpt2"
 HF_TOKEN = os.getenv('HF_NEXT_TOKEN_PREDICTOR_TOKEN', '')
 def show_token(token: str) -> str:
@@ -53,15 +53,35 @@ def predict_next_token(text: str, top_k: int = 10, hide_punctuation: bool = Fals
         response = requests.post(url, headers=headers, json=payload, timeout=30)
         if not response.ok:
-            error_msg = f"API Error: {response.status_code} for model {MODEL_ID}"
-            try:
-                error_detail = response.json()
-                if 'error' in error_detail:
-                    error_msg += f" - {error_detail['error']}"
-            except:
-                error_msg += f" - {response.text[:200]}"
-            return error_msg, ""
         result = response.json()
         prediction_time = int((time.time() - start_time) * 1000)

 # Configuration
 API_BASE = "https://api-inference.huggingface.co/models/"
+MODEL_ID = "openai-community/gpt2"
 HF_TOKEN = os.getenv('HF_NEXT_TOKEN_PREDICTOR_TOKEN', '')
 def show_token(token: str) -> str:
         response = requests.post(url, headers=headers, json=payload, timeout=30)
+        # Debug logging
+        print(f"API URL: {url}")
+        print(f"Response status: {response.status_code}")
         if not response.ok:
+            print(f"Response text: {response.text}")
+        if not response.ok:
+            # Try GPT-2 Medium as fallback if the main model fails
+            if MODEL_ID == "openai-community/gpt2":
+                print(f"Main model failed, trying GPT-2 Medium fallback...")
+                fallback_url = f"{API_BASE}openai-community/gpt2-medium"
+                fallback_response = requests.post(fallback_url, headers=headers, json=payload, timeout=30)
+                print(f"Fallback response status: {fallback_response.status_code}")
+                if fallback_response.ok:
+                    response = fallback_response
+                    print("✅ Fallback successful!")
+                else:
+                    print(f"Fallback also failed: {fallback_response.text[:100]}")
+            # If still not ok after fallback attempt
+            if not response.ok:
+                error_msg = f"API Error: {response.status_code} for model {MODEL_ID}"
+                try:
+                    error_detail = response.json()
+                    if 'error' in error_detail:
+                        error_msg += f" - {error_detail['error']}"
+                except:
+                    error_msg += f" - {response.text[:200]}"
+                return error_msg, ""
         result = response.json()
         prediction_time = int((time.time() - start_time) * 1000)