Spaces:

Trinoid
/

Data_Management_Mistral

Sleeping

App Files Files Community

Frankie-walsh4 committed on Apr 3, 2025

Commit

8c02af0

1 Parent(s): ec89f95

fixes

Browse files

Files changed (1) hide show

app.py +97 -26

app.py CHANGED Viewed

@@ -2,12 +2,29 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import os
 import time
 from huggingface_hub.errors import HfHubHTTPError
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
-client = InferenceClient("Trinoid/Data_Management_Mistral")
 def respond(
     message,
@@ -29,44 +46,98 @@ def respond(
     response = ""
     # Try to initialize the model with retries
     max_retries = 3
     retry_count = 0
     while retry_count < max_retries:
         try:
-            print(f"Attempt {retry_count + 1}/{max_retries} to call the model...")
-            for message in client.chat_completion(
-                messages,
-                max_tokens=max_tokens,
-                stream=True,
-                temperature=temperature,
-                top_p=top_p,
-            ):
-                token = message.choices[0].delta.content
-                if token:
-                    response += token
-                    yield response
-            # If we got here, we were successful
-            break
         except HfHubHTTPError as e:
             retry_count += 1
             error_message = str(e)
             print(f"Error: {error_message}")
-            if "504 Server Error: Gateway Timeout" in error_message and retry_count < max_retries:
-                wait_time = 10  # seconds
-                print(f"Model timed out. Waiting {wait_time} seconds before retry {retry_count}/{max_retries}...")
-                yield f"⌛ Model is warming up, please wait... (Attempt {retry_count}/{max_retries})"
-                time.sleep(wait_time)
-            else:
-                print("All retries failed or different error occurred.")
-                if "504 Server Error" in error_message:
-                    yield "❌ The model timed out after multiple attempts. Your model might still be loading or the server is busy. Try again in a few minutes."
                 else:
-                    yield f"❌ An error occurred: {error_message}"
-                break
 """

 from huggingface_hub import InferenceClient
 import os
 import time
+import json
+import requests
 from huggingface_hub.errors import HfHubHTTPError
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
+# Get token from environment (even though we might not need it)
+HF_TOKEN = os.environ.get("HF_TOKEN")
+print(f"HF_TOKEN is {'available' if HF_TOKEN else 'not available'}")
+# Try direct client with and without token
+if HF_TOKEN:
+    client = InferenceClient("Trinoid/Data_Management_Mistral", token=HF_TOKEN)
+    print("Created client with token")
+else:
+    client = InferenceClient("Trinoid/Data_Management_Mistral")
+    print("Created client without token")
+# Alternative API endpoint setup
+API_URL = "https://api-inference.huggingface.co/models/Trinoid/Data_Management_Mistral"
+headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
 def respond(
     message,
     response = ""
+    # Debug the messages being sent
+    print(f"Sending messages: {json.dumps(messages, indent=2)}")
     # Try to initialize the model with retries
     max_retries = 3
     retry_count = 0
+    # Try both methods: InferenceClient and direct API call
+    use_direct_api = False
     while retry_count < max_retries:
         try:
+            print(f"Attempt {retry_count + 1}/{max_retries} using {'direct API' if use_direct_api else 'InferenceClient'}...")
+            if not use_direct_api:
+                # Method 1: Using InferenceClient
+                for message in client.chat_completion(
+                    messages,
+                    max_tokens=max_tokens,
+                    stream=True,
+                    temperature=temperature,
+                    top_p=top_p,
+                ):
+                    token = message.choices[0].delta.content
+                    if token:
+                        response += token
+                        yield response
+                # If we got here, we were successful
+                break
+            else:
+                # Method 2: Direct API call
+                payload = {
+                    "inputs": messages,
+                    "parameters": {
+                        "max_new_tokens": max_tokens,
+                        "temperature": temperature,
+                        "top_p": top_p,
+                    },
+                    "stream": False,
+                }
+                print(f"Making direct API call to {API_URL}")
+                api_response = requests.post(API_URL, headers=headers, json=payload)
+                print(f"API response status: {api_response.status_code}")
+                if api_response.status_code == 200:
+                    result = api_response.json()
+                    print(f"API response: {json.dumps(result, indent=2)}")
+                    if isinstance(result, list) and len(result) > 0 and "generated_text" in result[0]:
+                        response = result[0]["generated_text"]
+                        yield response
+                        break
+                    else:
+                        print(f"Unexpected API response format: {result}")
+                        retry_count += 1
+                else:
+                    print(f"API error: {api_response.text}")
+                    if api_response.status_code == 504 and retry_count < max_retries - 1:
+                        retry_count += 1
+                        yield f"⌛ Model is warming up, please wait... (Attempt {retry_count}/{max_retries})"
+                        time.sleep(10)
+                    else:
+                        yield f"❌ API error: {api_response.status_code} - {api_response.text}"
+                        break
         except HfHubHTTPError as e:
             retry_count += 1
             error_message = str(e)
             print(f"Error: {error_message}")
+            if "504 Server Error: Gateway Timeout" in error_message:
+                if retry_count < max_retries - 1:
+                    wait_time = 10  # seconds
+                    print(f"Model timed out. Waiting {wait_time} seconds before retry {retry_count}/{max_retries}...")
+                    yield f"⌛ Model is warming up, please wait... (Attempt {retry_count}/{max_retries})"
+                    time.sleep(wait_time)
+                    # Try direct API on next attempt
+                    use_direct_api = True
                 else:
+                    print("All retries failed.")
+                    yield "❌ The model timed out after multiple attempts. Try again in a few minutes."
+                    break
+            else:
+                print(f"Non-timeout error: {error_message}")
+                yield f"❌ An error occurred: {error_message}"
+                # Try direct API on next attempt
+                use_direct_api = True
+        except Exception as e:
+            print(f"Unexpected error: {str(e)}")
+            yield f"❌ Unexpected error: {str(e)}"
+            break
 """