Spaces:

Trinoid
/

Data_Management_Mistral

Sleeping

App Files Community

Frankie-walsh4 committed on Apr 3, 2025

Commit

9074c4e

1 Parent(s): a7af00e

Update app to use environment variables for token

Browse files

Files changed (1) hide show

app.py +49 -21

app.py CHANGED Viewed

@@ -1,21 +1,21 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 import os
-from dotenv import load_dotenv
-# Load environment variables from .env file
-load_dotenv()
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
-# You need to provide an API token to access your private model
-# Get your token from: https://huggingface.co/settings/tokens
-HF_TOKEN = os.environ.get("HF_TOKEN")  # You should set this as an environment variable
 if not HF_TOKEN:
     print("Warning: No Hugging Face token found in environment variables.")
-    print("Please set your HF_TOKEN environment variable or add it directly in the code.")
-    print("Get your token from: https://huggingface.co/settings/tokens")
 else:
     print("HF_TOKEN found in environment variables!")
@@ -44,18 +44,45 @@ def respond(
     messages.append({"role": "user", "content": message})
     response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
 """
@@ -75,6 +102,7 @@ demo = gr.ChatInterface(
             label="Top-p (nucleus sampling)",
         ),
     ],
 )

 import gradio as gr
 from huggingface_hub import InferenceClient
 import os
+import time
+from huggingface_hub.errors import HfHubHTTPError
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
+# Get the token from environment variables
+# For Hugging Face Spaces, add your token as a secret named HF_TOKEN
+# https://huggingface.co/docs/hub/spaces-overview#managing-secrets
+HF_TOKEN = os.environ.get("HF_TOKEN")
 if not HF_TOKEN:
     print("Warning: No Hugging Face token found in environment variables.")
+    print("For Hugging Face Spaces: Add your token as a secret named HF_TOKEN in the Settings tab.")
+    print("See: https://huggingface.co/docs/hub/spaces-overview#managing-secrets")
 else:
     print("HF_TOKEN found in environment variables!")
     messages.append({"role": "user", "content": message})
     response = ""
+    # Try to initialize the model with retries
+    max_retries = 3
+    retry_count = 0
+    while retry_count < max_retries:
+        try:
+            print(f"Attempt {retry_count + 1}/{max_retries} to call the model...")
+            for message in client.chat_completion(
+                messages,
+                max_tokens=max_tokens,
+                stream=True,
+                temperature=temperature,
+                top_p=top_p,
+            ):
+                token = message.choices[0].delta.content
+                if token:
+                    response += token
+                    yield response
+            # If we got here, we were successful
+            break
+        except HfHubHTTPError as e:
+            retry_count += 1
+            error_message = str(e)
+            print(f"Error: {error_message}")
+            if "504 Server Error: Gateway Timeout" in error_message and retry_count < max_retries:
+                wait_time = 10  # seconds
+                print(f"Model timed out. Waiting {wait_time} seconds before retry {retry_count}/{max_retries}...")
+                yield f"⌛ Model is warming up, please wait... (Attempt {retry_count}/{max_retries})"
+                time.sleep(wait_time)
+            else:
+                print("All retries failed or different error occurred.")
+                if "504 Server Error" in error_message:
+                    yield "❌ The model timed out after multiple attempts. Your model might still be loading or the server is busy. Try again in a few minutes."
+                else:
+                    yield f"❌ An error occurred: {error_message}"
+                break
 """
             label="Top-p (nucleus sampling)",
         ),
     ],
+    description="This interface uses your fine-tuned Mistral model for Microsoft 365 data management. The first request may take some time as the model loads."
 )