Spaces:

Intellectualtech
/

AI-Teacher1

Sleeping

Intellectualtech committed on May 13, 2025

Commit

b00d97e

verified ·

1 Parent(s): ca93545

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,9 +1,12 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 from typing import List, Tuple
 import logging
 from collections import deque
 import re
 # Configure logging
 logging.basicConfig(
@@ -12,9 +15,12 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
-# Initialize the InferenceClient
 try:
-    client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
     logger.info("Successfully initialized InferenceClient")
 except Exception as e:
     logger.error(f"Failed to initialize InferenceClient: {str(e)}")
@@ -97,21 +103,23 @@ def respond(
     response = ""
     try:
-        for message in client.chat_completion(
             messages,
             max_tokens=max_tokens,
             stream=True,
             temperature=temperature,
             top_p=top_p,
-        ):
             token = message.choices[0].delta.content or ""
             response += token
             yield response
         # Store the query and final response in memory
         add_to_memory(message, response)
     except Exception as e:
-        logger.error(f"Error during chat completion: {str(e)}")
-        raise RuntimeError("Failed to generate response from the model")
 def main():
     """

 import gradio as gr
 from huggingface_hub import InferenceClient
 from typing import List, Tuple
 import logging
 from collections import deque
 import re
+import os
 # Configure logging
 logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
+# Initialize the InferenceClient with API token
 try:
+    client = InferenceClient(
+        model="HuggingFaceH4/zephyr-7b-beta",
+        token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
+    )
     logger.info("Successfully initialized InferenceClient")
 except Exception as e:
     logger.error(f"Failed to initialize InferenceClient: {str(e)}")
     response = ""
     try:
+        stream = client.chat_completion(
             messages,
             max_tokens=max_tokens,
             stream=True,
             temperature=temperature,
             top_p=top_p,
+        )
+        for message in stream:
             token = message.choices[0].delta.content or ""
             response += token
             yield response
         # Store the query and final response in memory
         add_to_memory(message, response)
     except Exception as e:
+        error_msg = f"Error during chat completion: {str(e)}"
+        logger.error(error_msg)
+        yield error_msg  # Yield the error message to display in Gradio
 def main():
     """