Spaces:

ndwdgda
/

cpu

Sleeping

App Files Files Community

Nhughes09 commited on Dec 11, 2025

Commit

6484aef

1 Parent(s): cdfd669

Use HF_TOKEN from environment secrets instead of hardcoded

Browse files

Files changed (1) hide show

app.py +36 -56

app.py CHANGED Viewed

@@ -3,11 +3,11 @@ import requests
 import logging
 import sys
 import time
-# --- Extreme Logging Setup ---
 logging.basicConfig(
     level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s',
     handlers=[logging.StreamHandler(sys.stdout)]
 )
 logger = logging.getLogger("ChatbotBrain")
@@ -15,98 +15,77 @@ logger = logging.getLogger("ChatbotBrain")
 logger.info(f"Gradio Version: {gr.__version__}")
 logger.info(f"Python Version: {sys.version}")
-# --- Configuration ---
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
-HF_TOKEN = "hf_" + "tHMFjUJIvQEMMSxyYZiNshryJqKagoUQBL"
-# New API endpoint (api-inference is deprecated)
 API_URL = f"https://router.huggingface.co/hf-inference/models/{MODEL_NAME}"
-HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
 logger.info(f"Using Model: {MODEL_NAME}")
 logger.info(f"API URL: {API_URL}")
-def format_prompt(message, history):
-    """Formats the conversation history into a prompt for the model."""
-    prompt = ""
-    for user_msg, assistant_msg in history:
-        prompt += f"&lt;|user|&gt;\n{user_msg}&lt;/s&gt;\n"
-        prompt += f"&lt;|assistant|&gt;\n{assistant_msg}&lt;/s&gt;\n"
-    prompt += f"&lt;|user|&gt;\n{message}&lt;/s&gt;\n&lt;|assistant|&gt;\n"
-    return prompt
 def query_model(payload):
-    """Sends a request to the HF Inference API."""
-    logger.info(f"Sending request to API with payload: {payload}")
-    start_time = time.time()
     try:
         response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
-        duration = time.time() - start_time
-        logger.info(f"API Response Status: {response.status_code} (took {duration:.2f}s)")
-        logger.info(f"API Response Headers: {dict(response.headers)}")
         if response.status_code != 200:
             logger.error(f"API Error: {response.text}")
-            return {"error": f"API returned status {response.status_code}: {response.text}"}
         result = response.json()
-        logger.info(f"API Response Body: {result}")
         return result
-    except requests.exceptions.Timeout:
-        logger.error("API request timed out after 60 seconds")
-        return {"error": "Request timed out. The model may be loading, please try again."}
     except Exception as e:
-        logger.error(f"Exception during API call: {e}")
         return {"error": str(e)}
 def respond(message, history):
-    """Generates a response from the AI model."""
     logger.info("="*50)
-    logger.info(f"RECEIVED USER MESSAGE: {message}")
-    logger.info(f"Current History Length: {len(history)}")
-    formatted_prompt = format_prompt(message, history)
-    logger.info(f"Formatted Prompt:\n{formatted_prompt}")
-    logger.info("Thinking... (Sending request to HF Inference API)")
     payload = {
-        "inputs": formatted_prompt,
-        "parameters": {
-            "max_new_tokens": 512,
-            "temperature": 0.7,
-            "do_sample": True,
-            "return_full_text": False
-        }
     }
     result = query_model(payload)
     if "error" in result:
-        error_msg = result["error"]
-        logger.error(f"Error from API: {error_msg}")
-        return f"Error: {error_msg}"
-    # Handle different response formats
     if isinstance(result, list) and len(result) > 0:
-        generated_text = result[0].get("generated_text", "")
-    elif isinstance(result, dict):
-        generated_text = result.get("generated_text", str(result))
     else:
-        generated_text = str(result)
-    logger.info(f"GENERATED RESPONSE:\n{generated_text}")
     logger.info("="*50)
-    return generated_text
-# --- Gradio UI ---
 logger.info("Building Gradio Interface...")
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# CPU Chatbot")
     gr.Markdown(f"### Powered by {MODEL_NAME}")
-    gr.Markdown("Check the **Container Logs** to see the AI 'thinking'!")
     chatbot = gr.Chatbot(height=500)
     msg = gr.Textbox(placeholder="Ask me anything...", label="Your message")
@@ -133,3 +112,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 if __name__ == "__main__":
     logger.info("Launching Gradio App...")
     demo.launch()

 import logging
 import sys
 import time
+import os
 logging.basicConfig(
     level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
     handlers=[logging.StreamHandler(sys.stdout)]
 )
 logger = logging.getLogger("ChatbotBrain")
 logger.info(f"Gradio Version: {gr.__version__}")
 logger.info(f"Python Version: {sys.version}")
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
+HF_TOKEN = os.environ.get("HF_TOKEN")
+if HF_TOKEN:
+    logger.info("SUCCESS: HF_TOKEN loaded from environment secrets!")
+    logger.info(f"Token starts with: {HF_TOKEN[:10]}...")
+else:
+    logger.error("ERROR: HF_TOKEN not found! Add it to Space Secrets.")
 API_URL = f"https://router.huggingface.co/hf-inference/models/{MODEL_NAME}"
+HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
 logger.info(f"Using Model: {MODEL_NAME}")
 logger.info(f"API URL: {API_URL}")
 def query_model(payload):
+    logger.info(f"Sending request with payload: {payload}")
     try:
         response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
+        logger.info(f"Response Status: {response.status_code}")
         if response.status_code != 200:
             logger.error(f"API Error: {response.text}")
+            return {"error": f"API returned {response.status_code}: {response.text}"}
         result = response.json()
+        logger.info(f"Response Body: {result}")
         return result
     except Exception as e:
+        logger.error(f"Exception: {e}")
         return {"error": str(e)}
 def respond(message, history):
     logger.info("="*50)
+    logger.info(f"USER MESSAGE: {message}")
+    logger.info(f"History Length: {len(history)}")
+    prompt = ""
+    for user_msg, assistant_msg in history:
+        prompt += f"User: {user_msg}
+Assistant: {assistant_msg}
+"
+    prompt += f"User: {message}
+Assistant:"
+    logger.info(f"Formatted Prompt:
+{prompt}")
     payload = {
+        "inputs": prompt,
+        "parameters": {"max_new_tokens": 512, "temperature": 0.7, "do_sample": True}
     }
     result = query_model(payload)
     if "error" in result:
+        return f"Error: {result['error']}"
     if isinstance(result, list) and len(result) > 0:
+        text = result[0].get("generated_text", "")
     else:
+        text = str(result)
+    logger.info(f"RESPONSE: {text}")
     logger.info("="*50)
+    return text
 logger.info("Building Gradio Interface...")
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# CPU Chatbot")
     gr.Markdown(f"### Powered by {MODEL_NAME}")
+    gr.Markdown("Check Container Logs to see AI thinking!")
     chatbot = gr.Chatbot(height=500)
     msg = gr.Textbox(placeholder="Ask me anything...", label="Your message")
 if __name__ == "__main__":
     logger.info("Launching Gradio App...")
     demo.launch()