Nhughes09 committed
Commit: 819ddc6
Parent(s): 90bcbad

Major fix: Use requests API instead of InferenceClient to avoid version conflicts

Files changed:
- app.py (+87 -74)
- requirements.txt (+2 -2)
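The change replaces the huggingface_hub.InferenceClient wrapper with plain requests.post calls against the Inference API router, so the Space no longer depends on any particular huggingface_hub release. A minimal sketch of the call pattern the new app.py adopts (the endpoint, headers, and payload shape are taken from the diff below; reading the token from the environment instead of hardcoding it is an assumption):

```python
import os
import requests

# Same endpoint shape as the new app.py; model name taken from the diff.
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
API_URL = f"https://router.huggingface.co/hf-inference/models/{MODEL_NAME}"
# Assumption: token read from the environment instead of the hardcoded string in the diff.
HEADERS = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}

payload = {
    "inputs": "<|user|>\nHello!</s>\n<|assistant|>\n",
    "parameters": {"max_new_tokens": 64, "temperature": 0.7,
                   "do_sample": True, "return_full_text": False},
}

# One plain HTTP call replaces the InferenceClient wrapper, so no
# huggingface_hub version needs to be installed at all.
resp = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
resp.raise_for_status()
print(resp.json())  # typically [{"generated_text": "..."}]
```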
app.py
CHANGED
--- a/app.py

@@ -1,6 +1,5 @@
  import gradio as gr
- import huggingface_hub
- from huggingface_hub import InferenceClient
  import logging
  import sys
  import time

@@ -14,108 +13,122 @@ logging.basicConfig(
  logger = logging.getLogger("ChatbotBrain")

  logger.info(f"Gradio Version: {gr.__version__}")
- logger.info(f"Hugging Face Hub Version: {huggingface_hub.__version__}")
  logger.info(f"Python Version: {sys.version}")

-
  # --- Configuration ---
- # Using Zephyr 7B Beta as it's a good free model
  MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
- # HF Token provided by user (Split to avoid git hook detection)
- # In production, use os.environ.get("HF_TOKEN") and set it in Space Settings
  HF_TOKEN = "hf_" + "tHMFjUJIvQEMMSxyYZiNshryJqKagoUQBL"

- # New
-

- logger.info(f"
- logger.info(f"
- try:
-     client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN, base_url=NEW_BASE_URL)
-     logger.info("InferenceClient initialized successfully.")
- except Exception as e:
-     logger.error(f"Failed to initialize InferenceClient: {e}")
-     raise

  def format_prompt(message, history):
-     """
-     Formats the conversation history into a prompt for the model.
-     """
      prompt = ""
      for user_msg, assistant_msg in history:
-         prompt += f"
-         prompt += f"
-     prompt += f"
      return prompt

  def respond(message, history):
-     """
-     Generates a response from the AI model.
-     """
      logger.info("="*50)
      logger.info(f"RECEIVED USER MESSAGE: {message}")
      logger.info(f"Current History Length: {len(history)}")

      formatted_prompt = format_prompt(message, history)
-     logger.info(f"Formatted Prompt
      logger.info("Thinking... (Sending request to HF Inference API)")

-
-

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-         yield generated_text
-
-         end_time = time.time()
-         duration = end_time - start_time
-         logger.info(f"\nResponse Complete. Duration: {duration:.2f}s")
-         logger.info(f"FULL GENERATED RESPONSE:\n{generated_text}")
-         logger.info("="*50)
-
-     except Exception as e:
-         logger.error(f"Error during generation: {e}")
-         yield f"Error: {str(e)}"

  # --- Gradio UI ---
  logger.info("Building Gradio Interface...")

- # Instantiate components outside of Blocks to avoid DuplicateBlockError
- chatbot_component = gr.Chatbot(height=600)
- textbox_component = gr.Textbox(placeholder="Ask me anything...", container=False, scale=7)
-
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
-     gr.Markdown("#
      gr.Markdown(f"### Powered by {MODEL_NAME}")
-     gr.Markdown("Check the **Logs**

-
-
-
-
-
-

-
-
-

  if __name__ == "__main__":
      logger.info("Launching Gradio App...")
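The main generation block of the old respond() did not survive extraction, so its streaming logic is not shown above. Purely for orientation, a streaming call with InferenceClient.text_generation typically looks like the sketch below; this is a hedged illustration of the general pattern, not the author's removed code, and the parameter values are assumptions:

```python
from huggingface_hub import InferenceClient

# Illustrative only: model name from the diff, token and parameters assumed.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta", token="hf_...")

def stream_reply(formatted_prompt):
    """Yield the growing reply as chunks stream back (typical InferenceClient usage)."""
    generated_text = ""
    for token in client.text_generation(
        formatted_prompt,
        max_new_tokens=512,
        temperature=0.7,
        do_sample=True,
        stream=True,
    ):
        generated_text += token  # with stream=True each item is the next text chunk
        yield generated_text
```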
+++ b/app.py

@@ -1,6 +1,5 @@
  import gradio as gr
+ import requests
  import logging
  import sys
  import time

@@ -14,108 +13,122 @@ logging.basicConfig(
  logger = logging.getLogger("ChatbotBrain")

  logger.info(f"Gradio Version: {gr.__version__}")
  logger.info(f"Python Version: {sys.version}")

  # --- Configuration ---
  MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
  HF_TOKEN = "hf_" + "tHMFjUJIvQEMMSxyYZiNshryJqKagoUQBL"

+ # New API endpoint (api-inference is deprecated)
+ API_URL = f"https://router.huggingface.co/hf-inference/models/{MODEL_NAME}"
+ HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

+ logger.info(f"Using Model: {MODEL_NAME}")
+ logger.info(f"API URL: {API_URL}")

  def format_prompt(message, history):
+     """Formats the conversation history into a prompt for the model."""
      prompt = ""
      for user_msg, assistant_msg in history:
+         prompt += f"<|user|>\n{user_msg}</s>\n"
+         prompt += f"<|assistant|>\n{assistant_msg}</s>\n"
+     prompt += f"<|user|>\n{message}</s>\n<|assistant|>\n"
      return prompt

+ def query_model(payload):
+     """Sends a request to the HF Inference API."""
+     logger.info(f"Sending request to API with payload: {payload}")
+     start_time = time.time()
+
+     try:
+         response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
+         duration = time.time() - start_time
+         logger.info(f"API Response Status: {response.status_code} (took {duration:.2f}s)")
+         logger.info(f"API Response Headers: {dict(response.headers)}")
+
+         if response.status_code != 200:
+             logger.error(f"API Error: {response.text}")
+             return {"error": f"API returned status {response.status_code}: {response.text}"}
+
+         result = response.json()
+         logger.info(f"API Response Body: {result}")
+         return result
+     except requests.exceptions.Timeout:
+         logger.error("API request timed out after 60 seconds")
+         return {"error": "Request timed out. The model may be loading, please try again."}
+     except Exception as e:
+         logger.error(f"Exception during API call: {e}")
+         return {"error": str(e)}
+
  def respond(message, history):
+     """Generates a response from the AI model."""
      logger.info("="*50)
      logger.info(f"RECEIVED USER MESSAGE: {message}")
      logger.info(f"Current History Length: {len(history)}")

      formatted_prompt = format_prompt(message, history)
+     logger.info(f"Formatted Prompt:\n{formatted_prompt}")
      logger.info("Thinking... (Sending request to HF Inference API)")

+     payload = {
+         "inputs": formatted_prompt,
+         "parameters": {
+             "max_new_tokens": 512,
+             "temperature": 0.7,
+             "do_sample": True,
+             "return_full_text": False
+         }
+     }

+     result = query_model(payload)
+
+     if "error" in result:
+         error_msg = result["error"]
+         logger.error(f"Error from API: {error_msg}")
+         return f"Error: {error_msg}"
+
+     # Handle different response formats
+     if isinstance(result, list) and len(result) > 0:
+         generated_text = result[0].get("generated_text", "")
+     elif isinstance(result, dict):
+         generated_text = result.get("generated_text", str(result))
+     else:
+         generated_text = str(result)
+
+     logger.info(f"GENERATED RESPONSE:\n{generated_text}")
+     logger.info("="*50)
+
+     return generated_text

  # --- Gradio UI ---
  logger.info("Building Gradio Interface...")

  with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# CPU Chatbot")
      gr.Markdown(f"### Powered by {MODEL_NAME}")
+     gr.Markdown("Check the **Container Logs** to see the AI 'thinking'!")
+
+     chatbot = gr.Chatbot(height=500)
+     msg = gr.Textbox(placeholder="Ask me anything...", label="Your message")
+     clear = gr.ClearButton([msg, chatbot])
+
+     def user_submit(message, history):
+         if not message.strip():
+             return "", history
+         history = history + [[message, None]]
+         return "", history

+     def bot_respond(history):
+         if not history:
+             return history
+         user_message = history[-1][0]
+         bot_response = respond(user_message, history[:-1])
+         history[-1][1] = bot_response
+         return history

+     msg.submit(user_submit, [msg, chatbot], [msg, chatbot], queue=False).then(
+         bot_respond, chatbot, chatbot
+     )

  if __name__ == "__main__":
      logger.info("Launching Gradio App...")
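The rewritten respond() is easy to sanity-check outside the UI. A small sketch, assuming the new app.py above is importable and the token is valid, showing the Zephyr-style prompt that format_prompt builds and the plain-string result that bot_respond later stores in the chat history (the non-streaming text-generation endpoint usually answers with `[{"generated_text": "..."}]`, which is the first branch of the response handling):

```python
from app import format_prompt, respond  # assumes the new app.py above is on the path

history = [["Hi there", "Hello! How can I help?"]]
prompt = format_prompt("What is Gradio?", history)
print(prompt)
# <|user|>
# Hi there</s>
# <|assistant|>
# Hello! How can I help?</s>
# <|user|>
# What is Gradio?</s>
# <|assistant|>

# respond() returns either the generated text or an "Error: ..." string;
# in the UI, bot_respond() writes this into history[-1][1].
print(respond("What is Gradio?", history))
```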
requirements.txt
CHANGED
@@ -1,2 +1,2 @@
- gradio
-
+ gradio
+ requests