Nhughes09 committed on
Commit
819ddc6
·
1 Parent(s): 90bcbad

Major fix: Use requests API instead of InferenceClient to avoid version conflicts

Browse files
Files changed (2) hide show
  1. app.py +87 -74
  2. requirements.txt +2 -2
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import gradio as gr
2
- import huggingface_hub
3
- from huggingface_hub import InferenceClient
4
  import logging
5
  import sys
6
  import time
@@ -14,108 +13,122 @@ logging.basicConfig(
14
  logger = logging.getLogger("ChatbotBrain")
15
 
16
  logger.info(f"Gradio Version: {gr.__version__}")
17
- logger.info(f"Hugging Face Hub Version: {huggingface_hub.__version__}")
18
  logger.info(f"Python Version: {sys.version}")
19
 
20
-
21
  # --- Configuration ---
22
- # Using Zephyr 7B Beta as it's a good free model
23
  MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
24
- # HF Token provided by user (Split to avoid git hook detection)
25
- # In production, use os.environ.get("HF_TOKEN") and set it in Space Settings
26
  HF_TOKEN = "hf_" + "tHMFjUJIvQEMMSxyYZiNshryJqKagoUQBL"
27
 
28
- # New base URL for HF Inference API (api-inference is deprecated)
29
- NEW_BASE_URL = "https://router.huggingface.co/hf-inference"
 
30
 
31
- logger.info(f"Initializing InferenceClient with model: {MODEL_NAME}")
32
- logger.info(f"Using base URL: {NEW_BASE_URL}")
33
- try:
34
- client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN, base_url=NEW_BASE_URL)
35
- logger.info("InferenceClient initialized successfully.")
36
- except Exception as e:
37
- logger.error(f"Failed to initialize InferenceClient: {e}")
38
- raise
39
 
40
def format_prompt(message, history):
    """Build a Zephyr-style chat prompt from prior turns plus the new message.

    Each (user, assistant) pair in *history* becomes a pair of
    <|user|>/<|assistant|> segments terminated by </s>; the new *message*
    is appended as a final user turn with an open assistant slot.
    """
    segments = []
    for user_msg, assistant_msg in history:
        segments.append(f"<|user|>\n{user_msg}</s>\n")
        segments.append(f"<|assistant|>\n{assistant_msg}</s>\n")
    segments.append(f"<|user|>\n{message}</s>\n<|assistant|>\n")
    return "".join(segments)
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
def respond(message, history):
    """Stream a model reply, yielding the text accumulated so far.

    Sends the formatted conversation to the Inference API via the
    module-level ``client`` and yields the growing response after each
    token; on failure, yields a single "Error: ..." string instead.
    """
    divider = "=" * 50
    logger.info(divider)
    logger.info(f"RECEIVED USER MESSAGE: {message}")
    logger.info(f"Current History Length: {len(history)}")

    formatted_prompt = format_prompt(message, history)
    logger.info(f"Formatted Prompt sent to model:\n{formatted_prompt}")
    logger.info("Thinking... (Sending request to HF Inference API)")

    start_time = time.time()
    generated_text = ""

    try:
        token_stream = client.text_generation(
            formatted_prompt,
            max_new_tokens=512,
            stream=True,
            details=True,
            return_full_text=False,
        )
        logger.info("Stream started. Receiving tokens...")

        for chunk in token_stream:
            generated_text += chunk.token.text
            # Yield the running text so the UI updates incrementally.
            yield generated_text

        elapsed = time.time() - start_time
        logger.info(f"\nResponse Complete. Duration: {elapsed:.2f}s")
        logger.info(f"FULL GENERATED RESPONSE:\n{generated_text}")
        logger.info(divider)
    except Exception as e:
        logger.error(f"Error during generation: {e}")
        yield f"Error: {str(e)}"
96
 
97
  # --- Gradio UI ---
98
  logger.info("Building Gradio Interface...")
99
 
100
- # Instantiate components outside of Blocks to avoid DuplicateBlockError
101
- chatbot_component = gr.Chatbot(height=600)
102
- textbox_component = gr.Textbox(placeholder="Ask me anything...", container=False, scale=7)
103
-
104
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
105
- gr.Markdown("# 🤖 Hugging Face Spaces Chatbot")
106
  gr.Markdown(f"### Powered by {MODEL_NAME}")
107
- gr.Markdown("Check the **Logs** tab or the console to see the AI 'thinking'!")
 
 
 
 
 
 
 
 
 
 
108
 
109
- with gr.Tab("Chat"):
110
- chat_interface = gr.ChatInterface(
111
- fn=respond,
112
- chatbot=chatbot_component,
113
- textbox=textbox_component,
114
- )
 
115
 
116
- with gr.Tab("System Logs"):
117
- gr.Markdown("To see real-time logs, please check the **Container Logs** in the Hugging Face Space settings or the terminal output if running locally.")
118
- gr.Markdown("*(Logs are being written to stdout/console)*")
119
 
120
  if __name__ == "__main__":
121
  logger.info("Launching Gradio App...")
 
1
import gradio as gr
import requests

import logging
import os
import sys
import time

logger = logging.getLogger("ChatbotBrain")

logger.info(f"Gradio Version: {gr.__version__}")
logger.info(f"Python Version: {sys.version}")

# --- Configuration ---
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"

# SECURITY FIX: never hard-code an API token in source (the previous
# revision committed a real token, split into two literals to evade git
# hooks — it is public now and must be rotated). Read it from the
# environment instead; set HF_TOKEN in the Space's secrets.
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# New API endpoint (api-inference is deprecated)
API_URL = f"https://router.huggingface.co/hf-inference/models/{MODEL_NAME}"
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

logger.info(f"Using Model: {MODEL_NAME}")
logger.info(f"API URL: {API_URL}")
29
def format_prompt(message, history):
    """Format the conversation history plus the new message into a prompt.

    Emits the literal Zephyr chat-template tokens <|user|>, <|assistant|>
    and </s> that zephyr-7b-beta was trained on.

    BUGFIX: a previous revision had these tokens HTML-escaped
    (&lt;|user|&gt; ... &lt;/s&gt;), so the model received the escaped
    text instead of its special tokens and the template never applied.
    """
    prompt = ""
    for user_msg, assistant_msg in history:
        prompt += f"<|user|>\n{user_msg}</s>\n"
        prompt += f"<|assistant|>\n{assistant_msg}</s>\n"
    prompt += f"<|user|>\n{message}</s>\n<|assistant|>\n"
    return prompt
37
 
38
def query_model(payload):
    """POST *payload* to the HF Inference API and return the decoded JSON.

    Returns the parsed response on success, or a ``{"error": "..."}`` dict
    on any failure: non-200 status, timeout, network error, or a 200
    response whose body is not valid JSON.
    """
    logger.info(f"Sending request to API with payload: {payload}")
    start_time = time.time()

    try:
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
        duration = time.time() - start_time
        logger.info(f"API Response Status: {response.status_code} (took {duration:.2f}s)")
        logger.info(f"API Response Headers: {dict(response.headers)}")

        if response.status_code != 200:
            logger.error(f"API Error: {response.text}")
            return {"error": f"API returned status {response.status_code}: {response.text}"}

        # BUGFIX: a 200 with a non-JSON body (e.g. an HTML error page from
        # a proxy or gateway) previously raised an unhandled ValueError
        # from response.json(); report it as a normal error instead.
        try:
            result = response.json()
        except ValueError:
            logger.error(f"API returned non-JSON body: {response.text[:500]}")
            return {"error": "API returned a non-JSON response body."}

        logger.info(f"API Response Body: {result}")
        return result
    except requests.exceptions.Timeout:
        logger.error("API request timed out after 60 seconds")
        return {"error": "Request timed out. The model may be loading, please try again."}
    except Exception as e:
        # Boundary handler: log and convert anything unexpected to an error dict.
        logger.error(f"Exception during API call: {e}")
        return {"error": str(e)}
62
+
63
def respond(message, history):
    """Generate the assistant's reply for *message* given the chat *history*.

    Builds a Zephyr prompt, posts it to the HF Inference API through
    query_model(), and returns either the generated text or an
    "Error: ..." string describing the failure.
    """

    def extract_text(result):
        # The API may answer with [{"generated_text": ...}], a bare dict,
        # or something unexpected; fall back to its string form.
        if isinstance(result, list) and len(result) > 0:
            return result[0].get("generated_text", "")
        if isinstance(result, dict):
            return result.get("generated_text", str(result))
        return str(result)

    banner = "=" * 50
    logger.info(banner)
    logger.info(f"RECEIVED USER MESSAGE: {message}")
    logger.info(f"Current History Length: {len(history)}")

    prompt = format_prompt(message, history)
    logger.info(f"Formatted Prompt:\n{prompt}")
    logger.info("Thinking... (Sending request to HF Inference API)")

    generation_params = {
        "max_new_tokens": 512,
        "temperature": 0.7,
        "do_sample": True,
        "return_full_text": False,
    }
    result = query_model({"inputs": prompt, "parameters": generation_params})

    if "error" in result:
        logger.error(f"Error from API: {result['error']}")
        return f"Error: {result['error']}"

    reply = extract_text(result)
    logger.info(f"GENERATED RESPONSE:\n{reply}")
    logger.info(banner)
    return reply
 
 
 
 
 
 
 
 
 
 
 
102
 
103
# --- Gradio UI ---
logger.info("Building Gradio Interface...")

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# CPU Chatbot")
    gr.Markdown(f"### Powered by {MODEL_NAME}")
    gr.Markdown("Check the **Container Logs** to see the AI 'thinking'!")

    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(placeholder="Ask me anything...", label="Your message")
    clear = gr.ClearButton([msg, chatbot])

    def user_submit(message, history):
        """Append the user's message as a pending [message, None] turn."""
        if not message.strip():
            # Ignore empty submissions; history stays untouched.
            return "", history
        history = history + [[message, None]]
        return "", history

    def bot_respond(history):
        """Fill in the bot reply for the newest pending turn, if any."""
        if not history:
            return history
        # BUGFIX: only answer a turn that is still pending. Previously an
        # empty submission left history unchanged, yet .then(bot_respond)
        # still fired and regenerated/overwrote the last turn's reply.
        if history[-1][1] is not None:
            return history
        user_message = history[-1][0]
        bot_response = respond(user_message, history[:-1])
        history[-1][1] = bot_response
        return history

    msg.submit(user_submit, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_respond, chatbot, chatbot
    )
132
 
133
  if __name__ == "__main__":
134
  logger.info("Launching Gradio App...")
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- gradio>=4.44.1
2
- huggingface_hub>=0.23.0
 
1
+ gradio
2
+ requests