Frusto committed on
Commit
fbe4031
·
verified ·
1 Parent(s): bd78a22

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -34
app.py CHANGED
@@ -2,18 +2,13 @@ import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import traceback
4
 
5
- # --- Helper: Gradio 6.5 Text Extractor ---
6
  def get_text(content):
7
- """Extracts text safely from Gradio 6's list-of-dict message format."""
8
- if isinstance(content, str):
9
- return content
10
  if isinstance(content, list):
11
  return "".join([block.get("text", "") for block in content if block.get("type") == "text"])
12
- if isinstance(content, dict):
13
- return content.get("text", str(content))
14
  return str(content)
15
 
16
- # --- Core Function: The Chat Logic ---
17
  def respond(
18
  message,
19
  history: list[dict],
@@ -23,21 +18,19 @@ def respond(
23
  top_p,
24
  hf_token: gr.OAuthToken,
25
  ):
26
- # 1. Check Authentication
27
  if not hf_token or not hf_token.token:
28
- yield "⚠️ Please **Login** using the button in the sidebar to access the @frusto360 AI."
29
  return
30
 
31
  try:
32
- # 2. DIRECT ROUTING: Avoids the StopIteration error by bypassing the provider search
 
33
  MODEL_ID = "Frusto/llama-3.2-1b-frusto360-final"
34
- # We use the direct inference URL as the base_url
35
- client = InferenceClient(
36
- base_url=f"https://api-inference.huggingface.co/models/{MODEL_ID}",
37
- token=hf_token.token
38
- )
39
 
40
- # 3. Build Llama 3.2 Chat Template
41
  prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
42
  for msg in history:
43
  role = msg.get("role", "user")
@@ -46,36 +39,27 @@ def respond(
46
 
47
  prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{get_text(message)}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
48
 
49
- # 4. Stream the Response
50
  response = ""
51
- # Note: 'model' parameter is OMITTED because it's already in the base_url
52
  for token in client.text_generation(
53
  prompt,
54
  max_new_tokens=max_tokens,
55
  stream=True,
56
  temperature=temperature,
57
  top_p=top_p,
58
- stop=["<|eot_id|>", "<|start_header_id|>"]
59
  ):
60
- # Compatibility check for token format
61
  token_text = token if isinstance(token, str) else getattr(token, 'token', getattr(token, 'text', str(token)))
62
  response += token_text
63
  yield response
64
 
65
  except Exception as e:
66
- error_msg = str(e)
67
- if "503" in error_msg:
68
- yield "⏳ **Model is starting up.** Hugging Face is loading the weights. Please try again in 30 seconds!"
69
- elif "404" in error_msg:
70
- yield f"❌ **Error 404:** Model not found or Inference API disabled on the model page."
71
- else:
72
- yield f"❌ **Error:** {error_msg}\n\n*Check the Space Logs for details.*"
73
 
74
- # --- Gradio UI Layout ---
75
  chatbot_interface = gr.ChatInterface(
76
  respond,
77
  additional_inputs=[
78
- gr.Textbox(value="You are a helpful assistant developed by @frusto360.", label="System message"),
79
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
80
  gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
81
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
@@ -84,13 +68,9 @@ chatbot_interface = gr.ChatInterface(
84
 
85
  with gr.Blocks(fill_height=True) as demo:
86
  with gr.Sidebar():
87
- gr.Markdown("## 🔐 @frusto360 Control")
88
  gr.LoginButton()
89
- gr.Markdown("---")
90
- gr.Markdown("Created by [@frusto360](https://youtube.com/@frusto360)")
91
-
92
  chatbot_interface.render()
93
 
94
  if __name__ == "__main__":
95
- # Gradio 6.0+ prefers theme in launch()
96
  demo.launch(theme="glass")
 
2
  from huggingface_hub import InferenceClient
3
  import traceback
4
 
 
5
def get_text(content):
    """Extract plain text from a Gradio chat message `content` value.

    Gradio 6 may deliver message content as a plain string, a list of
    content blocks (dicts like ``{"type": "text", "text": ...}``), or a
    single dict. Anything else is stringified as a last resort.

    Args:
        content: The message content in any of the formats above.

    Returns:
        str: The concatenated text of the message (may be empty).
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Keep only text blocks. Guard with isinstance so a malformed,
        # non-dict element (e.g. a raw string in the list) cannot raise
        # AttributeError on .get — previously this branch assumed every
        # element was a dict.
        return "".join(
            block.get("text", "")
            for block in content
            if isinstance(block, dict) and block.get("type") == "text"
        )
    if isinstance(content, dict):
        return content.get("text", str(content))
    return str(content)
11
 
 
12
  def respond(
13
  message,
14
  history: list[dict],
 
18
  top_p,
19
  hf_token: gr.OAuthToken,
20
  ):
 
21
  if not hf_token or not hf_token.token:
22
+ yield "⚠️ Please **Login** in the sidebar to access @frusto360 AI."
23
  return
24
 
25
  try:
26
+ # NEW 2026 ROUTER URL
27
+ # We use the 'hf-inference' provider prefix on the new router domain
28
  MODEL_ID = "Frusto/llama-3.2-1b-frusto360-final"
29
+ API_URL = f"https://router.huggingface.co/hf-inference/models/{MODEL_ID}"
30
+
31
+ client = InferenceClient(base_url=API_URL, token=hf_token.token)
 
 
32
 
33
+ # Build Prompt
34
  prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
35
  for msg in history:
36
  role = msg.get("role", "user")
 
39
 
40
  prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{get_text(message)}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
41
 
 
42
  response = ""
 
43
  for token in client.text_generation(
44
  prompt,
45
  max_new_tokens=max_tokens,
46
  stream=True,
47
  temperature=temperature,
48
  top_p=top_p,
49
+ stop=["<|eot_id|>"]
50
  ):
 
51
  token_text = token if isinstance(token, str) else getattr(token, 'token', getattr(token, 'text', str(token)))
52
  response += token_text
53
  yield response
54
 
55
  except Exception as e:
56
+ yield f"❌ **Router Error:** {str(e)}\n\n*Note: Ensure 'Inference API' is enabled in your model settings.*"
 
 
 
 
 
 
57
 
58
+ # UI Setup (Gradio 6.5)
59
  chatbot_interface = gr.ChatInterface(
60
  respond,
61
  additional_inputs=[
62
+ gr.Textbox(value="You are the @frusto360 AI.", label="System message"),
63
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
64
  gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
65
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
 
68
 
69
# --- Page layout: login sidebar plus the chat interface in the main area ---
with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        # Hugging Face OAuth login; `respond` requires the resulting token.
        gr.Markdown("## 🔐 @frusto360 Auth")
        gr.LoginButton()
    chatbot_interface.render()
74
 
75
if __name__ == "__main__":
    # Script entry point: the theme is supplied at launch time.
    demo.launch(theme="glass")