Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,13 +2,20 @@ import gradio as gr
|
|
| 2 |
import os
|
| 3 |
from huggingface_hub import InferenceClient
|
| 4 |
|
| 5 |
-
#
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
def respond(message, history, system_message, temperature, request: gr.Request):
|
| 9 |
# --- 1. Authentication Logic ---
|
| 10 |
token = None
|
| 11 |
-
# Safely access token (handles both Local run and Spaces)
|
| 12 |
if request:
|
| 13 |
token = getattr(request, "token", None)
|
| 14 |
|
|
@@ -20,12 +27,12 @@ def respond(message, history, system_message, temperature, request: gr.Request):
|
|
| 20 |
return
|
| 21 |
|
| 22 |
# --- 2. Setup Client ---
|
|
|
|
| 23 |
client = InferenceClient(model_id, token=token)
|
| 24 |
|
| 25 |
-
# --- 3. Build Messages
|
| 26 |
messages = [{"role": "system", "content": system_message}]
|
| 27 |
|
| 28 |
-
# We use type="messages" in ChatInterface, so history is already a list of dicts
|
| 29 |
for msg in history:
|
| 30 |
messages.append(msg)
|
| 31 |
|
|
@@ -53,22 +60,43 @@ def respond(message, history, system_message, temperature, request: gr.Request):
|
|
| 53 |
yield response_text
|
| 54 |
|
| 55 |
except Exception as e:
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
# --- 5. Build UI ---
|
| 59 |
with gr.Blocks(fill_height=True) as demo:
|
| 60 |
with gr.Sidebar():
|
| 61 |
-
gr.Markdown("#
|
| 62 |
-
gr.Markdown(
|
| 63 |
gr.LoginButton("Sign in")
|
| 64 |
|
| 65 |
gr.ChatInterface(
|
| 66 |
respond,
|
| 67 |
-
# 'type="messages"' fixes the deprecation warning and makes parsing easier
|
| 68 |
type="messages",
|
| 69 |
additional_inputs=[
|
| 70 |
-
gr.Textbox(
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
]
|
| 73 |
)
|
| 74 |
|
|
|
|
| 2 |
import os
|
| 3 |
from huggingface_hub import InferenceClient
|
| 4 |
|
| 5 |
+
# --- UPDATED: Coding and general-purpose models on Free Tier (2025/2026) ---
|
| 6 |
+
AVAILABLE_MODELS = [
|
| 7 |
+
"Qwen/Qwen2.5-Coder-32B-Instruct", # SOTA Coding Model (Best overall)
|
| 8 |
+
"Qwen/Qwen2.5-72B-Instruct", # Larger General Model (Great at Logic)
|
| 9 |
+
"meta-llama/Llama-3.1-8B-Instruct", # Fast & Reliable
|
| 10 |
+
"mistralai/Mistral-Nemo-Instruct-2407", # 12B Model (Very Smart)
|
| 11 |
+
"microsoft/Phi-3.5-mini-instruct", # Extremely Fast
|
| 12 |
+
"bigcode/starcoder2-15b-instruct-v0.1", # Specialized Code Model
|
| 13 |
+
"HuggingFaceH4/zephyr-7b-beta", # Backup Model
|
| 14 |
+
]
|
| 15 |
|
| 16 |
+
def respond(message, history, system_message, temperature, model_id, request: gr.Request):
|
| 17 |
# --- 1. Authentication Logic ---
|
| 18 |
token = None
|
|
|
|
| 19 |
if request:
|
| 20 |
token = getattr(request, "token", None)
|
| 21 |
|
|
|
|
| 27 |
return
|
| 28 |
|
| 29 |
# --- 2. Setup Client ---
|
| 30 |
+
# The client is created dynamically based on the selected model_id
|
| 31 |
client = InferenceClient(model_id, token=token)
|
| 32 |
|
| 33 |
+
# --- 3. Build Messages ---
|
| 34 |
messages = [{"role": "system", "content": system_message}]
|
| 35 |
|
|
|
|
| 36 |
for msg in history:
|
| 37 |
messages.append(msg)
|
| 38 |
|
|
|
|
| 60 |
yield response_text
|
| 61 |
|
| 62 |
except Exception as e:
|
| 63 |
+
# Better Error Handling for Model Switching
|
| 64 |
+
error_msg = str(e)
|
| 65 |
+
if "404" in error_msg or "model_not_supported" in error_msg:
|
| 66 |
+
yield f"Error: The model **{model_id}** is currently offline or overloaded. \n\n👉 Please select a different model from the dropdown above."
|
| 67 |
+
else:
|
| 68 |
+
yield f"Error: {error_msg}"
|
| 69 |
|
| 70 |
# --- 5. Build UI ---
|
| 71 |
with gr.Blocks(fill_height=True) as demo:
|
| 72 |
with gr.Sidebar():
|
| 73 |
+
gr.Markdown("# 🤖 Multi-Model Coding Assistant")
|
| 74 |
+
gr.Markdown("Select a model from the **Additional Inputs** menu below to switch between different AI coding experts.")
|
| 75 |
gr.LoginButton("Sign in")
|
| 76 |
|
| 77 |
gr.ChatInterface(
|
| 78 |
respond,
|
|
|
|
| 79 |
type="messages",
|
| 80 |
additional_inputs=[
|
| 81 |
+
gr.Textbox(
|
| 82 |
+
value="You are an expert software engineer. Provide clean, efficient, and well-commented code.",
|
| 83 |
+
label="System Instruction",
|
| 84 |
+
lines=2
|
| 85 |
+
),
|
| 86 |
+
gr.Slider(
|
| 87 |
+
minimum=0.1,
|
| 88 |
+
maximum=2.0,
|
| 89 |
+
value=0.5,
|
| 90 |
+
step=0.1,
|
| 91 |
+
label="Temperature (Lower = More Precise)"
|
| 92 |
+
),
|
| 93 |
+
# --- Model Selection Dropdown ---
|
| 94 |
+
gr.Dropdown(
|
| 95 |
+
choices=AVAILABLE_MODELS,
|
| 96 |
+
value=AVAILABLE_MODELS[0], # Default to Qwen 2.5 Coder
|
| 97 |
+
label="Select AI Model",
|
| 98 |
+
interactive=True
|
| 99 |
+
)
|
| 100 |
]
|
| 101 |
)
|
| 102 |
|