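# Page residue from the Hugging Face file viewer, kept as comments so the
# module parses: "COUNTfrogula's picture" / "Update app.py" / "f9a860e verified"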
import os
import gradio as gr
from huggingface_hub import InferenceClient
# ============================================================
# COUNT FROGULA'S AI INTEGRATION STACK
# Full InferenceClient + OpenAI-compatible HF Router
# Models: Top LLMs, VLMs, Coders, TTS/ASR, Embeddings
# Router: https://router.huggingface.co/v1
# ============================================================
# Display label -> Hugging Face model repo id.
# The labels populate the model dropdown (in insertion order) and the repo id
# is what gets sent to the inference router, so keep the ordering stable.
MODELS = {
    # Entries labelled as vision-language models
    "Qwen3.5-397B-A17B (VLM)": "Qwen/Qwen3.5-397B-A17B",
    "Kimi-K2.5 (VLM)": "moonshotai/Kimi-K2.5",

    # Large general / coding models
    "GLM-5 (754B)": "zai-org/GLM-5",
    "Qwen3-Coder-Next": "Qwen/Qwen3-Coder-Next",

    # Llama family
    "Llama-3.3-70B": "meta-llama/Llama-3.3-70B-Instruct",
    "Llama-3.1-8B": "meta-llama/Meta-Llama-3.1-8B-Instruct",

    # Other chat models
    "DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
    "Qwen3-8B": "Qwen/Qwen3-8B",

    # GPT-OSS (the 20B entry doubles as the app-wide default)
    "GPT-OSS-120B": "openai/gpt-oss-120b",
    "GPT-OSS-20B": "openai/gpt-oss-20b",
}
# Default system prompt pre-filled in the "System Message" textbox.
# Built from adjacent literals so each prompt line is explicit; the resulting
# string has no leading or trailing newline.
SYSTEM_DEFAULT = (
    "You are COUNT FROGULA's AI Integration Assistant.\n"
    "You have access to the full HuggingFace AI stack:\n"
    "- Top LLMs & VLMs via InferenceClient\n"
    "- OpenAI-compatible endpoint at https://router.huggingface.co/v1\n"
    "- smolagents agentic framework\n"
    "- MCP tools integration\n"
    "Be helpful, precise, and maximize capability in every response."
)
def respond(
    message,
    history: list[dict],
    model_choice,
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken | None,
):
    """Stream a chat completion for the ChatInterface.

    Args:
        message: The new user message.
        history: Prior conversation turns as chat-message dicts; they are
            forwarded unchanged between the system and user messages.
        model_choice: A label from ``MODELS``; unknown labels fall back to
            ``"openai/gpt-oss-20b"``.
        system_message: Text sent as the system prompt.
        max_tokens: Generation length limit forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.
        hf_token: OAuth token of the logged-in user, or ``None``.

    Yields:
        The response text accumulated so far, once per streamed chunk that
        carries a choice.
    """
    # Prefer the signed-in user's token, then the HF_TOKEN environment
    # variable.  An empty string is normalised to None so that no blank
    # credential is handed to the client.
    token = (hf_token.token if hf_token else os.environ.get("HF_TOKEN")) or None
    model_id = MODELS.get(model_choice, "openai/gpt-oss-20b")

    # `model` and `base_url` must not both be given to InferenceClient (the
    # constructor rejects that combination).  The router URL goes on the
    # client; the model id is sent per request below.
    client = InferenceClient(
        token=token,
        base_url="https://router.huggingface.co/v1",
    )

    messages = [
        {"role": "system", "content": system_message},
        *history,
        {"role": "user", "content": message},
    ]

    response = ""
    for chunk in client.chat.completions.create(
        model=model_id,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        # Some stream chunks carry an empty `choices` list; indexing [0]
        # on those would raise IndexError and abort the reply.
        if not chunk.choices:
            continue
        response += chunk.choices[0].delta.content or ""
        yield response
# Build the UI: header, model picker + login button, chat interface, footer.
# NOTE: the Markdown literals are deliberately kept flush-left so the strings
# passed to gr.Markdown stay exactly as they were.
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="purple", secondary_hue="green"),
    title="COUNT FROGULA's AI Integration Stack",
) as demo:
    gr.Markdown(
        """
# COUNT FROGULA's AI Integration Stack
**Full HF InferenceClient + OpenAI-compatible Router**
> Models: Top LLMs, VLMs, Coders | Router: `https://router.huggingface.co/v1`
"""
    )

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(MODELS.keys()),
            value="GPT-OSS-20B",
            label="Model",
            scale=2,
        )
        gr.LoginButton(scale=1)

    chatbot = gr.ChatInterface(
        respond,
        type="messages",
        # Order matches respond()'s parameters after (message, history):
        # model_choice, system_message, max_tokens, temperature, top_p.
        # The trailing `hf_token` parameter is NOT listed here: gr.OAuthToken
        # is a token data class, not an input component, and Gradio injects
        # it from the function's type annotation when a user is logged in.
        additional_inputs=[
            model_dropdown,
            gr.Textbox(value=SYSTEM_DEFAULT, label="System Message", lines=4),
            gr.Slider(minimum=1, maximum=8192, value=2048, step=1, label="Max Tokens"),
            gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.05, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        ],
        examples=[
            ["Explain the HuggingFace InferenceClient API and how to use the OpenAI-compatible router at router.huggingface.co/v1"],
            ["Write a Python script using smolagents with HuggingFace tools to build an autonomous coding agent"],
            ["Compare Qwen3.5-397B, Kimi-K2.5, and GLM-5 for enterprise AI integration tasks"],
            ["Generate a complete Next.js + Vercel deployment config for a HuggingFace Spaces integration"],
        ],
        cache_examples=False,
    )

    gr.Markdown(
        """
---
**Integration Stack:** InferenceClient | smolagents | MCP Tools | OpenAI Router
**Collection:** [COUNT FROGULA's AI Integration Stack](https://huggingface.co/collections/COUNTfrogula/count-frogulas-ai-integration-stack)
"""
    )

# Launch only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()