Spaces:
Runtime error
Runtime error
| import os | |
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| # ============================================================ | |
| # COUNT FROGULA'S AI INTEGRATION STACK | |
| # Full InferenceClient + OpenAI-compatible HF Router | |
| # Models: Top LLMs, VLMs, Coders, TTS/ASR, Embeddings | |
| # Router: https://router.huggingface.co/v1 | |
| # ============================================================ | |
# Display label shown in the model dropdown -> Hugging Face Hub repository id.
# Insertion order is preserved and is the order the labels are listed in the UI.
MODELS: dict[str, str] = {
    "Qwen3.5-397B-A17B (VLM)": "Qwen/Qwen3.5-397B-A17B",
    "Kimi-K2.5 (VLM)": "moonshotai/Kimi-K2.5",
    "GLM-5 (754B)": "zai-org/GLM-5",
    "Qwen3-Coder-Next": "Qwen/Qwen3-Coder-Next",
    "Llama-3.3-70B": "meta-llama/Llama-3.3-70B-Instruct",
    "Llama-3.1-8B": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
    "Qwen3-8B": "Qwen/Qwen3-8B",
    "GPT-OSS-120B": "openai/gpt-oss-120b",
    "GPT-OSS-20B": "openai/gpt-oss-20b",
}
# Default system prompt pre-filled in the "System Message" textbox.
# Built line by line so each bullet of the prompt is easy to edit in isolation.
SYSTEM_DEFAULT = "\n".join(
    (
        "You are COUNT FROGULA's AI Integration Assistant.",
        "You have access to the full HuggingFace AI stack:",
        "- Top LLMs & VLMs via InferenceClient",
        "- OpenAI-compatible endpoint at https://router.huggingface.co/v1",
        "- smolagents agentic framework",
        "- MCP tools integration",
        "Be helpful, precise, and maximize capability in every response.",
    )
)
def respond(
    message,
    history: list[dict],
    model_choice,
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken | None,
):
    """Stream a chat completion from the Hugging Face router.

    Args:
        message: The new user message.
        history: Earlier conversation turns as chat-message dicts; they are
            forwarded to the model unchanged, between the system prompt and
            the new user message.
        model_choice: A key of ``MODELS``. Unknown keys fall back to
            ``"openai/gpt-oss-20b"``.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded as ``max_tokens`` to the completion call.
        temperature: Forwarded as ``temperature`` to the completion call.
        top_p: Forwarded as ``top_p`` to the completion call.
        hf_token: OAuth token object whose ``.token`` is used for
            authentication. When it is ``None`` the ``HF_TOKEN`` environment
            variable is used instead.

    Yields:
        The accumulated response text after each streamed chunk that
        carries a choice.
    """
    # Normalise a missing/empty token to None instead of sending an empty
    # string as a credential.
    token = (hf_token.token if hf_token else os.environ.get("HF_TOKEN")) or None
    model_id = MODELS.get(model_choice, "openai/gpt-oss-20b")

    # InferenceClient raises ValueError when given both `model` and
    # `base_url`, so the router URL goes on the client and the model id is
    # passed per request below.
    client = InferenceClient(
        token=token,
        base_url="https://router.huggingface.co/v1",
    )

    messages = [
        {"role": "system", "content": system_message},
        *history,
        {"role": "user", "content": message},
    ]

    response = ""
    stream = client.chat.completions.create(
        model=model_id,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    )
    for chunk in stream:
        # Some stream chunks carry no choices; indexing them would raise
        # IndexError, so skip them.
        if not chunk.choices:
            continue
        response += chunk.choices[0].delta.content or ""
        yield response
# Build the UI: header, model picker + login row, chat interface, footer.
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="purple", secondary_hue="green"),
    title="COUNT FROGULA's AI Integration Stack",
) as demo:
    gr.Markdown(
        "\n"
        "# COUNT FROGULA's AI Integration Stack\n"
        "**Full HF InferenceClient + OpenAI-compatible Router**\n"
        "> Models: Top LLMs, VLMs, Coders | Router: `https://router.huggingface.co/v1`\n"
    )

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(MODELS.keys()),
            value="GPT-OSS-20B",
            label="Model",
            scale=2,
        )
        gr.LoginButton(scale=1)

    chatbot = gr.ChatInterface(
        respond,
        type="messages",
        # These map, in order, onto respond()'s parameters after
        # (message, history). `gr.OAuthToken` must NOT be listed here: it is a
        # plain data class with required fields, not a component, so calling
        # `gr.OAuthToken()` raises TypeError at import time. Gradio supplies
        # the `hf_token` argument itself from respond()'s type annotation.
        additional_inputs=[
            model_dropdown,
            gr.Textbox(value=SYSTEM_DEFAULT, label="System Message", lines=4),
            gr.Slider(minimum=1, maximum=8192, value=2048, step=1, label="Max Tokens"),
            gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.05, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        ],
        examples=[
            ["Explain the HuggingFace InferenceClient API and how to use the OpenAI-compatible router at router.huggingface.co/v1"],
            ["Write a Python script using smolagents with HuggingFace tools to build an autonomous coding agent"],
            ["Compare Qwen3.5-397B, Kimi-K2.5, and GLM-5 for enterprise AI integration tasks"],
            ["Generate a complete Next.js + Vercel deployment config for a HuggingFace Spaces integration"],
        ],
        cache_examples=False,
    )

    gr.Markdown(
        "\n"
        "---\n"
        "**Integration Stack:** InferenceClient | smolagents | MCP Tools | OpenAI Router\n"
        "**Collection:** [COUNT FROGULA's AI Integration Stack](https://huggingface.co/collections/COUNTfrogula/count-frogulas-ai-integration-stack)\n"
    )


if __name__ == "__main__":
    demo.launch()