Spaces:

COUNTfrogula
/

count-frogula-ai-integration

Runtime error

App Files Files Community

count-frogula-ai-integration / app.py

COUNTfrogula

Update app.py

f9a860e verified 3 months ago

raw

history blame contribute delete

4.1 kB

	import os
	import gradio as gr
	from huggingface_hub import InferenceClient

	# ============================================================
	# COUNT FROGULA'S AI INTEGRATION STACK
	# Full InferenceClient + OpenAI-compatible HF Router
	# Models: Top LLMs, VLMs, Coders, TTS/ASR, Embeddings
	# Router: https://router.huggingface.co/v1
	# ============================================================

	# Maps the label offered in the model dropdown to the model identifier
	# that `respond` looks up for the selected label.
	# Insertion order is the order in which the choices are listed.
	MODELS = {
	    "Qwen3.5-397B-A17B (VLM)": "Qwen/Qwen3.5-397B-A17B",
	    "Kimi-K2.5 (VLM)": "moonshotai/Kimi-K2.5",
	    "GLM-5 (754B)": "zai-org/GLM-5",
	    "Qwen3-Coder-Next": "Qwen/Qwen3-Coder-Next",
	    "Llama-3.3-70B": "meta-llama/Llama-3.3-70B-Instruct",
	    "Llama-3.1-8B": "meta-llama/Meta-Llama-3.1-8B-Instruct",
	    "DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
	    "Qwen3-8B": "Qwen/Qwen3-8B",
	    "GPT-OSS-120B": "openai/gpt-oss-120b",
	    "GPT-OSS-20B": "openai/gpt-oss-20b",
	}

	# Default system prompt: used as the initial value of the "System Message"
	# textbox, whose content is sent as the first (system) message of each request.
	SYSTEM_DEFAULT = """You are COUNT FROGULA's AI Integration Assistant.
	You have access to the full HuggingFace AI stack:
	- Top LLMs & VLMs via InferenceClient
	- OpenAI-compatible endpoint at https://router.huggingface.co/v1
	- smolagents agentic framework
	- MCP tools integration
	Be helpful, precise, and maximize capability in every response."""


	def respond(
	    message,
	    history: list[dict],
	    model_choice,
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	    hf_token: gr.OAuthToken | None,
	):
	    """Stream a chat completion for the chat UI.

	    Args:
	        message: The latest user message.
	        history: Earlier conversation turns; they are forwarded unchanged
	            between the system message and the new user message.
	        model_choice: Key into ``MODELS``; an unknown key falls back to
	            ``"openai/gpt-oss-20b"``.
	        system_message: Content of the leading system message.
	        max_tokens: Forwarded as ``max_tokens`` to the completion request.
	        temperature: Forwarded as ``temperature``.
	        top_p: Forwarded as ``top_p``.
	        hf_token: OAuth token of the signed-in user, or ``None``; when it is
	            missing the ``HF_TOKEN`` environment variable is used instead.

	    Yields:
	        The response text accumulated so far, once per streamed chunk that
	        carries at least one choice.
	    """
	    # Fall back to None (not "") when no token is configured, so an empty
	    # string is never handed to the client as an explicit token.
	    token = hf_token.token if hf_token else (os.environ.get("HF_TOKEN") or None)
	    model_id = MODELS.get(model_choice, "openai/gpt-oss-20b")

	    # `model` and `base_url` are mutually exclusive on InferenceClient:
	    # the router URL goes to the client, the model id goes to the request.
	    client = InferenceClient(
	        token=token,
	        base_url="https://router.huggingface.co/v1",
	    )

	    messages = [{"role": "system", "content": system_message}]
	    messages.extend(history)
	    messages.append({"role": "user", "content": message})

	    stream = client.chat.completions.create(
	        model=model_id,
	        messages=messages,
	        max_tokens=max_tokens,
	        temperature=temperature,
	        top_p=top_p,
	        stream=True,
	    )

	    response = ""
	    for chunk in stream:
	        choices = chunk.choices
	        # A chunk without choices has no text delta; indexing it would raise.
	        if not choices:
	            continue
	        response += choices[0].delta.content or ""
	        yield response


	# Page layout: header, model picker + login button, chat interface, footer.
	with gr.Blocks(
	    theme=gr.themes.Soft(primary_hue="purple", secondary_hue="green"),
	    title="COUNT FROGULA's AI Integration Stack",
	) as demo:
	    gr.Markdown(
	        """
	# COUNT FROGULA's AI Integration Stack
	Full HF InferenceClient + OpenAI-compatible Router
	> Models: Top LLMs, VLMs, Coders | Router: `https://router.huggingface.co/v1`
	"""
	    )

	    with gr.Row():
	        model_dropdown = gr.Dropdown(
	            choices=list(MODELS.keys()),
	            value="GPT-OSS-20B",
	            label="Model",
	            scale=2,
	        )
	        gr.LoginButton(scale=1)

	    chatbot = gr.ChatInterface(
	        respond,
	        type="messages",
	        # Order matches respond()'s parameters after (message, history).
	        # The trailing `hf_token` parameter is NOT listed here: gr.OAuthToken
	        # is a data class, not a component, and Gradio supplies it from the
	        # parameter's type hint.
	        additional_inputs=[
	            model_dropdown,
	            gr.Textbox(value=SYSTEM_DEFAULT, label="System Message", lines=4),
	            gr.Slider(minimum=1, maximum=8192, value=2048, step=1, label="Max Tokens"),
	            gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.05, label="Temperature"),
	            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
	        ],
	        examples=[
	            ["Explain the HuggingFace InferenceClient API and how to use the OpenAI-compatible router at router.huggingface.co/v1"],
	            ["Write a Python script using smolagents with HuggingFace tools to build an autonomous coding agent"],
	            ["Compare Qwen3.5-397B, Kimi-K2.5, and GLM-5 for enterprise AI integration tasks"],
	            ["Generate a complete Next.js + Vercel deployment config for a HuggingFace Spaces integration"],
	        ],
	        cache_examples=False,
	    )

	    gr.Markdown(
	        """
	---
	Integration Stack: InferenceClient | smolagents | MCP Tools | OpenAI Router
	Collection: [COUNT FROGULA's AI Integration Stack](https://huggingface.co/collections/COUNTfrogula/count-frogulas-ai-integration-stack)
	"""
	    )

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()