Instructions to use my-ai-stack/Stack-2-9-finetuned with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use my-ai-stack/Stack-2-9-finetuned with Transformers:

# High-level route: build a text-generation pipeline for the model and
# call it on a one-message chat.
from transformers import pipeline

generator = pipeline("text-generation", model="my-ai-stack/Stack-2-9-finetuned")
# Single-turn conversation in role/content form.
chat = [{"role": "user", "content": "Who are you?"}]
generator(chat)

# Lower-level route: load the tokenizer and model yourself and call generate().
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "my-ai-stack/Stack-2-9-finetuned"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# Single-turn conversation in role/content form.
chat = [{"role": "user", "content": "Who are you?"}]

# Apply the tokenizer's chat template, asking for a dict of PyTorch tensors,
# then move the result onto the same device as the model.
encoded = tokenizer.apply_chat_template(
    chat,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
)
encoded = encoded.to(model.device)

generated = model.generate(**encoded, max_new_tokens=40)

# Skip the first `prompt_length` tokens of the output so that only what
# follows the prompt is decoded and printed.
prompt_length = encoded["input_ids"].shape[-1]
print(tokenizer.decode(generated[0][prompt_length:]))

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use my-ai-stack/Stack-2-9-finetuned with vLLM:

Install from pip and serve model

# Step 1 - install vLLM from pip:
pip install vllm
# Step 2 - start the vLLM server for this model
# (the curl call below expects it to be reachable on localhost:8000):
vllm serve "my-ai-stack/Stack-2-9-finetuned"
# Step 3 - call the server using curl (OpenAI-compatible API):
# POSTs a JSON body with the model name and a single user message
# to the /v1/chat/completions endpoint.
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "my-ai-stack/Stack-2-9-finetuned",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker

# Run the model with Docker, referencing it by its hf.co path:
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned

SGLang

How to use my-ai-stack/Stack-2-9-finetuned with SGLang:

Install from pip and serve model

# Step 1 - install SGLang from pip:
pip install sglang
# Step 2 - start the SGLang server for this model,
# listening on all interfaces (0.0.0.0) at port 30000:
python3 -m sglang.launch_server \
    --model-path "my-ai-stack/Stack-2-9-finetuned" \
    --host 0.0.0.0 \
    --port 30000
# Step 3 - call the server using curl (OpenAI-compatible API):
# POSTs a JSON body with the model name and a single user message
# to the /v1/chat/completions endpoint on port 30000.
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "my-ai-stack/Stack-2-9-finetuned",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker images

# Start the SGLang server inside the lmsysorg/sglang:latest image.
# The command publishes container port 30000 on the host, mounts the local
# ~/.cache/huggingface directory at /root/.cache/huggingface in the container,
# and passes HF_TOKEN into the container's environment.
# NOTE: "<secret>" is a placeholder - presumably your Hugging Face access
# token goes here; confirm before running.
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "my-ai-stack/Stack-2-9-finetuned" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
# POSTs a JSON body with the model name and a single user message
# to the /v1/chat/completions endpoint on the published port 30000.
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "my-ai-stack/Stack-2-9-finetuned",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Docker Model Runner
How to use my-ai-stack/Stack-2-9-finetuned with Docker Model Runner:
```
# Run the model with Docker Model Runner, referencing it by its hf.co path:
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
```

Stack-2-9-finetuned / stack / voice / stack_voice_integration.py

walidsobhie-code

refactor: Squeeze folders further - cleaner structure

65888d5 about 2 months ago

raw

history blame

6.16 kB

	import io
	import json
	import os
	from typing import Optional, Union

	import requests

	from voice_client import VoiceClient

	class StackWithVoice:
	    """Glue between the Stack 2.9 chat API and a voice service client.

	    Prompts are posted to ``<stack_api_url>/api/chat`` and the textual
	    answer is handed to a ``VoiceClient`` for speech synthesis.
	    Speech-to-text is only a placeholder (see ``_audio_to_text``).
	    """

	    # Seconds to wait for the Stack API; also the fallback for instances
	    # whose __init__ was bypassed.
	    request_timeout: float = 60.0

	    def __init__(self, stack_api_url: str, voice_api_url: str = "http://localhost:8000",
	                 request_timeout: float = 60.0):
	        """Create the HTTP session and the voice client.

	        Args:
	            stack_api_url: Base URL of the Stack 2.9 API.
	            voice_api_url: Base URL passed to ``VoiceClient``.
	            request_timeout: Timeout in seconds for each Stack API request.
	        """
	        self.stack_api_url = stack_api_url
	        self.voice_client = VoiceClient(voice_api_url)
	        self.session = requests.Session()
	        self.request_timeout = request_timeout

	        # Cache for voice models to avoid repeated API calls
	        self._voice_cache = {}

	    def _get_stack_response(self, prompt: str) -> str:
	        """Send ``prompt`` to the Stack 2.9 chat endpoint and return its reply.

	        Returns:
	            The ``"response"`` field of the JSON reply, or ``""`` if absent.

	        Raises:
	            Exception: If the HTTP request fails; the original
	                ``requests`` error is attached as ``__cause__``.
	        """
	        # Tolerate a base URL configured with a trailing slash.
	        endpoint = f"{self.stack_api_url.rstrip('/')}/api/chat"
	        try:
	            response = self.session.post(
	                endpoint,
	                json={"prompt": prompt, "model": "stack-2.9"},
	                headers={"Content-Type": "application/json"},
	                # Without a timeout a stalled server would block forever.
	                timeout=self.request_timeout,
	            )
	            response.raise_for_status()
	            data = response.json()
	        except requests.RequestException as e:
	            raise Exception(f"Stack API request failed: {str(e)}") from e

	        return data.get("response", "")

	    def _get_voice_model(self, voice_name: str) -> Optional[dict]:
	        """Return ``{"name": voice_name}`` if the voice service lists that voice.

	        Positive lookups are cached. Returns ``None`` when the voice is not
	        listed or when listing voices fails (a warning is printed).
	        """
	        cached = self._voice_cache.get(voice_name)
	        if cached is not None:
	            return cached

	        try:
	            voices = self.voice_client.list_voices()
	            if voice_name in voices:
	                info = {"name": voice_name}
	                self._voice_cache[voice_name] = info
	                return info
	            return None
	        except Exception as e:
	            # Best-effort lookup: report the problem but do not propagate it.
	            print(f"Warning: Failed to get voice models: {e}")
	            return None

	    def voice_chat(self, prompt_audio_path: str, voice_name: str = "default") -> Optional[bytes]:
	        """Run the full voice chat flow: audio -> text -> response -> audio.

	        Returns:
	            Whatever ``VoiceClient.synthesize`` returns for the reply, or
	            ``None`` when either the prompt text or the reply is empty.
	        """
	        # Step 1: Convert audio to text (placeholder - in real implementation, use speech-to-text)
	        print(f"Converting audio to text: {prompt_audio_path}")
	        prompt_text = self._audio_to_text(prompt_audio_path)
	        if not prompt_text:
	            return None

	        print(f"User prompt: {prompt_text}")

	        # Step 2: Get response from Stack 2.9
	        print("Getting response from Stack 2.9...")
	        response_text = self._get_stack_response(prompt_text)
	        if not response_text:
	            return None

	        print(f"Stack response: {response_text}")

	        # Step 3: Convert response to audio
	        print(f"Generating voice response with voice: {voice_name}")
	        return self.voice_client.synthesize(response_text, voice_name)

	    def _audio_to_text(self, audio_path: str) -> str:
	        """Return the transcript for ``audio_path`` (placeholder implementation).

	        No speech recognition is performed. A sidecar file with the same
	        name and a ``.txt`` extension is read if it exists; otherwise a
	        fixed generic prompt is returned.
	        """
	        # Swap only the final extension, so directory names containing
	        # ".wav"/".mp3" are left alone and other audio extensions never
	        # resolve to the audio file itself.
	        root, _ = os.path.splitext(audio_path)
	        text_path = root + ".txt"

	        if os.path.isfile(text_path):
	            with open(text_path, "r", encoding="utf-8") as f:
	                return f.read().strip()

	        # Fallback: return a generic prompt
	        return "This is a test voice prompt."

	    def voice_command(self, command: str, voice_name: str = "default") -> Optional[bytes]:
	        """Send ``command`` to Stack 2.9 and synthesize the reply as speech.

	        Returns:
	            Whatever ``VoiceClient.synthesize`` returns for the reply, or
	            ``None`` when the reply is empty.
	        """
	        print(f"Executing voice command: {command}")

	        # In a real implementation, you would parse the command and execute appropriate actions
	        # For now, just pass it to Stack 2.9 as-is
	        response_text = self._get_stack_response(command)
	        if not response_text:
	            return None

	        print(f"Command response: {response_text}")

	        # Generate voice response
	        return self.voice_client.synthesize(response_text, voice_name)

	    def streaming_voice_chat(self, prompt_audio_path: str, voice_name: str = "default") -> None:
	        """Run a voice chat and save the reply audio (placeholder for streaming).

	        The reply is synthesized in one go with ``stream=True`` and written
	        to ``./streaming_response.wav`` via ``VoiceClient.download_audio``.
	        """
	        print("Starting streaming voice chat...")

	        # Get initial response
	        prompt_text = self._audio_to_text(prompt_audio_path)
	        if not prompt_text:
	            # Same guard as voice_chat: never send an empty prompt.
	            print("No prompt text extracted from audio")
	            return

	        response_text = self._get_stack_response(prompt_text)
	        if not response_text:
	            print("No response received")
	            return

	        print("Streaming response:")
	        print(response_text)

	        # In a real streaming implementation, you would:
	        # 1. Stream audio chunks to speech-to-text
	        # 2. Send partial prompts to Stack 2.9
	        # 3. Stream partial responses to TTS
	        # 4. Play audio as it's generated

	        # For now, just generate the complete response
	        audio_data = self.voice_client.synthesize(response_text, voice_name, stream=True)

	        # Save to file for demonstration
	        output_path = "./streaming_response.wav"
	        self.voice_client.download_audio(audio_data, output_path)
	        print(f"Streaming response saved to: {output_path}")

	# Demo entry point: runs only when this module is executed as a script.
	if __name__ == "__main__":
	    endpoints = {
	        "stack_api_url": "http://localhost:5000",  # Example Stack 2.9 API URL
	        "voice_api_url": "http://localhost:8000",
	    }
	    stack_voice = StackWithVoice(**endpoints)

	    print("Testing Stack with Voice integration...")

	    # The sample calls below are intentionally left disabled; uncomment
	    # the one you want to try.

	    # Voice chat:
	    #     audio_data = stack_voice.voice_chat("test_prompt.wav", "default")
	    #     if audio_data:
	    #         stack_voice.voice_client.download_audio(audio_data, "stack_response.wav")
	    #         print("Voice chat response saved to stack_response.wav")

	    # Voice command:
	    #     audio_data = stack_voice.voice_command("Write a Python function to calculate factorial", "default")
	    #     if audio_data:
	    #         stack_voice.voice_client.download_audio(audio_data, "command_response.wav")
	    #         print("Voice command response saved to command_response.wav")

	    # Streaming:
	    #     stack_voice.streaming_voice_chat("test_prompt.wav", "default")