Text Generation
Transformers
English
qwen2
code-generation
python
fine-tuning
Qwen
tools
agent-framework
multi-agent
conversational
Eval Results (legacy)
Instructions to use my-ai-stack/Stack-2-9-finetuned with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use my-ai-stack/Stack-2-9-finetuned with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
model = AutoModelForCausalLM.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use my-ai-stack/Stack-2-9-finetuned with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "my-ai-stack/Stack-2-9-finetuned"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
- SGLang
How to use my-ai-stack/Stack-2-9-finetuned with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "my-ai-stack/Stack-2-9-finetuned" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "my-ai-stack/Stack-2-9-finetuned" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use my-ai-stack/Stack-2-9-finetuned with Docker Model Runner:
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
import io
import json
import os
from typing import Optional, Union

import requests

from voice_client import VoiceClient
class StackWithVoice:
    """Glue between the Stack 2.9 chat API and a voice client.

    Text prompts are sent to ``{stack_api_url}/api/chat``; the returned text
    is handed to ``VoiceClient.synthesize`` to produce audio. Speech-to-text
    is a placeholder: see ``_audio_to_text``.
    """

    # Prompt used by ``_audio_to_text`` when no transcript file is found.
    _FALLBACK_PROMPT = "This is a test voice prompt."

    def __init__(
        self,
        stack_api_url: str,
        voice_api_url: str = "http://localhost:8000",
        request_timeout: Optional[float] = 60.0,
    ):
        """Create the HTTP session and the voice client.

        Args:
            stack_api_url: Base URL of the Stack 2.9 API (no trailing
                ``/api/chat``).
            voice_api_url: URL passed to ``VoiceClient``.
            request_timeout: Seconds to wait for the Stack API before giving
                up; ``None`` disables the timeout (waits indefinitely).
        """
        self.stack_api_url = stack_api_url
        self.voice_client = VoiceClient(voice_api_url)
        self.session = requests.Session()
        self.request_timeout = request_timeout
        # Cache for voice models to avoid repeated API calls
        self._voice_cache = {}

    def _get_stack_response(self, prompt: str) -> str:
        """Send ``prompt`` to the Stack 2.9 chat endpoint and return its reply.

        Returns:
            The ``"response"`` field of the JSON reply, or ``""`` if the
            field is absent.

        Raises:
            Exception: If the HTTP request fails or returns an error status;
                the underlying ``requests`` error is chained as the cause.
        """
        try:
            response = self.session.post(
                f"{self.stack_api_url}/api/chat",
                json={"prompt": prompt, "model": "stack-2.9"},
                headers={"Content-Type": "application/json"},
                # Without a timeout a stalled server blocks the caller forever.
                timeout=self.request_timeout,
            )
            response.raise_for_status()
            data = response.json()
        except requests.RequestException as e:
            raise Exception(f"Stack API request failed: {str(e)}") from e
        return data.get("response", "")

    def _get_voice_model(self, voice_name: str) -> Optional[dict]:
        """Look up ``voice_name``, consulting the local cache first.

        Returns:
            ``{"name": voice_name}`` when the voice client lists a matching
            voice, otherwise ``None``. Lookup failures are reported on stdout
            and also yield ``None`` (best-effort).
        """
        if voice_name in self._voice_cache:
            return self._voice_cache[voice_name]
        try:
            voices = self.voice_client.list_voices()
        except Exception as e:
            # Deliberately best-effort: a failed lookup must not abort the caller.
            print(f"Warning: Failed to get voice models: {e}")
            return None
        for voice in voices:
            if voice == voice_name:
                # Cache and return the same object so hits and misses agree.
                info = {"name": voice_name}
                self._voice_cache[voice_name] = info
                return info
        return None

    def voice_chat(self, prompt_audio_path: str, voice_name: str = "default") -> Optional[bytes]:
        """Complete voice chat workflow: audio → text → response → audio.

        Returns:
            Whatever ``VoiceClient.synthesize`` returns for the reply, or
            ``None`` when either the prompt text or the reply is empty.
        """
        # Step 1: Convert audio to text (placeholder - in real implementation, use speech-to-text)
        print(f"Converting audio to text: {prompt_audio_path}")
        prompt_text = self._audio_to_text(prompt_audio_path)
        if not prompt_text:
            return None
        print(f"User prompt: {prompt_text}")

        # Step 2: Get response from Stack 2.9
        print("Getting response from Stack 2.9...")
        response_text = self._get_stack_response(prompt_text)
        if not response_text:
            return None
        print(f"Stack response: {response_text}")

        # Step 3: Convert response to audio
        print(f"Generating voice response with voice: {voice_name}")
        return self.voice_client.synthesize(response_text, voice_name)

    def _audio_to_text(self, audio_path: str) -> str:
        """Convert audio to text (placeholder implementation).

        No speech recognition happens here. The method looks for a transcript
        file that shares the audio file's name but ends in ``.txt`` and
        returns its stripped contents; when no such file exists it returns a
        fixed test prompt.
        """
        # Swap only the final extension. Chained str.replace would also
        # rewrite ".wav"/".mp3" inside directory names and would leave other
        # extensions untouched (making the audio file itself the "transcript").
        text_path = os.path.splitext(audio_path)[0] + ".txt"
        if os.path.exists(text_path):
            with open(text_path, "r", encoding="utf-8") as f:
                return f.read().strip()
        # Fallback: return a generic prompt
        return self._FALLBACK_PROMPT

    def voice_command(self, command: str, voice_name: str = "default") -> Optional[bytes]:
        """Execute voice command and get spoken response.

        The command is not parsed; it is forwarded to Stack 2.9 verbatim.

        Returns:
            Whatever ``VoiceClient.synthesize`` returns for the reply, or
            ``None`` when the reply is empty.
        """
        print(f"Executing voice command: {command}")
        # In a real implementation, you would parse the command and execute appropriate actions
        # For now, just pass it to Stack 2.9 as-is
        response_text = self._get_stack_response(command)
        if not response_text:
            return None
        print(f"Command response: {response_text}")
        # Generate voice response
        return self.voice_client.synthesize(response_text, voice_name)

    def streaming_voice_chat(
        self,
        prompt_audio_path: str,
        voice_name: str = "default",
        output_path: str = "./streaming_response.wav",
    ) -> None:
        """Stream voice chat (placeholder implementation).

        Currently non-incremental: the full reply is fetched, synthesized
        with ``stream=True`` and handed to ``VoiceClient.download_audio``
        together with ``output_path``.

        Args:
            prompt_audio_path: Audio file whose transcript is the prompt.
            voice_name: Voice passed to the synthesizer.
            output_path: Destination passed to ``download_audio``.
        """
        print("Starting streaming voice chat...")
        # Get initial response
        prompt_text = self._audio_to_text(prompt_audio_path)
        if not prompt_text:
            # Mirror voice_chat: never send an empty prompt to the API.
            print("No prompt text available")
            return
        response_text = self._get_stack_response(prompt_text)
        if not response_text:
            print("No response received")
            return
        print("Streaming response:")
        print(response_text)
        # In a real streaming implementation, you would:
        # 1. Stream audio chunks to speech-to-text
        # 2. Send partial prompts to Stack 2.9
        # 3. Stream partial responses to TTS
        # 4. Play audio as it's generated
        # For now, just generate the complete response
        audio_data = self.voice_client.synthesize(response_text, voice_name, stream=True)
        # Save to file for demonstration
        self.voice_client.download_audio(audio_data, output_path)
        print(f"Streaming response saved to: {output_path}")
# Example usage: run this module directly to construct the integration object.
if __name__ == "__main__":
    stack_url = "http://localhost:5000"  # Example Stack 2.9 API URL
    voice_url = "http://localhost:8000"
    stack_voice = StackWithVoice(stack_api_url=stack_url, voice_api_url=voice_url)
    print("Testing Stack with Voice integration...")

    # The calls below need both services to be running; uncomment to try them.

    # Voice chat (audio file in, audio out):
    # audio_data = stack_voice.voice_chat("test_prompt.wav", "default")
    # if audio_data:
    #     stack_voice.voice_client.download_audio(audio_data, "stack_response.wav")
    #     print("Voice chat response saved to stack_response.wav")

    # Voice command (text in, audio out):
    # audio_data = stack_voice.voice_command("Write a Python function to calculate factorial", "default")
    # if audio_data:
    #     stack_voice.voice_client.download_audio(audio_data, "command_response.wav")
    #     print("Voice command response saved to command_response.wav")

    # Streaming placeholder:
    # stack_voice.streaming_voice_chat("test_prompt.wav", "default")