Text Generation
Transformers
English
qwen2
code-generation
python
fine-tuning
Qwen
tools
agent-framework
multi-agent
conversational
Eval Results (legacy)
Instructions to use my-ai-stack/Stack-2-9-finetuned with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use my-ai-stack/Stack-2-9-finetuned with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
model = AutoModelForCausalLM.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use my-ai-stack/Stack-2-9-finetuned with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "my-ai-stack/Stack-2-9-finetuned"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
- SGLang
How to use my-ai-stack/Stack-2-9-finetuned with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "my-ai-stack/Stack-2-9-finetuned" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "my-ai-stack/Stack-2-9-finetuned" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "my-ai-stack/Stack-2-9-finetuned",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
- Docker Model Runner
How to use my-ai-stack/Stack-2-9-finetuned with Docker Model Runner:
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
| // Voice Recording Service - Audio capture for voice input | |
| // | |
| // Handles microphone recording for voice commands by spawning the SoX | |
| // `rec` command (native audio and arecord fallbacks are not implemented here). | |
| import { spawn, type ChildProcess } from 'child_process' | |
| import { readFile } from 'fs/promises' | |
| import { log } from '../utils/logger.js' | |
// Recording configuration

/** Default sample rate; startRecording passes it to `rec -r` when the caller gives none. */
export const RECORDING_SAMPLE_RATE = 16000
/** Default channel count; startRecording passes it to `rec -c` when the caller gives none. */
export const RECORDING_CHANNELS = 1
// The two silence settings are strings because they are forwarded verbatim
// as arguments of the SoX `silence` effect.
/** Duration argument of the second (trailing) group of `silence` arguments. */
const SILENCE_DURATION_SECS = '2.0'
/** Threshold argument, used in both groups of `silence` arguments. */
const SILENCE_THRESHOLD = '3%'
/**
 * Result of checking whether voice recording can be used.
 */
export type RecordingAvailability = {
  /** True when recording can be used. */
  available: boolean
  /** Human-readable explanation when `available` is false; `null` when it is true. */
  reason: string | null
}
/**
 * Optional settings accepted by startRecording.
 */
export type RecordingOptions = {
  /** Append the SoX `silence` effect to the `rec` command; a missing value is treated as true. */
  silenceDetection?: boolean
  /** Sample rate passed to `rec -r`; falls back to RECORDING_SAMPLE_RATE. */
  sampleRate?: number
  /** Channel count passed to `rec -c`; falls back to RECORDING_CHANNELS. */
  channels?: number
}
/**
 * Check if recording dependencies are available.
 *
 * Probes for SoX by running `rec --version`. The dependency counts as present
 * only when the process exits with code 0. It is reported as missing when the
 * executable cannot be spawned (for example ENOENT), exits non-zero, or does
 * not finish within the probe timeout.
 *
 * @returns `available` is true when nothing is missing; `missing` lists a
 *   human-readable name for each dependency that could not be verified.
 */
export async function checkRecordingDependencies(): Promise<{
  available: boolean
  missing: string[]
}> {
  const PROBE_TIMEOUT_MS = 2000
  const missing: string[] = []

  // Check for SoX (rec command)
  const recFound = await new Promise<boolean>((resolve) => {
    let settled = false
    let timer: ReturnType<typeof setTimeout> | undefined

    // A failed spawn emits 'error' and then 'close'; only the first outcome counts.
    const settle = (found: boolean): void => {
      if (settled) return
      settled = true
      if (timer !== undefined) clearTimeout(timer)
      resolve(found)
    }

    try {
      const probe = spawn('rec', ['--version'], { stdio: 'ignore' })
      // Spawn failures are delivered asynchronously here, not thrown by spawn().
      probe.on('error', () => settle(false))
      probe.on('close', (code) => settle(code === 0))
      timer = setTimeout(() => {
        // Do not leave a hung probe process behind.
        probe.kill()
        settle(false)
      }, PROBE_TIMEOUT_MS)
    } catch {
      // spawn() can still throw synchronously (e.g. invalid arguments).
      settle(false)
    }
  })

  if (!recFound) {
    missing.push('sox (rec command)')
  }

  return { available: missing.length === 0, missing }
}
/**
 * Decide whether voice recording can be used in the current environment.
 *
 * Reports unavailable when CLAUDE_CODE_REMOTE is exactly 'true', without
 * probing for tools. Otherwise the answer follows checkRecordingDependencies().
 *
 * @returns availability flag plus a user-facing reason when unavailable.
 */
export async function checkRecordingAvailability(): Promise<RecordingAvailability> {
  // This environment variable marks a remote / no-audio session.
  const isRemote = process.env.CLAUDE_CODE_REMOTE === 'true'
  if (isRemote) {
    const remoteResult: RecordingAvailability = {
      available: false,
      reason: 'Voice mode requires microphone access in local environment',
    }
    return remoteResult
  }

  // Check for SoX or native audio
  const { available: dependenciesPresent } = await checkRecordingDependencies()
  return dependenciesPresent
    ? { available: true, reason: null }
    : {
        available: false,
        reason: `Voice recording requires SoX. Install with: brew install sox (macOS) or sudo apt-get install sox (Linux)`,
      }
}
// Module-level recorder state shared by startRecording, stopRecording and isRecording.
/** Handle of the spawned `rec` process, or null when none is tracked. */
let activeRecorder: ChildProcess | null = null
/** Set to true when a recorder is spawned; reset when it closes, errors, or is stopped. */
let recordingActive = false
/**
 * Start audio recording by spawning SoX's `rec` and streaming its stdout.
 *
 * Audio is requested as headerless (`-t raw`) signed 16-bit samples at the
 * chosen sample rate and channel count. Only one recorder is tracked at a
 * time: if a recording is already active the call is ignored and resolves to
 * false, so the running process is not orphaned.
 *
 * @param onData Callback for audio chunks read from the recorder's stdout
 * @param onEnd Callback when recording ends; invoked exactly once per started
 *   recording, whether the process closed normally or failed to spawn
 * @param options Recording options
 * @returns true when a recorder process was started (spawn failures surface
 *   later through the log and `onEnd`); false when a recording was already
 *   active and nothing was started
 */
export async function startRecording(
  onData: (chunk: Buffer) => void,
  onEnd: () => void,
  options: RecordingOptions = {},
): Promise<boolean> {
  // Overwriting activeRecorder would leave the old process running with no
  // handle to stop it, so refuse to start a second recorder.
  if (recordingActive && activeRecorder) {
    log('[voice] Recording already active, not starting another', {
      pid: activeRecorder.pid,
    })
    return false
  }

  const sampleRate = options.sampleRate ?? RECORDING_SAMPLE_RATE
  const channels = options.channels ?? RECORDING_CHANNELS
  const useSilenceDetection = options.silenceDetection ?? true

  log('[voice] Starting recording', { sampleRate, channels, useSilenceDetection })

  // Build SoX command arguments
  const args = [
    '-q', // quiet
    '--buffer',
    '1024',
    '-t',
    'raw',
    '-r',
    String(sampleRate),
    '-e',
    'signed',
    '-b',
    '16',
    '-c',
    String(channels),
    '-', // stdout
  ]

  // Add silence detection if enabled. Per the SoX manual the first group of
  // arguments controls when audio starts passing through and the second when
  // the effect ends the stream (here after SILENCE_DURATION_SECS of quiet).
  if (useSilenceDetection) {
    args.push(
      'silence',
      '1',
      '0.1',
      SILENCE_THRESHOLD,
      '1',
      SILENCE_DURATION_SECS,
      SILENCE_THRESHOLD,
    )
  }

  const child = spawn('rec', args, {
    stdio: ['pipe', 'pipe', 'pipe'],
  })

  activeRecorder = child
  recordingActive = true

  // A failed spawn emits 'error' followed by 'close'; make sure onEnd runs once.
  let ended = false
  const finish = (): void => {
    if (ended) return
    ended = true
    // Only reset module state if it still refers to this child. After
    // stopRecording() followed by a new startRecording(), a late 'close' from
    // the old process must not clear the new recorder's state.
    if (activeRecorder === child) {
      activeRecorder = null
      recordingActive = false
    }
    onEnd()
  }

  child.stdout?.on('data', (chunk: Buffer) => {
    onData(chunk)
  })

  child.stderr?.on('data', () => {}) // Consume stderr

  child.on('close', finish)

  child.on('error', (err) => {
    log('[voice] Recording error', err)
    finish()
  })

  return true
}
/**
 * Stop the current recording, if any.
 *
 * Sends SIGTERM to the tracked recorder process and clears the module state
 * immediately, without waiting for the process to exit. Does nothing when no
 * recording is active.
 */
export function stopRecording(): void {
  const recorder = activeRecorder
  if (!recordingActive || !recorder) {
    return
  }

  recorder.kill('SIGTERM')
  activeRecorder = null
  recordingActive = false
}
/**
 * Check if recording is currently active.
 *
 * @returns the module-level `recordingActive` flag, which startRecording sets
 *   and which is cleared when the recorder closes, errors, or is stopped.
 */
export function isRecording(): boolean {
  return recordingActive
}
/**
 * Encode an audio buffer as a base64 string for API transfer.
 *
 * @param buffer Bytes to encode
 * @returns base64 text representing exactly the bytes in `buffer`
 */
export function audioToBase64(buffer: Buffer): string {
  const encoded = buffer.toString('base64')
  return encoded
}
/**
 * Decode a base64 string back into an audio buffer.
 *
 * @param base64 Base64-encoded audio bytes
 * @returns a Buffer holding the decoded bytes
 */
export function base64ToAudio(base64: string): Buffer {
  const decoded = Buffer.from(base64, 'base64')
  return decoded
}