Tags: Text Generation, Transformers, English, qwen2, code-generation, python, fine-tuning, Qwen, tools, agent-framework, multi-agent, conversational
Instructions for using my-ai-stack/Stack-2-9-finetuned with libraries, inference providers, notebooks, and local apps. Follow the links below to get started.
- Libraries
- Transformers
How to use my-ai-stack/Stack-2-9-finetuned with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
model = AutoModelForCausalLM.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
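To watch tokens appear as they are generated, Transformers' `TextStreamer` can be passed to `generate`. A minimal sketch, reusing the prompt from above; the token budget is illustrative:

```python
# Stream generated tokens to stdout (sketch; settings are illustrative)
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

tokenizer = AutoTokenizer.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
model = AutoModelForCausalLM.from_pretrained("my-ai-stack/Stack-2-9-finetuned")

inputs = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Who are you?"}],
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

# skip_prompt=True prints only the newly generated tokens, not the prompt
streamer = TextStreamer(tokenizer, skip_prompt=True)
model.generate(**inputs, max_new_tokens=200, streamer=streamer)
```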
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use my-ai-stack/Stack-2-9-finetuned with vLLM:
Install from pip and serve the model:
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "my-ai-stack/Stack-2-9-finetuned"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "my-ai-stack/Stack-2-9-finetuned",
        "messages": [
            { "role": "user", "content": "What is the capital of France?" }
        ]
    }'
```
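Because the vLLM server exposes an OpenAI-compatible API, the official `openai` Python client can also be pointed at it. A minimal sketch; the API key is a placeholder, since the server started above runs without authentication:

```python
# Query the local vLLM server through the OpenAI-compatible API
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
response = client.chat.completions.create(
    model="my-ai-stack/Stack-2-9-finetuned",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
)
print(response.choices[0].message.content)
```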
Use Docker

```bash
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
```
- SGLang
How to use my-ai-stack/Stack-2-9-finetuned with SGLang:
Install from pip and serve the model:
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "my-ai-stack/Stack-2-9-finetuned" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "my-ai-stack/Stack-2-9-finetuned",
        "messages": [
            { "role": "user", "content": "What is the capital of France?" }
        ]
    }'
```
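The SGLang server is OpenAI-compatible as well, so the same `openai` client works against it. A sketch of a streaming request to the server started above (again with a placeholder API key):

```python
# Stream a chat completion from the local SGLang server
from openai import OpenAI

client = OpenAI(base_url="http://localhost:30000/v1", api_key="EMPTY")
stream = client.chat.completions.create(
    model="my-ai-stack/Stack-2-9-finetuned",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    stream=True,  # receive tokens incrementally instead of one final message
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
```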
Use Docker images

```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "my-ai-stack/Stack-2-9-finetuned" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "my-ai-stack/Stack-2-9-finetuned",
        "messages": [
            { "role": "user", "content": "What is the capital of France?" }
        ]
    }'
```

- Docker Model Runner
How to use my-ai-stack/Stack-2-9-finetuned with Docker Model Runner:
```bash
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
```
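Docker Model Runner also exposes an OpenAI-compatible endpoint once host TCP access is enabled. A sketch using the `openai` client; the port and path follow Docker's documented defaults at the time of writing, so verify them for your installation:

```python
# Query Docker Model Runner's OpenAI-compatible endpoint.
# Assumes TCP access was enabled first, e.g.:
#   docker desktop enable model-runner --tcp 12434
# Port and path are Docker's documented defaults; verify for your setup.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:12434/engines/v1", api_key="not-needed")
response = client.chat.completions.create(
    model="hf.co/my-ai-stack/Stack-2-9-finetuned",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
)
print(response.choices[0].message.content)
```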
File view (8,261 bytes, commit `8f05ad1`):

```python
"""
Performance Monitoring System
Monitors and tracks model performance metrics.
"""

from typing import Dict, List, Optional, Any
from datetime import datetime, timedelta
from collections import defaultdict
import json
from pathlib import Path


class PerformanceMetric:
    """Represents a single performance metric."""

    def __init__(
        self,
        metric_type: str,
        value: float,
        unit: str = "",
        metadata: Optional[Dict[str, Any]] = None,
    ):
        self.metric_type = metric_type
        self.value = value
        self.unit = unit
        self.metadata = metadata or {}
        self.timestamp = datetime.now()

    def to_dict(self) -> Dict[str, Any]:
        return {
            "metric_type": self.metric_type,
            "value": self.value,
            "unit": self.unit,
            "metadata": self.metadata,
            "timestamp": self.timestamp.isoformat(),
        }


class PerformanceMonitor:
    """Monitors model performance over time."""

    def __init__(
        self,
        storage_path: str = "data/performance",
    ):
        """
        Initialize the performance monitor.

        Args:
            storage_path: Path to store performance data
        """
        self.storage_path = Path(storage_path)
        self.storage_path.mkdir(parents=True, exist_ok=True)
        self.metrics: List[PerformanceMetric] = []
        self._session_stats: Dict[str, Any] = {
            "total_sessions": 0,
            "total_messages": 0,
            "total_conversations": 0,
        }

    def record_metric(
        self,
        metric_type: str,
        value: float,
        unit: str = "",
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Record a performance metric."""
        metric = PerformanceMetric(metric_type, value, unit, metadata)
        self.metrics.append(metric)

    def record_response_time(self, seconds: float) -> None:
        """Record a response time."""
        self.record_metric("response_time", seconds, "seconds")

    def record_token_count(self, prompt_tokens: int, completion_tokens: int) -> None:
        """Record token counts for one exchange."""
        self.record_metric(
            "prompt_tokens",
            prompt_tokens,
            "tokens",
            {"completion_tokens": completion_tokens},
        )

    def record_successful_interaction(self) -> None:
        """Record a successful interaction."""
        self.record_metric("successful_interaction", 1, "count")

    def record_failed_interaction(self, error_type: str) -> None:
        """Record a failed interaction."""
        self.record_metric(
            "failed_interaction",
            1,
            "count",
            {"error_type": error_type},
        )

    def record_user_rating(self, rating: int) -> None:
        """Record a user rating."""
        self.record_metric("user_rating", rating, "stars")

    def get_metrics(
        self,
        metric_type: Optional[str] = None,
        since: Optional[datetime] = None,
        limit: int = 100,
    ) -> List[PerformanceMetric]:
        """Get recorded metrics, optionally filtered; capped at `limit` results."""
        results = self.metrics
        if metric_type:
            results = [m for m in results if m.metric_type == metric_type]
        if since:
            results = [m for m in results if m.timestamp >= since]
        return results[-limit:]

    def get_average_response_time(
        self,
        since: Optional[datetime] = None,
    ) -> float:
        """Get the average response time."""
        metrics = self.get_metrics("response_time", since=since)
        if not metrics:
            return 0.0
        return sum(m.value for m in metrics) / len(metrics)

    def get_success_rate(
        self,
        since: Optional[datetime] = None,
    ) -> float:
        """Get the interaction success rate."""
        successful = len(self.get_metrics("successful_interaction", since=since))
        failed = len(self.get_metrics("failed_interaction", since=since))
        total = successful + failed
        if total == 0:
            return 0.0
        return successful / total

    def get_average_rating(
        self,
        since: Optional[datetime] = None,
    ) -> float:
        """Get the average user rating."""
        ratings = self.get_metrics("user_rating", since=since)
        if not ratings:
            return 0.0
        return sum(m.value for m in ratings) / len(ratings)

    def get_summary(
        self,
        since: Optional[datetime] = None,
    ) -> Dict[str, Any]:
        """Get a performance summary."""
        # Label the period before defaulting `since`; comparing it against a
        # freshly computed datetime.now() afterwards would never match.
        period = "custom" if since is not None else "last_24_hours"
        since = since or (datetime.now() - timedelta(hours=24))
        return {
            "period": period,
            "average_response_time": self.get_average_response_time(since),
            "success_rate": self.get_success_rate(since),
            "average_rating": self.get_average_rating(since),
            "total_interactions": (
                len(self.get_metrics("successful_interaction", since=since))
                + len(self.get_metrics("failed_interaction", since=since))
            ),
            # Sums prompt tokens only; completion counts live in each
            # metric's metadata.
            "total_tokens": sum(
                m.value for m in self.get_metrics("prompt_tokens", since=since)
            ),
        }

    def increment_session_count(self) -> None:
        """Increment the session count."""
        self._session_stats["total_sessions"] += 1

    def increment_message_count(self) -> None:
        """Increment the message count."""
        self._session_stats["total_messages"] += 1

    def get_session_stats(self) -> Dict[str, Any]:
        """Get session statistics."""
        return self._session_stats.copy()

    def export_metrics(
        self,
        filepath: Optional[str] = None,
    ) -> str:
        """Export metrics to a JSON file and return its path."""
        filepath = filepath or str(
            self.storage_path / f"metrics_{datetime.now().strftime('%Y%m%d')}.json"
        )
        data = {
            "exported_at": datetime.now().isoformat(),
            "metrics": [m.to_dict() for m in self.metrics],
            "session_stats": self._session_stats,
        }
        Path(filepath).write_text(json.dumps(data, indent=2))
        return filepath

    def load_metrics(
        self,
        filepath: str,
    ) -> None:
        """Load metrics from a JSON file."""
        data = json.loads(Path(filepath).read_text())
        for metric_data in data.get("metrics", []):
            metric = PerformanceMetric(
                metric_type=metric_data["metric_type"],
                value=metric_data["value"],
                unit=metric_data.get("unit", ""),
                metadata=metric_data.get("metadata", {}),
            )
            metric.timestamp = datetime.fromisoformat(metric_data["timestamp"])
            self.metrics.append(metric)
        if "session_stats" in data:
            self._session_stats.update(data["session_stats"])

    def clear_old_metrics(self, days: int = 30) -> int:
        """Clear metrics older than the specified number of days."""
        cutoff = datetime.now() - timedelta(days=days)
        original_count = len(self.metrics)
        self.metrics = [m for m in self.metrics if m.timestamp > cutoff]
        return original_count - len(self.metrics)

    def get_trend(
        self,
        metric_type: str,
        hours: int = 24,
    ) -> List[Dict[str, Any]]:
        """Get hourly trend data for a metric."""
        since = datetime.now() - timedelta(hours=hours)
        metrics = self.get_metrics(metric_type, since=since)
        # Group values by hour
        hourly_data: Dict[str, List[float]] = defaultdict(list)
        for m in metrics:
            hour_key = m.timestamp.strftime("%Y-%m-%d %H:00")
            hourly_data[hour_key].append(m.value)
        # Calculate hourly averages
        trend = []
        for hour, values in sorted(hourly_data.items()):
            avg = sum(values) / len(values) if values else 0
            trend.append({
                "hour": hour,
                "average": avg,
                "count": len(values),
            })
        return trend

    def __repr__(self) -> str:
        return f"PerformanceMonitor(metrics={len(self.metrics)}, sessions={self._session_stats['total_sessions']})"
```
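A minimal usage sketch for the monitor above; the recorded values are illustrative:

```python
monitor = PerformanceMonitor(storage_path="data/performance")

# Record a few illustrative data points
monitor.record_response_time(1.42)
monitor.record_token_count(prompt_tokens=350, completion_tokens=120)
monitor.record_successful_interaction()
monitor.record_user_rating(5)
monitor.increment_session_count()

print(monitor.get_summary())                        # defaults to the last 24 hours
print(monitor.get_trend("response_time", hours=6))  # hourly averages
print(monitor.export_metrics())                     # path of the exported JSON file
```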