Instructions to use my-ai-stack/Stack-2-9-finetuned with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use my-ai-stack/Stack-2-9-finetuned with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

# Build a text-generation pipeline backed by this repository's checkpoint
pipe = pipeline("text-generation", model="my-ai-stack/Stack-2-9-finetuned")
# Chat-style input: a list of role/content message dicts
messages = [
    {"role": "user", "content": "Who are you?"},
]
# Run generation; the return value is discarded here, so capture or print it
# when running this as a script rather than in an interactive session
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

# Tokenizer and causal-LM weights are both loaded from the same repository id
tokenizer = AutoTokenizer.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
model = AutoModelForCausalLM.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
# Chat-style input: a list of role/content message dicts
messages = [
    {"role": "user", "content": "Who are you?"},
]
# Render the messages through the tokenizer's chat template (with the
# generation prompt appended), tokenize them into a dict of PyTorch tensors,
# and move the result to the device the model is on
inputs = tokenizer.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

# Generate at most 40 new tokens for the prompt
outputs = model.generate(**inputs, max_new_tokens=40)
# Skip the first input_ids-length positions of the first output sequence so
# that only the tokens after the prompt are decoded and printed
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use my-ai-stack/Stack-2-9-finetuned with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "my-ai-stack/Stack-2-9-finetuned"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "my-ai-stack/Stack-2-9-finetuned",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker

docker model run hf.co/my-ai-stack/Stack-2-9-finetuned

SGLang

How to use my-ai-stack/Stack-2-9-finetuned with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "my-ai-stack/Stack-2-9-finetuned" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "my-ai-stack/Stack-2-9-finetuned",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "my-ai-stack/Stack-2-9-finetuned" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "my-ai-stack/Stack-2-9-finetuned",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Docker Model Runner
How to use my-ai-stack/Stack-2-9-finetuned with Docker Model Runner:
```
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
```

Stack-2-9-finetuned / stack / training / memory.py

walidsobhie-code

refactor: Squeeze folders further - cleaner structure

65888d5 about 2 months ago

raw

history blame

14.7 kB

	"""
	Persistent Memory System for Self-Evolution
	Stores learned patterns and enables similarity-based retrieval using vector embeddings.
	"""

	import json
	import os
	import sqlite3
	import hashlib
	from datetime import datetime
	from typing import Optional, List, Dict, Any
	from pathlib import Path
	import numpy as np


	class PersistentMemory:
	    """Vector-based persistent memory with SQLite storage.

	    Rows are kept in ``<data_dir>/memory.db`` and one ``.npy`` embedding file
	    per memory is kept in ``<data_dir>/embeddings``.  Each method opens its
	    own short-lived SQLite connection and closes it again, including when a
	    statement raises.
	    """

	    # Length of the pseudo-embedding vectors produced by _compute_embedding.
	    EMBEDDING_DIM = 128

	    # Columns of each table, in the order they are returned to callers.
	    # Queries name these columns explicitly instead of using ``SELECT *`` so
	    # the keys of the returned dictionaries can never drift out of step with
	    # the values.
	    _MEMORY_COLUMNS = ('id', 'content', 'embedding_id', 'category',
	                       'success_rate', 'use_count', 'last_used',
	                       'created_at', 'updated_at', 'metadata')
	    _LESSON_COLUMNS = ('id', 'title', 'description', 'pattern',
	                       'success_count', 'failure_count', 'contexts',
	                       'created_at', 'verified')
	    _IMPROVEMENT_COLUMNS = ('id', 'suggestion', 'category', 'priority',
	                            'implemented', 'impact_score', 'created_at',
	                            'implemented_at')

	    def __init__(self, data_dir: Optional[str] = None):
	        """Create the storage directories and make sure the schema exists.

	        Args:
	            data_dir: Directory for the database and embedding files.  When
	                omitted, a ``data`` directory next to this module is used.
	        """
	        if data_dir is None:
	            data_dir = os.path.join(os.path.dirname(__file__), 'data')
	        self.data_dir = Path(data_dir)
	        self.data_dir.mkdir(exist_ok=True, parents=True)

	        self.db_path = self.data_dir / 'memory.db'
	        self.embeddings_dir = self.data_dir / 'embeddings'
	        self.embeddings_dir.mkdir(exist_ok=True)

	        self._init_database()

	    # ------------------------------------------------------------------
	    # Internal helpers
	    # ------------------------------------------------------------------

	    @staticmethod
	    def _now() -> str:
	        """Return the current UTC time as a naive ISO-8601 string."""
	        # Kept as utcnow() so new rows use the same offset-less text format
	        # as rows already stored in existing databases.
	        return datetime.utcnow().isoformat()

	    def _connect(self) -> sqlite3.Connection:
	        """Open a new connection to the memory database."""
	        return sqlite3.connect(str(self.db_path))

	    def _write(self, sql: str, params: tuple = ()) -> Optional[int]:
	        """Run one modifying statement, commit it and return ``lastrowid``."""
	        conn = self._connect()
	        try:
	            cursor = conn.execute(sql, params)
	            conn.commit()
	            return cursor.lastrowid
	        finally:
	            # Close even when execute/commit raises so no handle is leaked.
	            conn.close()

	    def _read(self, sql: str, params: tuple = ()) -> List[tuple]:
	        """Run one query and return all of its rows."""
	        conn = self._connect()
	        try:
	            return conn.execute(sql, params).fetchall()
	        finally:
	            conn.close()

	    @staticmethod
	    def _as_dicts(columns, rows) -> List[Dict]:
	        """Pair every row with ``columns`` to build one dict per row."""
	        return [dict(zip(columns, row)) for row in rows]

	    def _init_database(self):
	        """Initialize SQLite database with memory schema."""
	        conn = self._connect()
	        try:
	            cursor = conn.cursor()

	            # Core memories table
	            cursor.execute('''
	                CREATE TABLE IF NOT EXISTS memories (
	                    id INTEGER PRIMARY KEY AUTOINCREMENT,
	                    content TEXT NOT NULL,
	                    embedding_id TEXT UNIQUE,
	                    category TEXT,
	                    success_rate REAL DEFAULT 0.5,
	                    use_count INTEGER DEFAULT 0,
	                    last_used TEXT,
	                    created_at TEXT NOT NULL,
	                    updated_at TEXT NOT NULL,
	                    metadata TEXT
	                )
	            ''')

	            # Lessons learned table
	            cursor.execute('''
	                CREATE TABLE IF NOT EXISTS lessons (
	                    id INTEGER PRIMARY KEY AUTOINCREMENT,
	                    title TEXT NOT NULL,
	                    description TEXT NOT NULL,
	                    pattern TEXT,
	                    success_count INTEGER DEFAULT 0,
	                    failure_count INTEGER DEFAULT 0,
	                    contexts TEXT,
	                    created_at TEXT NOT NULL,
	                    verified BOOLEAN DEFAULT 0
	                )
	            ''')

	            # Improvement suggestions table
	            cursor.execute('''
	                CREATE TABLE IF NOT EXISTS improvements (
	                    id INTEGER PRIMARY KEY AUTOINCREMENT,
	                    suggestion TEXT NOT NULL,
	                    category TEXT,
	                    priority INTEGER DEFAULT 5,
	                    implemented BOOLEAN DEFAULT 0,
	                    impact_score REAL DEFAULT 0.0,
	                    created_at TEXT NOT NULL,
	                    implemented_at TEXT
	                )
	            ''')

	            # Session history
	            cursor.execute('''
	                CREATE TABLE IF NOT EXISTS sessions (
	                    id INTEGER PRIMARY KEY AUTOINCREMENT,
	                    session_id TEXT UNIQUE,
	                    started_at TEXT NOT NULL,
	                    ended_at TEXT,
	                    tasks_completed INTEGER DEFAULT 0,
	                    tasks_failed INTEGER DEFAULT 0,
	                    learnings TEXT
	                )
	            ''')

	            # Indexes for faster lookups
	            cursor.execute('CREATE INDEX IF NOT EXISTS idx_memories_category ON memories(category)')
	            cursor.execute('CREATE INDEX IF NOT EXISTS idx_memories_embedding ON memories(embedding_id)')
	            cursor.execute('CREATE INDEX IF NOT EXISTS idx_lessons_pattern ON lessons(pattern)')

	            conn.commit()
	        finally:
	            conn.close()

	    def _generate_embedding_id(self, content: str) -> str:
	        """Generate a deterministic ID for embedding storage."""
	        return hashlib.sha256(content.encode()).hexdigest()[:32]

	    def _compute_embedding(self, text: str) -> np.ndarray:
	        """Compute a simple hash-based pseudo-embedding for similarity.

	        The vector is fully determined by ``text``: its SHA-256 digest seeds
	        a random generator that draws ``EMBEDDING_DIM`` normal samples.
	        """
	        # Using hash-based approach - in production, use actual embeddings
	        seed = int(hashlib.sha256(text.encode()).hexdigest(), 16) % (2**32)
	        # A private RandomState yields the same numbers as seeding the global
	        # generator did, but leaves the process-wide NumPy random state (and
	        # therefore every other user of np.random) untouched.
	        rng = np.random.RandomState(seed)
	        return rng.randn(self.EMBEDDING_DIM).astype(np.float32)

	    # ------------------------------------------------------------------
	    # Memories
	    # ------------------------------------------------------------------

	    def store_memory(self, content: str, category: str = 'general',
	                     metadata: Optional[Dict] = None) -> int:
	        """Store a new memory with embedding.

	        Args:
	            content: Text to remember; also determines the embedding id.
	            category: Free-form label stored with the memory.
	            metadata: Optional JSON-serialisable dict; a falsy value is
	                stored as NULL.

	        Returns:
	            The row id of the inserted memory.
	        """
	        embedding_id = self._generate_embedding_id(content)

	        # Save embedding
	        np.save(self.embeddings_dir / f'{embedding_id}.npy',
	                self._compute_embedding(content))

	        now = self._now()
	        # NOTE(review): embedding_id is UNIQUE and derived from content, so
	        # storing the same content again replaces the earlier row - the
	        # returned id is a new one and success_rate/use_count restart from
	        # their column defaults.  Confirm that this reset is intended.
	        return self._write('''
	            INSERT OR REPLACE INTO memories
	            (content, embedding_id, category, created_at, updated_at, metadata)
	            VALUES (?, ?, ?, ?, ?, ?)
	        ''', (content, embedding_id, category, now, now,
	              json.dumps(metadata) if metadata else None))

	    def find_similar(self, query: str, limit: int = 5,
	                     min_similarity: float = 0.3) -> List[Dict]:
	        """Find similar memories using vector similarity.

	        Args:
	            query: Text whose pseudo-embedding is compared to stored ones.
	            limit: Maximum number of results to return.
	            min_similarity: Cosine-similarity threshold a memory must reach.

	        Returns:
	            Memory dicts with an added ``'similarity'`` key, best match first.
	            Memories whose embedding file is missing are skipped.
	        """
	        query_embedding = self._compute_embedding(query)
	        # The query norm does not change between memories; compute it once.
	        query_norm = np.linalg.norm(query_embedding)

	        results = []
	        for mem in self.get_all_memories():
	            emb_path = self.embeddings_dir / f"{mem['embedding_id']}.npy"
	            if not emb_path.exists():
	                continue
	            stored_emb = np.load(emb_path)
	            # The small epsilon guards against division by zero.
	            similarity = float(np.dot(query_embedding, stored_emb) /
	                               (query_norm * np.linalg.norm(stored_emb) + 1e-8))
	            if similarity >= min_similarity:
	                results.append({**mem, 'similarity': similarity})

	        # Sort by similarity and return top results
	        results.sort(key=lambda item: item['similarity'], reverse=True)
	        return results[:limit]

	    def get_all_memories(self, category: Optional[str] = None) -> List[Dict]:
	        """Retrieve all memories, optionally filtered by category."""
	        sql = f"SELECT {', '.join(self._MEMORY_COLUMNS)} FROM memories"
	        if category:
	            rows = self._read(sql + ' WHERE category = ?', (category,))
	        else:
	            rows = self._read(sql)
	        return self._as_dicts(self._MEMORY_COLUMNS, rows)

	    def update_memory_stats(self, memory_id: int, success: bool):
	        """Update success/failure stats for a memory.

	        Folds the outcome into the running average ``success_rate``,
	        increments ``use_count`` and stamps ``last_used``.  An unknown
	        ``memory_id`` is a no-op.
	        """
	        # One UPDATE instead of SELECT-then-UPDATE: the right-hand sides are
	        # evaluated against the row's current values, so the running average
	        # cannot be computed from a stale read.
	        self._write('''
	            UPDATE memories
	            SET success_rate = (success_rate * use_count + ?) / (use_count + 1),
	                use_count = use_count + 1,
	                last_used = ?
	            WHERE id = ?
	        ''', (1.0 if success else 0.0, self._now(), memory_id))

	    # ------------------------------------------------------------------
	    # Lessons
	    # ------------------------------------------------------------------

	    def add_lesson(self, title: str, description: str,
	                   pattern: Optional[str] = None,
	                   context: Optional[str] = None) -> int:
	        """Add a new lesson learned and return its row id.

	        ``context``, when given, is stored as a one-element JSON list;
	        otherwise an empty JSON list is stored.
	        """
	        contexts = json.dumps([context] if context else [])
	        return self._write('''
	            INSERT INTO lessons (title, description, pattern, contexts, created_at)
	            VALUES (?, ?, ?, ?, ?)
	        ''', (title, description, pattern, contexts, self._now()))

	    def update_lesson_stats(self, lesson_id: int, success: bool):
	        """Update lesson success/failure counts."""
	        if success:
	            sql = 'UPDATE lessons SET success_count = success_count + 1 WHERE id = ?'
	        else:
	            sql = 'UPDATE lessons SET failure_count = failure_count + 1 WHERE id = ?'
	        self._write(sql, (lesson_id,))

	    def get_lessons(self, verified_only: bool = False) -> List[Dict]:
	        """Retrieve lessons, optionally filtered by verification status."""
	        sql = f"SELECT {', '.join(self._LESSON_COLUMNS)} FROM lessons"
	        if verified_only:
	            sql += ' WHERE verified = 1'
	        return self._as_dicts(self._LESSON_COLUMNS, self._read(sql))

	    # ------------------------------------------------------------------
	    # Improvements
	    # ------------------------------------------------------------------

	    def add_improvement(self, suggestion: str, category: str = 'general',
	                        priority: int = 5) -> int:
	        """Add an improvement suggestion and return its row id."""
	        return self._write('''
	            INSERT INTO improvements (suggestion, category, priority, created_at)
	            VALUES (?, ?, ?, ?)
	        ''', (suggestion, category, priority, self._now()))

	    def mark_improvement_implemented(self, improvement_id: int, impact_score: float = 0.0):
	        """Mark an improvement as implemented and record its impact score."""
	        self._write('''
	            UPDATE improvements
	            SET implemented = 1, implemented_at = ?, impact_score = ?
	            WHERE id = ?
	        ''', (self._now(), impact_score, improvement_id))

	    def get_pending_improvements(self) -> List[Dict]:
	        """Get unimplemented improvements, largest ``priority`` value first."""
	        rows = self._read(f'''
	            SELECT {', '.join(self._IMPROVEMENT_COLUMNS)} FROM improvements
	            WHERE implemented = 0
	            ORDER BY priority DESC
	        ''')
	        return self._as_dicts(self._IMPROVEMENT_COLUMNS, rows)

	    # ------------------------------------------------------------------
	    # Sessions
	    # ------------------------------------------------------------------

	    def log_session(self, session_id: str) -> int:
	        """Log the start of a new session and return its row id.

	        Logging a ``session_id`` that already exists replaces that row.
	        """
	        return self._write('''
	            INSERT OR REPLACE INTO sessions (session_id, started_at)
	            VALUES (?, ?)
	        ''', (session_id, self._now()))

	    def end_session(self, session_id: str, tasks_completed: int,
	                    tasks_failed: int, learnings: Optional[str] = None):
	        """End a session and record its stats."""
	        self._write('''
	            UPDATE sessions
	            SET ended_at = ?, tasks_completed = ?, tasks_failed = ?, learnings = ?
	            WHERE session_id = ?
	        ''', (self._now(), tasks_completed, tasks_failed,
	              learnings, session_id))

	    # ------------------------------------------------------------------
	    # Reporting
	    # ------------------------------------------------------------------

	    def get_stats(self) -> Dict[str, Any]:
	        """Get overall system statistics.

	        Returns:
	            A dict of counts and averages over all four tables.  Aggregates
	            over empty tables are reported as 0, and
	            ``overall_success_rate`` is 0.0 when no task has been recorded.
	        """
	        stats = {}
	        conn = self._connect()
	        try:
	            cursor = conn.cursor()

	            # Memory stats
	            cursor.execute('SELECT COUNT(*), AVG(success_rate), SUM(use_count) FROM memories')
	            mem_stats = cursor.fetchone()
	            stats['total_memories'] = mem_stats[0]
	            # AVG/SUM are NULL on an empty table, hence the "or 0" fallbacks.
	            stats['avg_success_rate'] = round(mem_stats[1] or 0, 3)
	            stats['total_uses'] = mem_stats[2] or 0

	            # Lesson stats
	            cursor.execute('SELECT COUNT(*), SUM(success_count), SUM(failure_count) FROM lessons')
	            lesson_stats = cursor.fetchone()
	            stats['total_lessons'] = lesson_stats[0]
	            stats['lesson_successes'] = lesson_stats[1] or 0
	            stats['lesson_failures'] = lesson_stats[2] or 0

	            # Improvement stats
	            cursor.execute('SELECT COUNT(*) FROM improvements WHERE implemented = 0')
	            stats['pending_improvements'] = cursor.fetchone()[0]

	            cursor.execute('SELECT COUNT(*), AVG(impact_score) FROM improvements WHERE implemented = 1')
	            imp_stats = cursor.fetchone()
	            stats['implemented_improvements'] = imp_stats[0]
	            stats['avg_impact'] = round(imp_stats[1] or 0, 3)

	            # Session stats
	            cursor.execute('SELECT SUM(tasks_completed), SUM(tasks_failed) FROM sessions')
	            session_stats = cursor.fetchone()
	            stats['total_tasks_completed'] = session_stats[0] or 0
	            stats['total_tasks_failed'] = session_stats[1] or 0
	        finally:
	            conn.close()

	        total_tasks = stats['total_tasks_completed'] + stats['total_tasks_failed']
	        if total_tasks > 0:
	            stats['overall_success_rate'] = round(
	                stats['total_tasks_completed'] / total_tasks, 3)
	        else:
	            stats['overall_success_rate'] = 0.0

	        return stats


	# Module-wide singleton, created on the first get_memory() call
	_memory_instance = None


	def get_memory() -> PersistentMemory:
	    """Return the shared PersistentMemory, building it on first use."""
	    global _memory_instance
	    if _memory_instance is not None:
	        return _memory_instance
	    _memory_instance = PersistentMemory()
	    return _memory_instance