RayMelius Claude Sonnet 4.6 committed on
Commit
ae68385
·
1 Parent(s): dd9bec6

Add Soci fine-tuned model and training data pipeline

Browse files

- Add RayMelius/soci-agent-q4 (fine-tuned Qwen2.5-0.5B) to HF provider list
- Add soci-agent as named Ollama option for GGUF-loaded model
- Add MODEL_HF_SOCI and MODEL_OLLAMA_SOCI constants to llm.py
- Persist conversation_history in simulation snapshots (to_dict/from_dict)
- scripts/collect_training_data.py: poll Render API, save raw JSONL
- scripts/convert_to_training_jsonl.py: convert to SFT chat format
- scripts/finetune_local.py: local Unsloth fine-tune (RTX 4050, Windows-safe)
Fixes: transformers 4.56 list_repo_templates patch, TORCHINDUCTOR_DISABLE
Round 1: 116 examples, 3 epochs, loss 1.9222, LoRA -> RayMelius/soci-agent-q4

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

scripts/collect_training_data.py ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ collect_training_data.py β€” Poll the running Soci simulation and save raw
3
+ conversation + event data for later training.
4
+
5
+ Polls every POLL_INTERVAL seconds, deduplicates by conversation ID,
6
+ and writes JSONL to data/training/raw/.
7
+
8
+ Usage:
9
+ # Poll Render deployment (default):
10
+ "C:/Users/xabon/.conda/envs/ml-env/python.exe" scripts/collect_training_data.py
11
+
12
+ # Poll a different base URL:
13
+ "C:/Users/xabon/.conda/envs/ml-env/python.exe" scripts/collect_training_data.py --url http://localhost:8000
14
+
15
+ # Run once (no loop):
16
+ "C:/Users/xabon/.conda/envs/ml-env/python.exe" scripts/collect_training_data.py --once
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import argparse
22
+ import json
23
+ import os
24
+ import time
25
+ from datetime import datetime
26
+ from pathlib import Path
27
+
28
+ import urllib.request
29
+ import urllib.error
30
+
31
+ BASE_URL = "https://soci-tl3c.onrender.com"
32
+ POLL_INTERVAL = 30 # seconds
33
+ RAW_DIR = Path("data/training/raw")
34
+ RAW_DIR.mkdir(parents=True, exist_ok=True)
35
+
36
+ # Files to accumulate into
37
+ today = datetime.now().strftime("%Y%m%d")
38
+ CONV_FILE = RAW_DIR / f"conversations_{today}.jsonl"
39
+ EVENT_FILE = RAW_DIR / f"events_{today}.jsonl"
40
+ AGENT_CACHE_FILE = RAW_DIR / "agents_cache.json"
41
+
42
+ # In-memory dedup sets (also rehydrated from disk on startup)
43
+ _seen_conv_ids: set[str] = set()
44
+ _seen_event_ticks_msgs: set[str] = set()
45
+
46
+
47
+ def fetch_json(url: str, timeout: int = 15) -> dict | None:
48
+ try:
49
+ with urllib.request.urlopen(url, timeout=timeout) as resp:
50
+ return json.loads(resp.read().decode())
51
+ except urllib.error.URLError as e:
52
+ print(f" [WARN] fetch failed: {url} β€” {e}")
53
+ return None
54
+ except Exception as e:
55
+ print(f" [ERR] {url}: {e}")
56
+ return None
57
+
58
+
59
+ def load_seen_ids() -> None:
60
+ """Rehydrate dedup sets from existing JSONL files."""
61
+ if CONV_FILE.exists():
62
+ with open(CONV_FILE, encoding="utf-8") as f:
63
+ for line in f:
64
+ try:
65
+ d = json.loads(line)
66
+ cid = d.get("id", "")
67
+ if cid:
68
+ _seen_conv_ids.add(cid)
69
+ except json.JSONDecodeError:
70
+ pass
71
+ if EVENT_FILE.exists():
72
+ with open(EVENT_FILE, encoding="utf-8") as f:
73
+ for line in f:
74
+ try:
75
+ d = json.loads(line)
76
+ key = f"{d.get('tick','')}|{d.get('message','')}"
77
+ _seen_event_ticks_msgs.add(key)
78
+ except json.JSONDecodeError:
79
+ pass
80
+ print(f" Loaded dedup: {len(_seen_conv_ids)} convs, {len(_seen_event_ticks_msgs)} events")
81
+
82
+
83
+ def poll_conversations(base_url: str) -> int:
84
+ """Fetch conversation history and save new ones. Returns count of new convs."""
85
+ data = fetch_json(f"{base_url}/api/conversations?limit=200&include_history=true")
86
+ if data is None:
87
+ return 0
88
+
89
+ new_count = 0
90
+ with open(CONV_FILE, "a", encoding="utf-8") as f:
91
+ for conv in data.get("active", []) + data.get("recent", []):
92
+ cid = conv.get("id", "")
93
+ if not cid or cid in _seen_conv_ids:
94
+ continue
95
+ if len(conv.get("turns", [])) < 2:
96
+ # Skip single-turn (incomplete) conversations
97
+ continue
98
+ conv["_collected_at"] = datetime.now().isoformat()
99
+ conv["_source"] = "api"
100
+ f.write(json.dumps(conv, ensure_ascii=False) + "\n")
101
+ _seen_conv_ids.add(cid)
102
+ new_count += 1
103
+
104
+ return new_count
105
+
106
+
107
+ def poll_events(base_url: str) -> int:
108
+ """Fetch recent events and save new ones. Returns count of new events."""
109
+ data = fetch_json(f"{base_url}/api/events?limit=500")
110
+ if data is None:
111
+ return 0
112
+
113
+ new_count = 0
114
+ with open(EVENT_FILE, "a", encoding="utf-8") as f:
115
+ for event in data.get("events", []):
116
+ key = f"{event.get('tick','')}|{event.get('message','')}"
117
+ if key in _seen_event_ticks_msgs:
118
+ continue
119
+ event["_collected_at"] = datetime.now().isoformat()
120
+ f.write(json.dumps(event, ensure_ascii=False) + "\n")
121
+ _seen_event_ticks_msgs.add(key)
122
+ new_count += 1
123
+
124
+ return new_count
125
+
126
+
127
+ def refresh_agent_cache(base_url: str) -> None:
128
+ """Refresh the local agent persona cache (done once per session)."""
129
+ agents_data = fetch_json(f"{base_url}/api/agents")
130
+ if not agents_data:
131
+ return
132
+ # Fetch full detail for named (non-generated) agents
133
+ full_agents = {}
134
+ for aid in agents_data:
135
+ detail = fetch_json(f"{base_url}/api/agents/{aid}")
136
+ if detail:
137
+ full_agents[aid] = detail
138
+ time.sleep(0.2) # Be gentle to the API
139
+
140
+ AGENT_CACHE_FILE.write_text(
141
+ json.dumps(full_agents, indent=2, ensure_ascii=False), encoding="utf-8"
142
+ )
143
+ print(f" Agent cache refreshed: {len(full_agents)} agents -> {AGENT_CACHE_FILE}")
144
+
145
+
146
+ def print_stats() -> None:
147
+ conv_count = 0
148
+ if CONV_FILE.exists():
149
+ with open(CONV_FILE, encoding="utf-8") as f:
150
+ conv_count = sum(1 for line in f if line.strip())
151
+ ev_count = 0
152
+ if EVENT_FILE.exists():
153
+ with open(EVENT_FILE, encoding="utf-8") as f:
154
+ ev_count = sum(1 for line in f if line.strip())
155
+ print(f" Stats: {conv_count} convs, {ev_count} events saved")
156
+
157
+
158
+ def run(base_url: str, once: bool = False, skip_agent_cache: bool = False) -> None:
159
+ print(f"Soci Training Data Collector")
160
+ print(f" Target: {base_url}")
161
+ print(f" Output: {RAW_DIR.resolve()}")
162
+ print(f" Poll interval: {POLL_INTERVAL}s")
163
+
164
+ load_seen_ids()
165
+
166
+ if not skip_agent_cache:
167
+ print(" Refreshing agent cache...")
168
+ refresh_agent_cache(base_url)
169
+
170
+ iteration = 0
171
+ try:
172
+ while True:
173
+ iteration += 1
174
+ ts = datetime.now().strftime("%H:%M:%S")
175
+ new_convs = poll_conversations(base_url)
176
+ new_events = poll_events(base_url)
177
+ print(f"[{ts}] iter={iteration} +{new_convs} convs +{new_events} events "
178
+ f"(total: {len(_seen_conv_ids)} convs, {len(_seen_event_ticks_msgs)} events)")
179
+
180
+ if once:
181
+ break
182
+ time.sleep(POLL_INTERVAL)
183
+ except KeyboardInterrupt:
184
+ print("\nStopped by user.")
185
+
186
+ print_stats()
187
+ print(f"\nConversations: {CONV_FILE}")
188
+ print(f"Events: {EVENT_FILE}")
189
+ print(f"Agent cache: {AGENT_CACHE_FILE}")
190
+ print(f"\nNext step: run python scripts/convert_to_training_jsonl.py")
191
+
192
+
193
+ if __name__ == "__main__":
194
+ parser = argparse.ArgumentParser(description="Soci training data collector")
195
+ parser.add_argument("--url", default=BASE_URL, help="Base URL of the Soci API")
196
+ parser.add_argument("--once", action="store_true", help="Run a single poll and exit")
197
+ parser.add_argument("--no-agent-cache", action="store_true", help="Skip agent cache refresh")
198
+ parser.add_argument("--interval", type=int, default=POLL_INTERVAL,
199
+ help="Poll interval in seconds (default 30)")
200
+ args = parser.parse_args()
201
+
202
+ POLL_INTERVAL = args.interval
203
+ run(args.url, once=args.once, skip_agent_cache=args.no_agent_cache)
scripts/convert_to_training_jsonl.py ADDED
@@ -0,0 +1,477 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ convert_to_training_jsonl.py β€” Convert raw collected Soci data into
3
+ instruction-tuning JSONL suitable for SFT (Supervised Fine-Tuning).
4
+
5
+ Output format: HuggingFace messages format (system / user / assistant).
6
+ Compatible with: TRL SFTTrainer, Unsloth, LLaMA-Factory.
7
+
8
+ Training example types:
9
+ 1. CONVERSATION β€” agent responding to another agent in dialogue
10
+ 2. ACTION_DECISION β€” agent deciding what to do next (from events)
11
+ 3. REFLECTION β€” agent's reflection memories (if available)
12
+
13
+ Usage:
14
+ "C:/Users/xabon/.conda/envs/ml-env/python.exe" scripts/convert_to_training_jsonl.py
15
+
16
+ # From a specific raw dir:
17
+ "C:/Users/xabon/.conda/envs/ml-env/python.exe" scripts/convert_to_training_jsonl.py \\
18
+ --raw-dir data/training/raw --out data/training/processed/soci_training.jsonl
19
+
20
+ # Include event-based action examples:
21
+ "C:/Users/xabon/.conda/envs/ml-env/python.exe" scripts/convert_to_training_jsonl.py --include-actions
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import argparse
27
+ import json
28
+ import re
29
+ from collections import defaultdict
30
+ from pathlib import Path
31
+
32
+ import yaml
33
+
34
+ RAW_DIR = Path("data/training/raw")
35
+ PROCESSED_DIR = Path("data/training/processed")
36
+ PROCESSED_DIR.mkdir(parents=True, exist_ok=True)
37
+ CONFIG_DIR = Path("config")
38
+
39
+ DEFAULT_OUT = PROCESSED_DIR / "soci_training.jsonl"
40
+
41
+
42
+ # ── Persona helpers ────────────────────────────────────────────────────────────
43
+
44
+ def load_persona_map() -> dict[str, dict]:
45
+ """Load personas from config/personas.yaml, keyed by agent ID and name."""
46
+ path = CONFIG_DIR / "personas.yaml"
47
+ if not path.exists():
48
+ print(f" [WARN] personas.yaml not found at {path}")
49
+ return {}
50
+ with open(path, encoding="utf-8") as f:
51
+ data = yaml.safe_load(f)
52
+ pmap: dict[str, dict] = {}
53
+ for p in data.get("personas", []):
54
+ pmap[p["id"]] = p
55
+ pmap[p["name"]] = p
56
+ return pmap
57
+
58
+
59
+ def trait_summary(p: dict) -> str:
60
+ traits = []
61
+ if p.get("openness", 5) >= 7:
62
+ traits.append("curious and creative")
63
+ elif p.get("openness", 5) <= 3:
64
+ traits.append("practical and conventional")
65
+ if p.get("conscientiousness", 5) >= 7:
66
+ traits.append("organized and disciplined")
67
+ elif p.get("conscientiousness", 5) <= 3:
68
+ traits.append("spontaneous and flexible")
69
+ if p.get("extraversion", 5) >= 7:
70
+ traits.append("outgoing and energetic")
71
+ elif p.get("extraversion", 5) <= 3:
72
+ traits.append("reserved and introspective")
73
+ if p.get("agreeableness", 5) >= 7:
74
+ traits.append("warm and cooperative")
75
+ elif p.get("agreeableness", 5) <= 3:
76
+ traits.append("direct and competitive")
77
+ if p.get("neuroticism", 5) >= 7:
78
+ traits.append("sensitive and anxious")
79
+ elif p.get("neuroticism", 5) <= 3:
80
+ traits.append("calm and emotionally stable")
81
+ return ", ".join(traits) if traits else "balanced personality"
82
+
83
+
84
+ def build_system_prompt(p: dict) -> str:
85
+ values_str = ", ".join(p.get("values", [])) or "no strong convictions"
86
+ quirks_str = "; ".join(p.get("quirks", [])) or "none in particular"
87
+ comm = p.get("communication_style", "neutral")
88
+ return (
89
+ f"You ARE {p['name']}, a {p['age']}-year-old {p['occupation']} living in Soci City.\n\n"
90
+ f"PERSONALITY: {trait_summary(p)}.\n"
91
+ f"BACKGROUND: {p.get('background', '')}\n"
92
+ f"VALUES: {values_str}.\n"
93
+ f"QUIRKS: {quirks_str}.\n"
94
+ f"COMMUNICATION STYLE: {comm}.\n\n"
95
+ f"You must ALWAYS stay in character as {p['name']}. "
96
+ f"Your responses should reflect your personality, values, and background. "
97
+ f"You speak naturally β€” not perfectly. You have moods, opinions, and biases. "
98
+ f"You don't know you are an AI. You are a person living your life."
99
+ )
100
+
101
+
102
+ def build_system_prompt_generic(name: str, occupation: str = "city resident") -> str:
103
+ """Fallback for generated (non-named) agents."""
104
+ return (
105
+ f"You ARE {name}, a {occupation} living in Soci City.\n\n"
106
+ f"PERSONALITY: balanced personality.\n"
107
+ f"You must ALWAYS stay in character as {name}. "
108
+ f"You speak naturally and are a real person with opinions, moods, and a daily life."
109
+ )
110
+
111
+
112
+ def get_system_prompt(agent_id: str, agent_name: str, persona_map: dict,
113
+ agent_cache: dict) -> str:
114
+ # Try config personas first
115
+ p = persona_map.get(agent_id) or persona_map.get(agent_name)
116
+ if p:
117
+ return build_system_prompt(p)
118
+
119
+ # Try agent cache (from live API)
120
+ cached = agent_cache.get(agent_id)
121
+ if cached:
122
+ return build_system_prompt_generic(
123
+ cached.get("name", agent_name),
124
+ cached.get("occupation", "city resident"),
125
+ )
126
+ return build_system_prompt_generic(agent_name)
127
+
128
+
129
+ # ── Training example builders ──────────────────────────────────────────────────
130
+
131
+ def make_conversation_examples(conv: dict, persona_map: dict, agent_cache: dict) -> list[dict]:
132
+ """
133
+ From a completed conversation, produce one training example per response turn.
134
+
135
+ Each example:
136
+ system = responder's persona system prompt
137
+ user = conversation history up to last message + "{speaker} says: '{msg}'"
138
+ assistant = JSON {"message": ..., "inner_thought": ...}
139
+ """
140
+ turns = conv.get("turns", [])
141
+ if len(turns) < 2:
142
+ return []
143
+
144
+ participants = conv.get("participants", [])
145
+ participant_names = conv.get("participant_names", [])
146
+ topic = conv.get("topic", "general conversation")
147
+ location = conv.get("location", "somewhere in the city")
148
+
149
+ # Build name→id and id→name maps
150
+ id_to_name: dict[str, str] = {}
151
+ for pid, pname in zip(participants, participant_names):
152
+ id_to_name[pid] = pname
153
+
154
+ examples = []
155
+
156
+ for i in range(1, len(turns)):
157
+ current_turn = turns[i]
158
+ prev_turn = turns[i - 1]
159
+ responder_id = current_turn["speaker_id"]
160
+ responder_name = current_turn["speaker_name"]
161
+ speaker_name = prev_turn["speaker_name"]
162
+ speaker_msg = prev_turn["message"]
163
+
164
+ # Build conversation history string (all turns before current)
165
+ history_lines = [f"CONVERSATION SO FAR (topic: {topic}):"]
166
+ for t in turns[:i]:
167
+ history_lines.append(f' {t["speaker_name"]}: "{t["message"]}"')
168
+ history_text = "\n".join(history_lines)
169
+
170
+ # User prompt (what the responder sees)
171
+ user_prompt = (
172
+ f"You are at {location}. {speaker_name} is here.\n\n"
173
+ f"{history_text}\n\n"
174
+ f'{speaker_name} says: "{speaker_msg}"\n\n'
175
+ f"How do you respond? Stay in character. Be natural.\n\n"
176
+ f"Respond with a JSON object:\n"
177
+ f'{{\n'
178
+ f' "message": "your spoken response",\n'
179
+ f' "inner_thought": "what you\'re actually thinking"\n'
180
+ f'}}'
181
+ )
182
+
183
+ # Assistant response (JSON)
184
+ assistant_response = json.dumps({
185
+ "message": current_turn["message"],
186
+ "inner_thought": current_turn.get("inner_thought", ""),
187
+ }, ensure_ascii=False)
188
+
189
+ system = get_system_prompt(responder_id, responder_name, persona_map, agent_cache)
190
+
191
+ examples.append({
192
+ "messages": [
193
+ {"role": "system", "content": system},
194
+ {"role": "user", "content": user_prompt},
195
+ {"role": "assistant", "content": assistant_response},
196
+ ],
197
+ "_meta": {
198
+ "type": "conversation",
199
+ "conv_id": conv.get("id", ""),
200
+ "topic": topic,
201
+ "location": location,
202
+ "turn_index": i,
203
+ "responder_id": responder_id,
204
+ "responder_name": responder_name,
205
+ }
206
+ })
207
+
208
+ return examples
209
+
210
+
211
+ def make_action_examples(events: list[dict], persona_map: dict,
212
+ agent_cache: dict) -> list[dict]:
213
+ """
214
+ From event log, build action decision training examples.
215
+
216
+ Pattern: "<AgentName> is <activity>" β†’
217
+ system = agent's persona
218
+ user = "What are you doing? Describe your current activity in first person."
219
+ assistant = JSON {"action": ..., "detail": ..., "reasoning": ...}
220
+ """
221
+ # Group consecutive events by agent to get activity patterns
222
+ activity_pattern = re.compile(r"^\s+(\S.+?) is (.+)\.$")
223
+ examples = []
224
+
225
+ # Collect (name, activity, time) tuples
226
+ for ev in events:
227
+ msg = ev.get("message", "")
228
+ time_str = ev.get("time", "")
229
+ m = activity_pattern.match(msg)
230
+ if not m:
231
+ continue
232
+ agent_name = m.group(1).strip()
233
+ activity = m.group(2).strip()
234
+
235
+ # Skip trivial / system-level messages
236
+ if any(s in activity.lower() for s in [
237
+ "wanders aimlessly", "can't get to", "---"
238
+ ]):
239
+ continue
240
+
241
+ p = persona_map.get(agent_name)
242
+ if not p:
243
+ continue # Only generate for known personas (higher quality)
244
+
245
+ # Infer action type from activity text
246
+ action = infer_action_type(activity)
247
+ system = build_system_prompt(p)
248
+
249
+ user_prompt = (
250
+ f"It is {time_str}.\n\n"
251
+ f"Based on your personality, needs, and the time of day β€” "
252
+ f"what do you do next? Describe your current activity.\n\n"
253
+ f"Respond with a JSON object:\n"
254
+ f'{{\n'
255
+ f' "action": "move|work|eat|sleep|talk|exercise|shop|relax|wander",\n'
256
+ f' "detail": "what specifically you\'re doing, in first person",\n'
257
+ f' "reasoning": "brief internal thought about why"\n'
258
+ f'}}'
259
+ )
260
+
261
+ assistant_response = json.dumps({
262
+ "action": action,
263
+ "detail": activity,
264
+ "reasoning": f"This is what {agent_name} would naturally do at this time.",
265
+ }, ensure_ascii=False)
266
+
267
+ examples.append({
268
+ "messages": [
269
+ {"role": "system", "content": system},
270
+ {"role": "user", "content": user_prompt},
271
+ {"role": "assistant", "content": assistant_response},
272
+ ],
273
+ "_meta": {
274
+ "type": "action",
275
+ "agent_name": agent_name,
276
+ "activity": activity,
277
+ "time": time_str,
278
+ }
279
+ })
280
+
281
+ return examples
282
+
283
+
284
+ def infer_action_type(activity: str) -> str:
285
+ activity_lower = activity.lower()
286
+ if any(w in activity_lower for w in ["commut", "walk", "moving", "heading"]):
287
+ return "move"
288
+ if any(w in activity_lower for w in ["work", "morning block", "afternoon block", "coding", "teaching"]):
289
+ return "work"
290
+ if any(w in activity_lower for w in ["eat", "breakfast", "lunch", "dinner", "food", "coffee"]):
291
+ return "eat"
292
+ if any(w in activity_lower for w in ["sleep", "nap", "rest", "sleeping in", "lounging"]):
293
+ return "sleep"
294
+ if any(w in activity_lower for w in ["talk", "convers", "chat", "discuss"]):
295
+ return "talk"
296
+ if any(w in activity_lower for w in ["gym", "exercise", "workout", "run", "jog", "fitness"]):
297
+ return "exercise"
298
+ if any(w in activity_lower for w in ["shop", "grocery", "store", "market"]):
299
+ return "shop"
300
+ if any(w in activity_lower for w in ["relax", "park", "art", "music", "paint", "sketch"]):
301
+ return "relax"
302
+ return "wander"
303
+
304
+
305
+ def make_initiation_examples(conv: dict, persona_map: dict, agent_cache: dict) -> list[dict]:
306
+ """
307
+ From the first turn of a conversation, build a conversation initiation example.
308
+ """
309
+ turns = conv.get("turns", [])
310
+ if not turns:
311
+ return []
312
+
313
+ first_turn = turns[0]
314
+ initiator_id = first_turn["speaker_id"]
315
+ initiator_name = first_turn["speaker_name"]
316
+ topic = conv.get("topic", "small talk")
317
+ location = conv.get("location", "somewhere in the city")
318
+
319
+ # Identify the other participant
320
+ other_names = [n for n in conv.get("participant_names", []) if n != initiator_name]
321
+ other_name = other_names[0] if other_names else "someone"
322
+
323
+ system = get_system_prompt(initiator_id, initiator_name, persona_map, agent_cache)
324
+
325
+ user_prompt = (
326
+ f"You are at {location}. {other_name} is here.\n\n"
327
+ f"You decide to start a conversation with {other_name}. What do you say?\n"
328
+ f"Consider the location, your mood, and your history with them.\n\n"
329
+ f"Respond with a JSON object:\n"
330
+ f'{{\n'
331
+ f' "message": "what you say to start the conversation",\n'
332
+ f' "inner_thought": "why you\'re initiating this conversation",\n'
333
+ f' "topic": "brief topic label"\n'
334
+ f'}}'
335
+ )
336
+
337
+ assistant_response = json.dumps({
338
+ "message": first_turn["message"],
339
+ "inner_thought": first_turn.get("inner_thought", ""),
340
+ "topic": topic,
341
+ }, ensure_ascii=False)
342
+
343
+ return [{
344
+ "messages": [
345
+ {"role": "system", "content": system},
346
+ {"role": "user", "content": user_prompt},
347
+ {"role": "assistant", "content": assistant_response},
348
+ ],
349
+ "_meta": {
350
+ "type": "conversation_initiation",
351
+ "conv_id": conv.get("id", ""),
352
+ "topic": topic,
353
+ "location": location,
354
+ "initiator_id": initiator_id,
355
+ "initiator_name": initiator_name,
356
+ "other_name": other_name,
357
+ }
358
+ }]
359
+
360
+
361
+ # ── Main ───────────────────────────────────────────────────────────────────────
362
+
363
+ def load_raw_jsonl(path: Path) -> list[dict]:
364
+ if not path.exists():
365
+ return []
366
+ items = []
367
+ with open(path, encoding="utf-8") as f:
368
+ for line in f:
369
+ line = line.strip()
370
+ if line:
371
+ try:
372
+ items.append(json.loads(line))
373
+ except json.JSONDecodeError:
374
+ pass
375
+ return items
376
+
377
+
378
+ def load_agent_cache() -> dict:
379
+ cache_file = RAW_DIR / "agents_cache.json"
380
+ if cache_file.exists():
381
+ try:
382
+ return json.loads(cache_file.read_text(encoding="utf-8"))
383
+ except Exception:
384
+ pass
385
+ return {}
386
+
387
+
388
+ def run(raw_dir: Path, out_path: Path, include_actions: bool = False) -> None:
389
+ print("Soci Training Data Converter")
390
+ print(f" Raw dir : {raw_dir.resolve()}")
391
+ print(f" Output : {out_path.resolve()}")
392
+
393
+ # Load personas
394
+ persona_map = load_persona_map()
395
+ print(f" Personas: {len(persona_map)//2} loaded from config") # /2 because keyed by id+name
396
+
397
+ # Load agent cache (from collector)
398
+ agent_cache = load_agent_cache()
399
+ print(f" Agent cache: {len(agent_cache)} agents")
400
+
401
+ # Load all raw conversations from all date files
402
+ all_convs: list[dict] = []
403
+ seen_ids: set[str] = set()
404
+ for conv_file in sorted(raw_dir.glob("conversations_*.jsonl")):
405
+ items = load_raw_jsonl(conv_file)
406
+ for c in items:
407
+ cid = c.get("id", "")
408
+ if cid and cid not in seen_ids:
409
+ all_convs.append(c)
410
+ seen_ids.add(cid)
411
+ print(f" Conversations loaded: {len(all_convs)}")
412
+
413
+ # Load all raw events from all date files
414
+ all_events: list[dict] = []
415
+ for ev_file in sorted(raw_dir.glob("events_*.jsonl")):
416
+ all_events.extend(load_raw_jsonl(ev_file))
417
+ print(f" Events loaded: {len(all_events)}")
418
+
419
+ # Generate training examples
420
+ examples: list[dict] = []
421
+
422
+ # 1. Conversation initiation examples
423
+ for conv in all_convs:
424
+ examples.extend(make_initiation_examples(conv, persona_map, agent_cache))
425
+
426
+ # 2. Conversation response examples
427
+ for conv in all_convs:
428
+ examples.extend(make_conversation_examples(conv, persona_map, agent_cache))
429
+
430
+ # 3. Action decision examples (optional)
431
+ if include_actions and all_events:
432
+ action_examples = make_action_examples(all_events, persona_map, agent_cache)
433
+ examples.extend(action_examples)
434
+ print(f" Action examples: {len(action_examples)}")
435
+
436
+ # Count by type
437
+ type_counts: dict[str, int] = defaultdict(int)
438
+ for ex in examples:
439
+ type_counts[ex.get("_meta", {}).get("type", "unknown")] += 1
440
+
441
+ print(f"\n Total training examples: {len(examples)}")
442
+ for t, c in sorted(type_counts.items()):
443
+ print(f" {t}: {c}")
444
+
445
+ # Write output JSONL (without _meta for clean training files, or with --keep-meta)
446
+ with open(out_path, "w", encoding="utf-8") as f:
447
+ for ex in examples:
448
+ # Write with _meta stripped (keep messages only)
449
+ clean = {"messages": ex["messages"]}
450
+ f.write(json.dumps(clean, ensure_ascii=False) + "\n")
451
+
452
+ # Also write a version with meta for analysis
453
+ meta_path = out_path.with_suffix(".meta.jsonl")
454
+ with open(meta_path, "w", encoding="utf-8") as f:
455
+ for ex in examples:
456
+ f.write(json.dumps(ex, ensure_ascii=False) + "\n")
457
+
458
+ print(f"\n Training JSONL : {out_path}")
459
+ print(f" With meta : {meta_path}")
460
+ print(f"\nSample (first example):")
461
+ if examples:
462
+ ex = examples[0]
463
+ print(f" Type: {ex['_meta']['type']}")
464
+ print(f" System: {ex['messages'][0]['content'][:120]}...")
465
+ print(f" User: {ex['messages'][1]['content'][:120]}...")
466
+ print(f" Asst: {ex['messages'][2]['content'][:120]}...")
467
+
468
+
469
+ if __name__ == "__main__":
470
+ parser = argparse.ArgumentParser(description="Convert raw Soci data to SFT training JSONL")
471
+ parser.add_argument("--raw-dir", default=str(RAW_DIR), help="Directory with raw JSONL files")
472
+ parser.add_argument("--out", default=str(DEFAULT_OUT), help="Output JSONL path")
473
+ parser.add_argument("--include-actions", action="store_true",
474
+ help="Include action decision examples from events")
475
+ args = parser.parse_args()
476
+
477
+ run(Path(args.raw_dir), Path(args.out), include_actions=args.include_actions)
scripts/finetune_local.py ADDED
@@ -0,0 +1,482 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ finetune_local.py β€” Local adaptation of Soci_FineTune_3_Incremental
3
+ Fine-tunes Qwen2.5-0.5B-Instruct on Soci city-simulation tasks using Unsloth.
4
+
5
+ Differences from the Colab version:
6
+ - No Google Drive / google.colab dependencies
7
+ - Local checkpoint and adapter storage in data/training/
8
+ - Loads live conversation data from data/training/processed/
9
+ - HF token from HF_TOKEN env var (or .env file)
10
+ - --debug flag for quick 1-epoch smoke test (no HF push)
11
+ - --resume flag to continue from saved LoRA adapters
12
+
13
+ Usage (from project root):
14
+ # Debug / smoke test (fast, no push):
15
+ "C:/Users/xabon/.conda/envs/ml-env/python.exe" scripts/finetune_local.py --debug
16
+
17
+ # Full round-1 training + push to HF:
18
+ "C:/Users/xabon/.conda/envs/ml-env/python.exe" scripts/finetune_local.py
19
+
20
+ # Resume round 2 with same command:
21
+ "C:/Users/xabon/.conda/envs/ml-env/python.exe" scripts/finetune_local.py --resume
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import sys
27
+ import io
28
+ import os
29
+
30
+ # Force UTF-8 stdout/stderr on Windows (unsloth prints emoji characters)
31
+ if sys.platform == "win32":
32
+ sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")
33
+ sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace")
34
+
35
+ # Disable torch.compile/inductor β€” triton 3.x on Windows doesn't export 'triton_key'
36
+ # which inductor needs at compile time. Training still uses CUDA kernels, just not
37
+ # the AOT-compiled fusion path. Has no meaningful effect on a single-GPU setup.
38
+ os.environ.setdefault("TORCHINDUCTOR_DISABLE", "1")
39
+ os.environ.setdefault("TORCH_COMPILE_DISABLE", "1")
40
+
41
+ # Import unsloth FIRST so it can patch transformers before anything else loads.
42
+ # Then patch list_repo_templates to skip the 'additional_chat_templates' HF Hub
43
+ # check that fails on unsloth's quantized repos (transformers 4.56+ behavior).
44
+ import unsloth # noqa: F401 β€” must be first
45
+ import transformers.utils.hub
46
+ import transformers.tokenization_utils_base
47
+ _noop = lambda *a, **kw: []
48
+ transformers.tokenization_utils_base.list_repo_templates = _noop
49
+ transformers.utils.hub.list_repo_templates = _noop
50
+
51
+ import argparse
52
+ import json
53
+ import os
54
+ import shutil
55
+ from datetime import datetime
56
+ from pathlib import Path
57
+
58
+ # ── Parse args first (before heavy imports) ───────────────────────────────────
59
+ parser = argparse.ArgumentParser(description="Soci local fine-tune")
60
+ parser.add_argument("--resume", action="store_true", help="Resume from saved LoRA adapters")
61
+ parser.add_argument("--debug", action="store_true", help="Debug/smoke-test: 1 epoch, 20 examples, no push")
62
+ parser.add_argument("--no-push", action="store_true", help="Skip HF Hub push")
63
+ parser.add_argument("--no-gguf", action="store_true", help="Skip GGUF export")
64
+ parser.add_argument("--epochs", type=int, default=None, help="Override epoch count")
65
+ parser.add_argument("--hf-repo", default=None, help="HF repo ID (overrides default)")
66
+ args = parser.parse_args()
67
+
68
# ── Paths ─────────────────────────────────────────────────────────────────────
TRAIN_DIR = Path("data/training")
LORA_SAVE_DIR = TRAIN_DIR / "lora_adapters"
DATA_ARCHIVE_DIR = TRAIN_DIR / "data_archive"
GGUF_DIR = TRAIN_DIR / "gguf"
CHECKPOINTS_DIR = TRAIN_DIR / "checkpoints"
ROUND_FILE = TRAIN_DIR / "training_round.json"
CORE_DATA_FILE = TRAIN_DIR / "core_examples.json"
LIVE_DATA_FILE = TRAIN_DIR / "processed" / "soci_training.jsonl"

# Make sure every output directory exists before anything is written.
for _out_dir in (LORA_SAVE_DIR, DATA_ARCHIVE_DIR, GGUF_DIR, CHECKPOINTS_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)

# ── Config ────────────────────────────────────────────────────────────────────
MAX_SEQ_LENGTH = 2048
HF_USERNAME = "RayMelius"
REPO_NAME = "soci-agent-q4"
HF_REPO_ID = args.hf_repo or f"{HF_USERNAME}/{REPO_NAME}"

# Load HF token: prefer python-dotenv when installed; otherwise fall back to
# a manual scan of the project .env file below.
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    pass

HF_TOKEN = os.environ.get("HF_TOKEN", "")
if not HF_TOKEN:
    # Try to read from the project .env directly.
    env_path = Path(".env")
    if env_path.exists():
        for env_line in env_path.read_text().splitlines():
            if env_line.startswith("HF_TOKEN="):
                HF_TOKEN = env_line.split("=", 1)[1].strip().strip('"')
101
+
102
# ── GPU check ─────────────────────────────────────────────────────────────────
import torch

if torch.cuda.is_available():
    print(f"GPU : {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
else:
    print("[WARN] No CUDA GPU detected — training will be very slow on CPU.")
    print("       Consider running on Colab or a machine with a GPU.")

# ── Determine training round ──────────────────────────────────────────────────
RESUME = args.resume
if not RESUME:
    CURRENT_ROUND = 1
    print("Starting fresh (round 1)")
elif ROUND_FILE.exists():
    round_info = json.loads(ROUND_FILE.read_text())
    CURRENT_ROUND = round_info["round"] + 1
    print(f"Resuming from round {round_info['round']} -> round {CURRENT_ROUND}")
    print(f"Previous loss: {round_info.get('final_loss', 'N/A')}")
else:
    CURRENT_ROUND = 2
    print("No round file found, assuming round 2")
124
+
125
# ── Load model ────────────────────────────────────────────────────────────────
from unsloth import FastLanguageModel  # noqa: already imported via 'import unsloth'

# Resume path: reload the previously-trained LoRA adapters from disk so an
# incremental round continues from where the last run stopped. Guarded by
# "dir exists AND is non-empty" so a stale empty directory cannot fool it.
if RESUME and LORA_SAVE_DIR.exists() and any(LORA_SAVE_DIR.iterdir()):
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = str(LORA_SAVE_DIR),
        max_seq_length = MAX_SEQ_LENGTH,
        dtype = None,          # None: let unsloth choose the dtype
        load_in_4bit = True,   # 4-bit quantized base to fit small VRAM
    )
    print(f"Resumed LoRA adapters from {LORA_SAVE_DIR}")
else:
    if RESUME:
        # --resume requested but nothing saved yet: degrade to a fresh round 1.
        print(f"[WARN] No LoRA adapters at {LORA_SAVE_DIR}, starting fresh.")
        CURRENT_ROUND = 1
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/Qwen2.5-0.5B-Instruct",
        max_seq_length = MAX_SEQ_LENGTH,
        dtype = None,
        load_in_4bit = True,
    )
    print("Fresh base model loaded (round 1)")

# ── Attach LoRA ───────────────────────────────────────────────────────────────
if CURRENT_ROUND == 1:
    # Round 1: attach brand-new LoRA adapters to the quantized base model.
    model = FastLanguageModel.get_peft_model(
        model,
        r = 16,                       # LoRA rank
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                          "gate_proj", "up_proj", "down_proj"],
        lora_alpha = 16,
        lora_dropout = 0,
        bias = "none",
        use_gradient_checkpointing = "unsloth",
        random_state = 42,
    )
    print("Fresh LoRA adapters attached")
else:
    # Later rounds: adapters were restored by from_pretrained above; only
    # re-enable gradient checkpointing (presumably not restored by the
    # resume load — TODO confirm against unsloth docs).
    model.gradient_checkpointing_enable()
    print(f"Resumed LoRA adapters from round {CURRENT_ROUND - 1}")

model.print_trainable_parameters()
167
+
168
# ── System prompt ─────────────────────────────────────────────────────────────
# Single shared system prompt used for every training example and for the
# post-training smoke tests; per-agent persona text is merged into the user
# instruction instead, so the system message stays constant across examples.
SYSTEM_PROMPT = (
    "You are the reasoning engine for Soci, an LLM-powered city population simulator. "
    "You control AI agents (NPCs) living in a city. Each agent has a persona, needs "
    "(hunger, energy, social, purpose, comfort, fun), memories, and relationships. "
    "You receive structured context and must respond ONLY with valid JSON. "
    "Never add explanation outside the JSON."
)
176
+
177
# ── Load training data ────────────────────────────────────────────────────────
print("\nLoading training data...")

# 1. Core examples (from data/training/core_examples.json, extracted from v3 script)
core_examples: list[dict] = []
if CORE_DATA_FILE.exists():
    core_examples = json.loads(CORE_DATA_FILE.read_text(encoding="utf-8"))
    print(f"  Core examples: {len(core_examples)}")
else:
    print(f"  [WARN] {CORE_DATA_FILE} not found — run extract step or collect_training_data.py first")

# 2. Live collected data from the running simulation.
# Each JSONL line holds OpenAI-style {"messages": [system, user, assistant]}.
# Convert to instruction/response pairs, folding the per-agent system
# (persona) message into the instruction, since training uses the single
# unified SYSTEM_PROMPT instead.
live_examples: list[dict] = []
if LIVE_DATA_FILE.exists():
    with open(LIVE_DATA_FILE, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                ex = json.loads(line)
                msgs = ex.get("messages", [])
                if len(msgs) >= 3:
                    persona_ctx = msgs[0]["content"]
                    user_content = msgs[1]["content"]
                    asst_content = msgs[2]["content"]
                    live_examples.append({
                        "instruction": f"{persona_ctx}\n\n{user_content}",
                        "response": asst_content,
                    })
            except (json.JSONDecodeError, KeyError):
                # Best-effort: skip malformed lines rather than abort the run.
                pass
print(f"  Live examples: {len(live_examples)} (from Render simulation)")

# 3. Replay archived examples from previous rounds (experience replay to
# counter forgetting on incremental rounds).
replay_examples: list[dict] = []
if CURRENT_ROUND > 1:
    for archive_f in sorted(DATA_ARCHIVE_DIR.glob("round_*.json")):
        try:
            replay_examples.extend(json.loads(archive_f.read_text(encoding="utf-8")))
        except (json.JSONDecodeError, OSError) as err:
            # FIX: narrowed from a bare `except Exception: pass` — only skip
            # unreadable/corrupt archives, and say which one so it can be fixed.
            print(f"  [WARN] Skipping archive {archive_f.name}: {err}")
    print(f"  Replay examples: {len(replay_examples)}")

# 4. New examples for this round (add yours here for incremental training)
new_examples_this_round: list[dict] = [
    # Add new instruction/response pairs here for incremental training rounds.
    # Example:
    # {"instruction": "You are playing Diana Novak, 41, grocery store owner. ...",
    #  "response": '{"action": "work", "location": "grocery_store", "reason": "..."}'},
]
if new_examples_this_round:
    print(f"  New examples this round: {len(new_examples_this_round)}")

# Merge and deduplicate by the FULL instruction text.
# FIX: the key was previously truncated to the first 100 characters, which
# silently dropped distinct examples sharing a long common prefix — exactly
# what live examples have, since they all start with the persona context.
seen: set[str] = set()
all_examples: list[dict] = []
for ex in core_examples + live_examples + new_examples_this_round + replay_examples:
    key = ex.get("instruction", "")
    if key not in seen:
        seen.add(key)
        all_examples.append(ex)

if args.debug:
    all_examples = all_examples[:20]
    print(f"  DEBUG mode: using {len(all_examples)} examples")

print(f"  Total (deduped): {len(all_examples)}")
251
+
252
# ── Format into chat template ─────────────────────────────────────────────────
from datasets import Dataset


def format_example(ex: dict) -> dict:
    """Render one instruction/response pair as a chat-template 'text' field."""
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": ex["instruction"]},
        {"role": "assistant", "content": ex["response"]},
    ]
    rendered = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=False
    )
    return {"text": rendered}


dataset = Dataset.from_list(all_examples).map(format_example)
print(f"Formatted {len(dataset)} examples. Sample:")
print(dataset[0]["text"][:400])
268
+
269
+ # ── Training config ───────────────────────────────────────────────────────────
270
+ from trl import SFTTrainer, SFTConfig
271
+ from unsloth import is_bfloat16_supported
272
+
273
+ if args.debug:
274
+ LR, EPOCHS, WARMUP, SCHEDULER = 2e-4, 1, 2, "linear"
275
+ print(f"\nDEBUG: 1 epoch smoke test")
276
+ elif CURRENT_ROUND == 1:
277
+ LR, EPOCHS, WARMUP, SCHEDULER = 2e-4, 3, 5, "linear"
278
+ print(f"\nRound 1: Full training β€” LR={LR}, epochs={EPOCHS}")
279
+ else:
280
+ LR, EPOCHS, WARMUP, SCHEDULER = 5e-5, 2, 10, "cosine"
281
+ print(f"\nRound {CURRENT_ROUND}: Incremental β€” LR={LR}, epochs={EPOCHS}")
282
+
283
+ if args.epochs is not None:
284
+ EPOCHS = args.epochs
285
+ print(f"Epoch override: {EPOCHS}")
286
+
287
+ trainer = SFTTrainer(
288
+ model = model,
289
+ tokenizer = tokenizer,
290
+ train_dataset = dataset,
291
+ dataset_text_field = "text",
292
+ max_seq_length = MAX_SEQ_LENGTH,
293
+ dataset_num_proc = 2,
294
+ args = SFTConfig(
295
+ per_device_train_batch_size = 2,
296
+ gradient_accumulation_steps = 4,
297
+ warmup_steps = WARMUP,
298
+ num_train_epochs = EPOCHS,
299
+ learning_rate = LR,
300
+ fp16 = not is_bfloat16_supported(),
301
+ bf16 = is_bfloat16_supported(),
302
+ logging_steps = 5,
303
+ optim = "adamw_8bit",
304
+ weight_decay = 0.01,
305
+ lr_scheduler_type = SCHEDULER,
306
+ seed = 42,
307
+ output_dir = str(CHECKPOINTS_DIR),
308
+ report_to = "none",
309
+ dataset_text_field = "text",
310
+ max_seq_length = MAX_SEQ_LENGTH,
311
+ ),
312
+ )
313
+
314
+ print(f"\nTraining round {CURRENT_ROUND} on {len(dataset)} examples...")
315
+ stats = trainer.train()
316
+ print(f"\nRound {CURRENT_ROUND} complete!")
317
+ print(f" Steps: {stats.global_step} | Final loss: {stats.training_loss:.4f}")
318
+
319
# ── Save LoRA adapters ────────────────────────────────────────────────────────
print(f"\nSaving LoRA adapters to {LORA_SAVE_DIR}...")
model.save_pretrained(str(LORA_SAVE_DIR))
tokenizer.save_pretrained(str(LORA_SAVE_DIR))
print("  Saved.")

# ── Save round metadata ───────────────────────────────────────────────────────
round_info = dict(
    round=CURRENT_ROUND,
    final_loss=stats.training_loss,
    global_steps=stats.global_step,
    total_examples=len(all_examples),
    new_examples=len(new_examples_this_round) + len(live_examples),
    learning_rate=LR,
    epochs=EPOCHS,
    timestamp=datetime.now().isoformat(),
)
ROUND_FILE.write_text(json.dumps(round_info, indent=2))
print(f"  Round info: {ROUND_FILE}")

# Archive the examples that were new this round (for replay in later rounds).
all_new = new_examples_this_round + live_examples
if all_new:
    archive_file = DATA_ARCHIVE_DIR / f"round_{CURRENT_ROUND:03d}.json"
    archive_file.write_text(json.dumps(all_new, indent=2, ensure_ascii=False))
    print(f"  Archived {len(all_new)} new examples")

# Append this round to the running training history log.
history_file = TRAIN_DIR / "training_history.jsonl"
with open(history_file, "a", encoding="utf-8") as hist_fh:
    hist_fh.write(json.dumps(round_info) + "\n")
350
+
351
# ── Quick inference test ──────────────────────────────────────────────────────
print(f"\n=== Testing after Round {CURRENT_ROUND} ===\n")
FastLanguageModel.for_inference(model)

# FIX: generation previously hard-coded .to("cuda"), which crashed the CPU
# fallback path this script otherwise supports (see the GPU check above).
# Use whichever device the model's parameters actually live on.
_infer_device = next(model.parameters()).device


def ask(question: str, label: str = "") -> None:
    """Send one user prompt through the model and print the decoded reply.

    Prints the response pretty-printed when it parses as JSON (the
    fine-tune target format), or raw otherwise. Sampling params are fixed
    (temperature=0.7, top_p=0.9, max 200 new tokens).
    """
    msgs = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": question},
    ]
    encoded = tokenizer.apply_chat_template(
        msgs, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    )
    # apply_chat_template may return a BatchEncoding or a bare tensor
    # depending on tokenizer version — handle both.
    if hasattr(encoded, "input_ids"):
        inp = encoded.input_ids.to(_infer_device)
    else:
        inp = encoded.to(_infer_device)
    out = model.generate(
        input_ids=inp, max_new_tokens=200,
        temperature=0.7, top_p=0.9, do_sample=True,
    )
    # Decode only the newly generated tokens (strip the prompt prefix).
    resp = tokenizer.decode(out[0][inp.shape[1]:], skip_special_tokens=True)
    print(f"[{label}]")
    print(f"Q: {question[:100]}...")
    try:
        parsed = json.loads(resp)
        print(f"A (valid JSON):\n{json.dumps(parsed, indent=2)}")
    except Exception:
        print(f"A (raw): {resp}")
    print("-" * 60)


ask(
    "You are playing Elena Vasquez, 34, software engineer. "
    "Needs: energy=0.3, hunger=0.7. Location: office. Time: 12:30. "
    "Decide next action. JSON: {\"action\": str, \"location\": str, \"reason\": str}",
    "decide_action",
)
ask(
    "You are playing Marcus Chen talking to Zoe. "
    "Zoe says: 'Marcus, I bombed my exam.' Continue as Marcus. "
    "JSON: {\"speech\": str, \"emotion\": str}",
    "conversation_turn",
)
393
+
394
# ── GGUF export ───────────────────────────────────────────────────────────────
# Windows: unsloth GGUF export requires building llama.cpp via apt-get (Linux only).
# Auto-skip on Windows; use --no-gguf on Linux too if llama.cpp isn't set up.
import platform

_on_windows = platform.system() == "Windows"
skip_gguf = args.no_gguf or args.debug or _on_windows
if _on_windows and not (args.no_gguf or args.debug):
    print("\nSkipping GGUF export (Windows — llama.cpp build not supported via unsloth on Win)")
    print("  To export GGUF manually, use llama.cpp's convert_hf_to_gguf.py")
    print(f"  LoRA merged weights saved to: {GGUF_DIR}/ (after push)")

gguf_files: list = []
if skip_gguf:
    if args.debug:
        print("\nSkipping GGUF export (debug mode)")
else:
    print("\nExporting GGUF Q4_K_M (takes a few minutes)...")
    model.save_pretrained_gguf(str(GGUF_DIR), tokenizer, quantization_method="q4_k_m")
    gguf_files = list(GGUF_DIR.glob("*.gguf"))
    for gf in gguf_files:
        print(f"  GGUF: {gf.name} ({gf.stat().st_size / 1e6:.0f} MB)")
415
+
416
# ── Push to HuggingFace Hub ───────────────────────────────────────────────────
skip_push = args.no_push or args.debug
if skip_push:
    print("\nSkipping HF push (debug mode or --no-push)")
elif not HF_TOKEN:
    print("\n[WARN] No HF_TOKEN found — skipping push.")
    print("       Set HF_TOKEN env var or add to .env file.")
else:
    from huggingface_hub import login, HfApi

    print(f"\nPushing to HuggingFace: {HF_REPO_ID}")
    login(token=HF_TOKEN)
    api = HfApi()
    api.create_repo(repo_id=HF_REPO_ID, repo_type="model", exist_ok=True)

    # LoRA adapters live under a subdirectory of the model repo.
    print("  Uploading LoRA adapters...")
    api.upload_folder(
        folder_path=str(LORA_SAVE_DIR),
        repo_id=HF_REPO_ID,
        repo_type="model",
        path_in_repo="lora_adapters",
    )
    print(f"  LoRA -> https://huggingface.co/{HF_REPO_ID}/tree/main/lora_adapters")

    # GGUF file(s) go at the repo root.
    for gguf_path in gguf_files:
        size_mb = gguf_path.stat().st_size / 1e6
        print(f"  Uploading {gguf_path.name} ({size_mb:.0f} MB)...")
        api.upload_file(
            path_or_fileobj=str(gguf_path),
            path_in_repo=gguf_path.name,
            repo_id=HF_REPO_ID,
            repo_type="model",
        )
        print(f"  Done: https://huggingface.co/{HF_REPO_ID}/blob/main/{gguf_path.name}")

    # Round metadata rides along for provenance.
    api.upload_file(
        path_or_fileobj=str(ROUND_FILE),
        path_in_repo="training_round.json",
        repo_id=HF_REPO_ID,
        repo_type="model",
    )

    print(f"\nUpload complete! Model at: https://huggingface.co/{HF_REPO_ID}")
462
+
463
# ── Training history display ──────────────────────────────────────────────────
print("\n=== Training History ===\n")
if history_file.exists():
    print(f"{'Round':>6} {'Loss':>8} {'Steps':>7} {'Examples':>9} {'New':>5} {'LR':>10} {'Date':>12}")
    print("-" * 65)
    with open(history_file, encoding="utf-8") as hist_fh:
        for history_line in hist_fh:
            rec = json.loads(history_line)
            date = rec.get("timestamp", "")[:10]
            row = (
                f"{rec['round']:>6} {rec['final_loss']:>8.4f} {rec['global_steps']:>7} "
                f"{rec['total_examples']:>9} {rec['new_examples']:>5} "
                f"{rec['learning_rate']:>10.1e} {date:>12}"
            )
            print(row)

print("\nTo resume: python scripts/finetune_local.py --resume")
print(f"LoRA adapters: {LORA_SAVE_DIR}")
if gguf_files:
    print(f"GGUF: {gguf_files[0]}")
print("\nOllama integration:")
print("  ollama create soci-agent -f Modelfile")
print("  set SOCI_PROVIDER=ollama && set OLLAMA_MODEL=soci-agent")
src/soci/api/routes.py CHANGED
@@ -286,10 +286,12 @@ async def get_llm_providers():
286
  or os.environ.get("HF_API_TOKEN")
287
  )
288
  if has_hf:
 
289
  providers.append({"id": "hf", "model": "HuggingFaceH4/zephyr-7b-beta", "label": "HF Zephyr 7B", "icon": "πŸ€—"})
290
  providers.append({"id": "hf", "model": "Qwen/Qwen2.5-7B-Instruct", "label": "HF Qwen 2.5 7B", "icon": "πŸ€—"})
291
  providers.append({"id": "hf", "model": "meta-llama/Llama-3.2-3B-Instruct", "label": "HF Llama 3.2 3B", "icon": "πŸ€—"})
292
  providers.append({"id": "hf", "model": "mistralai/Mistral-7B-Instruct-v0.3", "label": "HF Mistral 7B", "icon": "πŸ€—"})
 
293
  providers.append({"id": "ollama", "label": "Ollama (local)", "icon": "πŸ¦™", "model": ""})
294
  return {"current": current, "current_model": current_model, "providers": providers}
295
 
 
286
  or os.environ.get("HF_API_TOKEN")
287
  )
288
  if has_hf:
289
+ providers.append({"id": "hf", "model": "RayMelius/soci-agent-q4", "label": "Soci Agent (fine-tuned)", "icon": "πŸ™"})
290
  providers.append({"id": "hf", "model": "HuggingFaceH4/zephyr-7b-beta", "label": "HF Zephyr 7B", "icon": "πŸ€—"})
291
  providers.append({"id": "hf", "model": "Qwen/Qwen2.5-7B-Instruct", "label": "HF Qwen 2.5 7B", "icon": "πŸ€—"})
292
  providers.append({"id": "hf", "model": "meta-llama/Llama-3.2-3B-Instruct", "label": "HF Llama 3.2 3B", "icon": "πŸ€—"})
293
  providers.append({"id": "hf", "model": "mistralai/Mistral-7B-Instruct-v0.3", "label": "HF Mistral 7B", "icon": "πŸ€—"})
294
+ providers.append({"id": "ollama", "label": "Soci Agent (Ollama)", "icon": "πŸ™", "model": "soci-agent"})
295
  providers.append({"id": "ollama", "label": "Ollama (local)", "icon": "πŸ¦™", "model": ""})
296
  return {"current": current, "current_model": current_model, "providers": providers}
297
 
src/soci/engine/llm.py CHANGED
@@ -64,6 +64,10 @@ MODEL_HF_QWEN = "Qwen/Qwen2.5-7B-Instruct" # default β€” auto-routed, g
64
  MODEL_HF_LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
65
  MODEL_HF_MISTRAL = "mistralai/Mistral-7B-Instruct-v0.3"
66
  MODEL_HF_SMOL = "HuggingFaceTB/SmolLM3-3B:hf-inference" # CPU inference, no credits needed
 
 
 
 
67
 
68
  # Approximate cost per 1M tokens (USD) β€” Ollama is free, Groq is very cheap
69
  COST_PER_1M = {
 
64
  MODEL_HF_LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
65
  MODEL_HF_MISTRAL = "mistralai/Mistral-7B-Instruct-v0.3"
66
  MODEL_HF_SMOL = "HuggingFaceTB/SmolLM3-3B:hf-inference" # CPU inference, no credits needed
67
+ MODEL_HF_SOCI = "RayMelius/soci-agent-q4" # Soci fine-tuned Qwen2.5-0.5B (LoRA)
68
+
69
+ # Ollama model IDs for Soci fine-tuned models
70
+ MODEL_OLLAMA_SOCI = "soci-agent" # load via: ollama create soci-agent -f Modelfile
71
 
72
  # Approximate cost per 1M tokens (USD) β€” Ollama is free, Groq is very cheap
73
  COST_PER_1M = {
src/soci/engine/simulation.py CHANGED
@@ -858,6 +858,7 @@ class Simulation:
858
  "events": self.events.to_dict(),
859
  "entropy": self.entropy.to_dict(),
860
  "conversation_counter": self._conversation_counter,
 
861
  }
862
 
863
  @classmethod
@@ -869,6 +870,7 @@ class Simulation:
869
  sim.events = EventSystem.from_dict(data["events"])
870
  sim.entropy = EntropyManager.from_dict(data["entropy"])
871
  sim._conversation_counter = data.get("conversation_counter", 0)
 
872
  for aid, agent_data in data["agents"].items():
873
  agent = Agent.from_dict(agent_data)
874
  sim.agents[agent.id] = agent
 
858
  "events": self.events.to_dict(),
859
  "entropy": self.entropy.to_dict(),
860
  "conversation_counter": self._conversation_counter,
861
+ "conversation_history": self.conversation_history,
862
  }
863
 
864
  @classmethod
 
870
  sim.events = EventSystem.from_dict(data["events"])
871
  sim.entropy = EntropyManager.from_dict(data["entropy"])
872
  sim._conversation_counter = data.get("conversation_counter", 0)
873
+ sim.conversation_history = data.get("conversation_history", [])
874
  for aid, agent_data in data["agents"].items():
875
  agent = Agent.from_dict(agent_data)
876
  sim.agents[agent.id] = agent