Spaces:

google
/

functiongemma-tuning-lab

Running

App Files Community

bebechien committed on Nov 28, 2025

Commit

fdf7bd6

verified ·

1 Parent(s): c055e6e

Upload folder using huggingface_hub

Browse files

Files changed (12) hide show

__pycache__/config.cpython-312.pyc +0 -0
__pycache__/engine.cpython-312.pyc +0 -0
__pycache__/tools.cpython-312.pyc +0 -0
__pycache__/ui.cpython-312.pyc +0 -0
__pycache__/utils.cpython-312.pyc +0 -0
app.py +11 -558
config.py +22 -0
engine.py +365 -0
requirements.txt +1 -0
tools.py +45 -0
ui.py +149 -0
utils.py +78 -0

__pycache__/config.cpython-312.pyc ADDED Viewed

Binary file (1.33 kB). View file

__pycache__/engine.cpython-312.pyc ADDED Viewed

Binary file (17.8 kB). View file

__pycache__/tools.cpython-312.pyc ADDED Viewed

Binary file (851 Bytes). View file

__pycache__/ui.cpython-312.pyc ADDED Viewed

Binary file (7.69 kB). View file

__pycache__/utils.cpython-312.pyc ADDED Viewed

Binary file (4.03 kB). View file

app.py CHANGED Viewed

@@ -1,562 +1,15 @@
-import gradio as gr
-import os
-import json
-import torch
-import csv
-import shutil
-import time
-import threading
-from typing import Final, Optional, List, Any, Generator
-from pathlib import Path
-from dataclasses import dataclass
-from huggingface_hub import login
-from trl import SFTConfig, SFTTrainer
-from transformers import (
-    AutoTokenizer,
-    AutoModelForCausalLM,
-    TrainerCallback,
-    TrainingArguments,
-    TrainerControl,
-    TrainerState
-)
-from datasets import Dataset, load_dataset
-# --- Configuration ---
-class AppConfig:
-    """
-    Central configuration class.
-    """
-    ARTIFACTS_DIR: Final[Path] = Path("artifacts")
-    ARTIFACTS_DIR.mkdir(parents=True, exist_ok=True)
-    HF_TOKEN: Final[Optional[str]] = os.getenv('HF_TOKEN')
-    MODEL_NAME: Final[str] = '../hf/270m'
-    DEFAULT_DATASET: Final[str] = 'bebechien/SimpleToolCalling'
-    OUTPUT_DIR: Final[Path] = ARTIFACTS_DIR.joinpath("functiongemma-modkit-demo")
-# --- Tool Definitions ---
-def search_knowledge_base(query: str) -> str:
-    """
-    Search internal company documents, policies and project data.
-    Args:
-        query: query string
-    """
-    return "Interal Result"
-def search_google(query: str) -> str:
-    """
-    Search public information.
-    Args:
-        query: query string
-    """
-    return "Public Result"
-search_knowledge_base_schema = {
-  "type": "function",
-  "function": {
-    "name": "search_knowledge_base",
-    "description": "Search internal company documents, policies and project data.",
-    "parameters": {
-      "type": "object",
-      "properties": {
-        "query": {
-          "type": "string",
-          "description": "query string"
-        }
-      },
-      "required": [
-        "query"
-      ]
-    },
-    "return": {
-      "type": "string"
-    }
-  }
-}
-search_google_schema = {
-  "type": "function",
-  "function": {
-    "name": "search_google",
-    "description": "Search public information.",
-    "parameters": {
-      "type": "object",
-      "properties": {
-        "query": {
-          "type": "string",
-          "description": "query string"
-        }
-      },
-      "required": [
-        "query"
-      ]
-    },
-    "return": {
-      "type": "string"
-    }
-  }
-}
-TOOLS = [search_knowledge_base_schema, search_google_schema]
-DEFAULT_SYSTEM_MSG = "You are a model that can do function calling with the following functions"
-# --- Callbacks ---
-class AbortCallback(TrainerCallback):
-    """
-    A custom callback to check a threading Event to stop training on user request.
-    """
-    def __init__(self, stop_event: threading.Event):
-        self.stop_event = stop_event
-    def on_step_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
-        if self.stop_event.is_set():
-            print("🛑 Stop signal received. Stopping training...")
-            control.should_training_stop = True
-# --- Helper Functions ---
-def authenticate_hf(token: Optional[str]) -> None:
-    """Logs into the Hugging Face Hub."""
-    if token:
-        print("Logging into Hugging Face Hub...")
-        login(token=token)
-    else:
-        print("Skipping Hugging Face login: HF_TOKEN not set.")
-def load_model_and_tokenizer(model_name: str):
-    print(f"Loading Transformer model: {model_name}")
-    try:
-        # Check if local path exists, otherwise treat as HF Hub ID
-        if model_name.startswith("..") and not os.path.exists(model_name):
-            print(f"Warning: Local path {model_name} not found. Falling back to default hub model.")
-            model_name = "google/gemma-2b-it" # Fallback example
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
-        model = AutoModelForCausalLM.from_pretrained(model_name)
-        print("Model loaded successfully.")
-        return model, tokenizer
-    except Exception as e:
-        print(f"Error loading Transformer model {model_name}: {e}")
-        raise e
-def create_conversation_format(sample):
-    """Formats a dataset row into the conversational format required for SFT."""
-    try:
-        tool_args = json.loads(sample["tool_arguments"])
-    except (json.JSONDecodeError, TypeError):
-        tool_args = {}
-    return {
-        "messages": [
-            {"role": "developer", "content": DEFAULT_SYSTEM_MSG},
-            {"role": "user", "content": sample["user_content"]},
-            {"role": "assistant", "tool_calls": [{"type": "function", "function": {"name": sample["tool_name"], "arguments": tool_args}}]},
-        ],
-        "tools": TOOLS
-    }
-# --- Main Application Logic ---
-class FunctionGemmaTuner:
-    def __init__(self, config: AppConfig = AppConfig):
-        self.config = config
-        self.model = None
-        self.tokenizer = None
-        self.imported_dataset = []
-        # Threading event to control stopping
-        self.stop_event = threading.Event()
-        authenticate_hf(self.config.HF_TOKEN)
-        # Initial load attempt
-        print("--- Running Initial Data Load ---")
-        try:
-            self.refresh_data_and_model()
-            print("--- Initial Load Complete ---")
-        except Exception as e:
-            print(f"Initial load failed (this is common if model path is invalid): {e}")
-    def refresh_data_and_model(self):
-        """Reloads the model and clears imported data."""
-        print("\n" + "=" * 50)
-        print("RELOADING MODEL and RE-FETCHING DATA")
-        self.imported_dataset = []
-        try:
-            self.model, self.tokenizer = load_model_and_tokenizer(self.config.MODEL_NAME)
-            status_value = "Model and data reloaded. Ready."
-        except Exception as e:
-            self.model = None
-            self.tokenizer = None
-            status_value = f"CRITICAL ERROR: Model failed to load. {e}"
-            # We don't raise here to allow the UI to render the error message
-        return status_value
-    def import_additional_dataset(self, file_path: str) -> str:
-        """Parses an uploaded CSV file."""
-        if not file_path:
-            return "Please upload a CSV file."
-        new_dataset = []
-        num_imported = 0
-        try:
-            # Open file handle properly
-            with open(file_path, 'r', newline='', encoding='utf-8') as f:
-                reader = csv.reader(f)
-                # Basic header validation
-                try:
-                    header = next(reader)
-                    # Simple heuristic check, allows skipping header or rewinding
-                    if not (header and "anchor" in header[0].lower()):
-                        f.seek(0)
-                except StopIteration:
-                    return "Error: Uploaded file is empty."
-                for row in reader:
-                    # Expecting: [User Prompt, Tool Name, Tool Args JSON/String]
-                    if len(row) >= 3:
-                        new_dataset.append([s.strip() for s in row[:3]])
-                        num_imported += 1
-            if num_imported == 0:
-                return "No valid rows found. CSV format: [Anchor, Positive, Negative]"
-            self.imported_dataset = new_dataset
-            return f"Successfully imported {num_imported} additional training samples."
-        except Exception as e:
-            return f"Import failed. Error: {e}"
-    def stop_training(self):
-        """Signal the training loop to stop."""
-        print("Set stop event")
-        self.stop_event.set()
-        return "Stopping initiated... please wait for the current step to finish."
-    def run_training(self, test_size: float = 0.5) -> Generator[str, None, None]:
-        """
-        Main training logic. Yields status strings to the UI.
-        """
-        # 1. Validation
-        if self.model is None:
-             yield "Training failed: Model is not loaded."
-             return
-        self.stop_event.clear() # Reset stop flag
-        yield "⏳ Preparing Dataset..."
-        # 2. Dataset Preparation
-        if not self.imported_dataset:
-            print("No imported dataset, using default HF dataset")
-            try:
-                dataset = load_dataset(self.config.DEFAULT_DATASET, split="train")
-            except Exception as e:
-                yield f"Error loading default dataset: {e}"
-                return
-        else:
-            dataset_as_dicts = [{
-                "user_content": row[0], "tool_name": row[1], "tool_arguments": row[2]}
-                for row in self.imported_dataset
-            ]
-            dataset = Dataset.from_list(dataset_as_dicts)
-        # Apply formatting
-        dataset = dataset.map(create_conversation_format, batched=False)
-        # Split
-        if len(dataset) > 1:
-            dataset = dataset.train_test_split(test_size=test_size, shuffle=False)
-        else:
-            # Fallback for very small datasets (mostly for debugging)
-            dataset = {"train": dataset, "test": dataset}
-        output_buffer = "📊 Evaluating Pre-Training Success Rate...\n### Success Rate (Before Training):\n"
-        yield output_buffer
-        pre_training_report = ""
-        gen = self.check_success_rate(dataset["test"])
-        while not self.stop_event.is_set():
-            try:
-                pre_training_report += f"{next(gen)}\n"
-                yield f"{output_buffer}{pre_training_report}"
-            except StopIteration as e:
-                pre_training_report = e.value
-                break
-        if self.stop_event.is_set():
-            output_buffer += f"{pre_training_report}\n\n🛑 Manual Eval interrupted by user.\n"
-            yield output_buffer
-            return
-        output_buffer += f"{pre_training_report}\n\n"
-        output_buffer += "-" * 30 + "\nStarting Fine-tuning...\n"
-        yield output_buffer
-        # 3. Training Setup
-        torch_dtype = self.model.dtype
-        args = SFTConfig(
-            output_dir=str(self.config.OUTPUT_DIR),
-            max_length=512,
-            packing=False,
-            num_train_epochs=5,
-            per_device_train_batch_size=4,
-            gradient_checkpointing=False,
-            optim="adamw_torch_fused",
-            logging_steps=1,
-            save_strategy="no", # Speed up demo
-            eval_strategy="epoch",
-            learning_rate=5e-5,
-            fp16=True if torch_dtype == torch.float16 else False,
-            bf16=True if torch_dtype == torch.bfloat16 else False,
-            lr_scheduler_type="constant",
-            push_to_hub=False,
-            report_to="none",
-            dataset_kwargs={
-                "add_special_tokens": False,
-                "append_concat_token": True,
-            }
-        )
-        trainer = SFTTrainer(
-            model=self.model,
-            args=args,
-            train_dataset=dataset['train'],
-            eval_dataset=dataset['test'],
-            processing_class=self.tokenizer,
-            callbacks=[AbortCallback(self.stop_event)] # Inject our stopper
-        )
-        # 4. Run Training
-        try:
-            output_buffer += "🚀 Training in progress... (Click Stop to interrupt)\n"
-            yield output_buffer
-            trainer.train()
-            if self.stop_event.is_set():
-                output_buffer += "\n🛑 Training interrupted by user.\n"
-            else:
-                output_buffer += "\n✅ Training finished. Model weights updated in memory.\n"
-            yield output_buffer
-            # Save locally
-            trainer.save_model()
-            output_buffer += f"Model saved locally to: {self.config.OUTPUT_DIR}\n"
-            yield output_buffer
-        except Exception as e:
-            output_buffer += f"\n❌ Error during training: {e}\n"
-            yield output_buffer
-            return
-        if self.stop_event.is_set():
-            return
-        # 5. Post-Evaluation
-        output_buffer += "📊 Evaluating Post-Training Success Rate...\n"
-        post_report = ""
-        yield output_buffer
-        gen = self.check_success_rate(dataset["test"])
-        while not self.stop_event.is_set():
-            try:
-                post_report += f"{next(gen)}\n"
-                yield f"{output_buffer}{post_report}"
-            except StopIteration as e:
-                post_report = e.value
-                break
-        if self.stop_event.is_set():
-            output_buffer += f"{post_report}\n\n🛑 Manual Eval interrupted by user.\n"
-            yield output_buffer
-            return
-        output_buffer += f"{post_report}\n\n"
-        yield output_buffer
-    def check_success_rate(self, test_dataset):
-        """Runs inference on test set to calculate accuracy."""
-        results = []
-        success_count = 0
-        total = len(test_dataset)
-        for idx, item in enumerate(test_dataset):
-            if idx >= 5:
-                break
-            if self.stop_event.is_set():
-                break
-            messages = [item["messages"][0], item["messages"][1]] # System + User
-            try:
-                inputs = self.tokenizer.apply_chat_template(
-                    messages,
-                    tools=TOOLS,
-                    add_generation_prompt=True,
-                    return_dict=True,
-                    return_tensors="pt"
-                )
-                out = self.model.generate(
-                    **inputs.to(self.model.device),
-                    pad_token_id=self.tokenizer.eos_token_id,
-                    max_new_tokens=128
-                )
-                # Decode only the new tokens
-                output = self.tokenizer.decode(out[0][len(inputs["input_ids"][0]) :], skip_special_tokens=True)
-                results.append(f"{idx+1}. Prompt: {item['messages'][1]['content']}")
-                yield results[-1]
-                results.append(f"   Output: {output[:100]}...")
-                yield results[-1]
-                # Check for correct tool name usage
-                expected_tool = item['messages'][2]['tool_calls'][0]['function']['name']
-                if expected_tool in output:
-                    results.append("   -> ✅ Correct Tool")
-                    yield results[-1]
-                    success_count += 1
-                else:
-                    results.append(f"   -> ❌ Wrong Tool (Expected: {expected_tool})")
-                    yield results[-1]
-            except Exception as e:
-                results.append(f"   -> Error: {e}")
-                yield results[-1]
-        summary = "\n".join(results)
-        summary += f"\n\nTotal Success : {success_count} / {len(test_dataset)}"
-        return summary
-    def download_model_zip(self) -> Optional[str]:
-        """Zips the output directory for download."""
-        if not os.path.exists(self.config.OUTPUT_DIR):
-            return None
-        timestamp = int(time.time())
-        try:
-            base_name = self.config.ARTIFACTS_DIR.joinpath(f"functiongemma_finetuned_{timestamp}")
-            archive_path = shutil.make_archive(
-                base_name=str(base_name),
-                format='zip',
-                root_dir=str(self.config.OUTPUT_DIR),
-            )
-            return archive_path
-        except Exception as e:
-            print(f"Zip failed: {e}")
-            return None
-    # --- UI Builder ---
-    def build_interface(self) -> gr.Blocks:
-        with gr.Blocks(title="FunctionGemma Modkit") as demo:
-            gr.Markdown("# 🤖 FunctionGemma Modkit: Fine-Tuning")
-            gr.Markdown("Fine-tune FunctionGemma to understand your custom functions.")
-            with gr.Column():
-                gr.Markdown("## 1. Training Controls")
-                with gr.Row():
-                    run_training_btn = gr.Button("🚀 Run Fine-Tuning", variant="primary")
-                    stop_training_btn = gr.Button("🛑 Stop Training", variant="stop", visible=False)
-                output_display = gr.Textbox(
-                    lines=14,
-                    label="Training Logs & Search Results",
-                    value="Ready. Click 'Run' to begin.",
-                    interactive=False
-                )
-                clear_reload_btn = gr.Button("🔄 Reset Model & Data")
-                gr.Markdown("--- \n ## 2. Data Management")
-                import_file = gr.File(label="Upload Additional Dataset (.csv)", file_types=[".csv"], height=80)
-                import_status = gr.Markdown("")
-                gr.Markdown("--- \n ## 3. Export")
-                with gr.Row():
-                    zip_btn = gr.Button("⬇️ Prepare Model ZIP")
-                    download_file = gr.File(label="Download ZIP", height=80, visible=True, interactive=False)
-            # --- Event Wiring ---
-            # Start Training (Generator updates output_display)
-            run_training_btn.click(
-                fn=lambda: (
-                    gr.update(visible=False),
-                    gr.update(interactive=False),
-                    gr.update(visible=True)
-                ),
-                inputs=None,
-                outputs=[run_training_btn, clear_reload_btn, stop_training_btn]
-            ).then(
-                fn=self.run_training,
-                inputs=[],
-                outputs=[output_display],
-            ).then(
-                fn=lambda: (
-                    gr.update(visible=True),
-                    gr.update(interactive=True),
-                    gr.update(visible=False)
-                ),
-                inputs=None,
-                outputs=[run_training_btn, clear_reload_btn, stop_training_btn]
-            )
-            # Stop Training
-            stop_training_btn.click(
-                fn=self.stop_training,
-                inputs=None,
-                outputs=None # We don't need to return anything, status updates via the training generator
-            ).then(
-                fn=lambda: (
-                    gr.update(visible=True),
-                    gr.update(interactive=True),
-                    gr.update(visible=False)
-                ),
-                inputs=None,
-                outputs=[run_training_btn, clear_reload_btn, stop_training_btn]
-            )
-            # Reload
-            clear_reload_btn.click(
-                fn=self.refresh_data_and_model,
-                inputs=None,
-                outputs=[output_display]
-            )
-            # File Import
-            import_file.upload(
-                fn=self.import_additional_dataset,
-                inputs=[import_file],
-                outputs=[import_status]
-            )
-            # Download Logic
-            def handle_zip():
-                path = self.download_model_zip()
-                if path:
-                    return gr.update(value=path, visible=True)
-                return gr.update(value=None, visible=False)
-            zip_btn.click(
-                fn=handle_zip,
-                inputs=None,
-                outputs=[download_file]
-            )
-        return demo
 if __name__ == "__main__":
-    app = FunctionGemmaTuner(AppConfig)
-    demo = app.build_interface()
     print("Starting Gradio App...")
     demo.launch()

+from config import AppConfig
+from engine import FunctionGemmaEngine
+from ui import build_interface
 if __name__ == "__main__":
+    # Initialize Config
+    config = AppConfig()
+    # Initialize Logic Engine
+    app_engine = FunctionGemmaEngine(config)
+    # Build and Launch UI
+    demo = build_interface(app_engine)
     print("Starting Gradio App...")
     demo.launch()

config.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import os
+from pathlib import Path
+from typing import Final, Optional
+from dataclasses import dataclass
+@dataclass
+class AppConfig:
+    """
+    Central configuration class.
+    """
+    # Directory Setup
+    ARTIFACTS_DIR: Final[Path] = Path("artifacts")
+    OUTPUT_DIR: Final[Path] = ARTIFACTS_DIR.joinpath("functiongemma-modkit-demo")
+    # Model & Data
+    HF_TOKEN: Final[Optional[str]] = os.getenv('HF_TOKEN')
+    # NOTE: MODEL_NAME is still the original local path '../hf/270m', not a Hub model ID.
+    MODEL_NAME: Final[str] = '../hf/270m'
+    DEFAULT_DATASET: Final[str] = 'bebechien/SimpleToolCalling'
+    def __post_init__(self):
+        self.ARTIFACTS_DIR.mkdir(parents=True, exist_ok=True)

engine.py ADDED Viewed

	@@ -0,0 +1,365 @@

+import threading
+import torch
+import time
+import json
+import queue
+import matplotlib.pyplot as plt
+from functools import partial
+from typing import Generator, Optional, List, Dict
+from datasets import Dataset, load_dataset
+from trl import SFTConfig, SFTTrainer
+from transformers import TrainerCallback, TrainingArguments, TrainerState, TrainerControl
+from config import AppConfig
+from tools import DEFAULT_TOOLS
+from utils import (
+    authenticate_hf,
+    load_model_and_tokenizer,
+    create_conversation_format,
+    parse_csv_dataset,
+    zip_directory
+)
+class AbortCallback(TrainerCallback):
+    def __init__(self, stop_event: threading.Event):
+        self.stop_event = stop_event
+    def on_step_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
+        if self.stop_event.is_set():
+            control.should_training_stop = True
+class LogStreamingCallback(TrainerCallback):
+    """
+    NEW: Intercepts training logs and pushes them to a queue
+    so the main thread can display them in the UI.
+    """
+    def __init__(self, log_queue: queue.Queue):
+        self.log_queue = log_queue
+    def _get_string(self, value):
+        if isinstance(value, float):
+            return f"{value:.4f}"
+        return str(value)
+    def on_log(self, args, state, control, logs=None, **kwargs):
+        if not logs:
+            return
+        metrics_map = {
+            "loss": "Loss",
+            "eval_loss": "Eval Loss",
+            "learning_rate": "LR",
+            "epoch": "Epoch"
+        }
+        log_parts = [f"📝 [Step {state.global_step}]"]
+        for key, label in metrics_map.items():
+            if key in logs:
+                val = logs[key]
+                # Format floats: use scientific notation for very small numbers (like LR)
+                if isinstance(val, (float, int)):
+                    val_str = f"{val:.4f}" if val > 1e-4 else f"{val:.2e}"
+                else:
+                    val_str = str(val)
+                log_parts.append(f"{label}: {val_str}")
+        self.log_queue.put(" | ".join(log_parts))
+class FunctionGemmaEngine:
+    def __init__(self, config: AppConfig):
+        self.config = config
+        self.model = None
+        self.tokenizer = None
+        self.imported_dataset = []
+        self.stop_event = threading.Event()
+        # NEW: State for tools
+        self.current_tools = DEFAULT_TOOLS
+        authenticate_hf(self.config.HF_TOKEN)
+        try:
+            self.refresh_data_and_model()
+        except Exception as e:
+            print(f"Initial load warning: {e}")
+    # NEW: Methods to handle Tool Schema updates
+    def get_tools_json(self) -> str:
+        return json.dumps(self.current_tools, indent=2)
+    def update_tools(self, json_str: str) -> str:
+        try:
+            new_tools = json.loads(json_str)
+            if not isinstance(new_tools, list):
+                return "Error: Schema must be a list of tool definitions."
+            self.current_tools = new_tools
+            return "✅ Tool Schema Updated successfully."
+        except json.JSONDecodeError as e:
+            return f"❌ JSON Error: {e}"
+        except Exception as e:
+            return f"❌ Error: {e}"
+    def refresh_data_and_model(self) -> str:
+        self.imported_dataset = []
+        try:
+            self.model, self.tokenizer = load_model_and_tokenizer(self.config.MODEL_NAME)
+            return "Model and data reloaded. Ready."
+        except Exception as e:
+            self.model = None
+            self.tokenizer = None
+            return f"CRITICAL ERROR: Model failed to load. {e}"
+    def load_csv(self, file_path: str) -> str:
+        try:
+            new_data = parse_csv_dataset(file_path)
+            if not new_data:
+                return "Error: File empty or format invalid."
+            self.imported_dataset = new_data
+            return f"Successfully imported {len(new_data)} samples."
+        except Exception as e:
+            return f"Import failed: {e}"
+    def trigger_stop(self):
+        self.stop_event.set()
+    def run_training_pipeline(self, epochs: int, learning_rate: float, test_size: float, shuffle_data: bool) -> Generator[str, None, None]:
+        if self.model is None:
+             yield "Training failed: Model is not loaded.", None
+             return
+        self.stop_event.clear()
+        output_buffer = f"⏳ Preparing Dataset (Test Split: {test_size}, Shuffle: {shuffle_data})...\n"
+        yield output_buffer, None
+        dataset, log = self._prepare_dataset()
+        if not dataset:
+            yield "Dataset creation failed.", None
+            return
+        output_buffer += log
+        yield output_buffer, None
+        if len(dataset) > 1:
+            dataset = dataset.train_test_split(test_size=test_size, shuffle=shuffle_data)
+        else:
+            dataset = {"train": dataset, "test": dataset}
+        # --- Phase 1: Pre-Training Eval ---
+        output_buffer += "\n📊 Evaluating Pre-Training Success Rate...\n"
+        yield output_buffer, None
+        pre_training_report = ""
+        for update in self._evaluate_model(dataset["test"]):
+            pre_training_report = update
+            if self.stop_event.is_set():
+                pre_training_report += "\n\n🛑 Manual Eval interrupted by user.\n"
+                yield f"{output_buffer}{pre_training_report}", None
+                break
+            yield f"{output_buffer}{pre_training_report}", None
+        if self.stop_event.is_set(): return
+        output_buffer += pre_training_report
+        # --- Phase 2: Training (Threaded) ---
+        output_buffer += "\n\n🚀 Starting Fine-tuning (Epochs: {epochs}, LR: {learning_rate})...\n"
+        yield output_buffer, None
+        log_queue = queue.Queue()
+        training_error = None
+        training_history = []
+        # Function to run in the thread
+        def train_wrapper():
+            nonlocal training_error, training_history
+            try:
+                training_history = self._execute_trainer(dataset, log_queue, epochs, learning_rate)
+            except Exception as e:
+                training_error = e
+        # Start training thread
+        train_thread = threading.Thread(target=train_wrapper)
+        train_thread.start()
+        # Monitor loop: Yields logs while training runs
+        while train_thread.is_alive():
+            # Drain the queue
+            while not log_queue.empty():
+                log_msg = log_queue.get()
+                output_buffer += f"{log_msg}\n"
+                yield output_buffer, None
+            # Check for stop signal
+            if self.stop_event.is_set():
+                yield f"{output_buffer}🛑 Stop signal sent. Waiting for trainer to wrap up...\n", None
+                # We don't break here, we wait for thread to finish cleanly
+            time.sleep(0.1) # Prevent CPU spinning
+        train_thread.join() # Ensure thread is completely done
+        # Flush any remaining logs
+        while not log_queue.empty():
+            log_msg = log_queue.get()
+            output_buffer += f"{log_msg}\n"
+            yield output_buffer, None
+        if training_error:
+            output_buffer += f"❌ Error during training: {training_error}\n"
+            yield output_buffer, None
+            return
+        if self.stop_event.is_set():
+            output_buffer += "🛑 Training manually stopped.\n"
+            yield output_buffer, None
+            return
+        output_buffer += "✅ Training finished.\n"
+        yield output_buffer, None
+        output_buffer += "\n📈 Generating Loss Plot...\n"
+        yield output_buffer, None
+        try:
+            final_plot = self._generate_loss_plot(training_history)
+            yield output_buffer, final_plot
+        except Exception as e:
+            output_buffer += f"⚠️ Could not generate plot: {e}\n"
+            yield output_buffer, None
+        # --- Phase 3: Post-Training Eval ---
+        output_buffer += "\n📊 Evaluating Post-Training Success Rate...\n"
+        yield output_buffer, final_plot
+        post_training_report = ""
+        for update in self._evaluate_model(dataset["test"]):
+            post_training_report = update
+            if self.stop_event.is_set():
+                post_training_report += "\n\n🛑 Manual Eval interrupted by user.\n"
+                yield f"{output_buffer}{post_training_report}", final_plot
+                break
+            yield f"{output_buffer}{post_training_report}", final_plot
+    def _prepare_dataset(self):
+        # NEW: Use partial to inject self.current_tools into the formatting function
+        formatting_fn = partial(create_conversation_format, tools_list=self.current_tools)
+        if not self.imported_dataset:
+            ds = load_dataset(self.config.DEFAULT_DATASET, split="train").map(formatting_fn)
+            log = f" `-> using default dataset (size:{len(ds)})\n"
+        else:
+            dataset_as_dicts = [{
+                "user_content": row[0], "tool_name": row[1], "tool_arguments": row[2]}
+                for row in self.imported_dataset
+            ]
+            ds = Dataset.from_list(dataset_as_dicts).map(formatting_fn)
+            log = f" `-> using custom dataset (size:{len(ds)})\n"
+        return ds, log
+    def _execute_trainer(self, dataset, log_queue: queue.Queue, epochs: int, learning_rate: float) -> List[Dict]:
+        torch_dtype = self.model.dtype
+        args = SFTConfig(
+            output_dir=str(self.config.OUTPUT_DIR),
+            max_length=512,
+            packing=False,
+            num_train_epochs=epochs,
+            per_device_train_batch_size=4,
+            logging_steps=1,
+            save_strategy="no",
+            eval_strategy="epoch",
+            learning_rate=learning_rate,
+            fp16=(torch_dtype == torch.float16),
+            bf16=(torch_dtype == torch.bfloat16),
+            report_to="none",
+            dataset_kwargs={"add_special_tokens": False, "append_concat_token": True}
+        )
+        trainer = SFTTrainer(
+            model=self.model,
+            args=args,
+            train_dataset=dataset['train'],
+            eval_dataset=dataset['test'],
+            processing_class=self.tokenizer,
+            callbacks=[
+                AbortCallback(self.stop_event),
+                LogStreamingCallback(log_queue)
+            ]
+        )
+        trainer.train()
+        trainer.save_model()
+        return trainer.state.log_history
+    def _generate_loss_plot(self, history: list):
+        if not history:
+            return None
+        # Extract Training Loss
+        # log_history format: [{'loss': 0.5, 'step': 1}, {'eval_loss': 0.4, 'step': 1}, ...]
+        train_steps = [x['step'] for x in history if 'loss' in x]
+        train_loss = [x['loss'] for x in history if 'loss' in x]
+        # Extract Validation Loss
+        eval_steps = [x['step'] for x in history if 'eval_loss' in x]
+        eval_loss = [x['eval_loss'] for x in history if 'eval_loss' in x]
+        fig, ax = plt.subplots(figsize=(10, 5))
+        if train_steps:
+            ax.plot(train_steps, train_loss, label='Training Loss', linestyle='-', marker=None)
+        if eval_steps:
+            ax.plot(eval_steps, eval_loss, label='Validation Loss', linestyle='--', marker='o')
+        ax.set_xlabel("Steps")
+        ax.set_ylabel("Loss")
+        ax.set_title("Training & Validation Loss")
+        ax.legend()
+        ax.grid(True, linestyle=':', alpha=0.6)
+        plt.tight_layout()
+        return fig
+    def _evaluate_model(self, test_dataset) -> Generator[str, None, None]:
+        results = []
+        success_count = 0
+        for idx, item in enumerate(test_dataset):
+            messages = item["messages"][:2]
+            try:
+                # NEW: Pass self.current_tools to the template
+                inputs = self.tokenizer.apply_chat_template(
+                    messages, tools=self.current_tools, add_generation_prompt=True, return_dict=True, return_tensors="pt"
+                )
+                device = self.model.device
+                inputs = {k: v.to(device) for k, v in inputs.items()}
+                out = self.model.generate(
+                    **inputs,
+                    pad_token_id=self.tokenizer.eos_token_id,
+                    max_new_tokens=128
+                )
+                output = self.tokenizer.decode(out[0][len(inputs["input_ids"][0]):], skip_special_tokens=True)
+                log_entry = f"{idx+1}. Prompt: {messages[1]['content']}\n   Output: {output[:100]}..."
+                # Check tool correctness
+                expected_tool = item['messages'][2]['tool_calls'][0]['function']['name']
+                if expected_tool in output:
+                    log_entry += "\n   -> ✅ Correct Tool"
+                    success_count += 1
+                else:
+                    log_entry += f"\n   -> ❌ Wrong Tool (Expected: {expected_tool})"
+                results.append(log_entry)
+                yield "\n".join(results) + f"\n\nRunning Success Rate: {success_count}/{idx+1}"
+            except Exception as e:
+                yield f"Error during inference: {e}"
+    def get_zip_path(self) -> Optional[str]:
+        if not self.config.OUTPUT_DIR.exists():
+            return None
+        timestamp = int(time.time())
+        base_name = str(self.config.ARTIFACTS_DIR.joinpath(f"functiongemma_finetuned_{timestamp}"))
+        return zip_directory(str(self.config.OUTPUT_DIR), base_name)

requirements.txt CHANGED Viewed

@@ -1,5 +1,6 @@
 accelerate
 datasets
 gradio
 transformers
 trl

 accelerate
 datasets
 gradio
+matplotlib
 transformers
 trl

tools.py ADDED Viewed

	@@ -0,0 +1,45 @@

+# --- Tool Definitions ---
+# (Existing python functions search_knowledge_base/search_google remain here for reference,
+# but the schema below is what matters for the LLM)
+search_knowledge_base_schema = {
+  "type": "function",
+  "function": {
+    "name": "search_knowledge_base",
+    "description": "Search internal company documents, policies and project data.",
+    "parameters": {
+      "type": "object",
+      "properties": {
+        "query": {
+          "type": "string",
+          "description": "query string"
+        }
+      },
+      "required": ["query"]
+    },
+    "return": {"type": "string"}
+  }
+}
+search_google_schema = {
+  "type": "function",
+  "function": {
+    "name": "search_google",
+    "description": "Search public information.",
+    "parameters": {
+      "type": "object",
+      "properties": {
+        "query": {
+          "type": "string",
+          "description": "query string"
+        }
+      },
+      "required": ["query"]
+    },
+    "return": {"type": "string"}
+  }
+}
+# Renamed to DEFAULT_TOOLS to imply modifiability
+DEFAULT_TOOLS = [search_knowledge_base_schema, search_google_schema]
+DEFAULT_SYSTEM_MSG = "You are a model that can do function calling with the following functions"

ui.py ADDED Viewed

	@@ -0,0 +1,149 @@

+import gradio as gr
+from engine import FunctionGemmaEngine
+def build_interface(engine: FunctionGemmaEngine) -> gr.Blocks:
+    """Build the three-tab Gradio UI and wire its events to ``engine``.
+    The tabs are "1. Preparing Dataset" (tool-schema editor and CSV upload),
+    "2. Training" (hyperparameters, run/stop/reset buttons, log box and plot)
+    and "3. Export" (ZIP download of the trained output).
+    Args:
+        engine: Backend object whose methods serve as the event callbacks:
+            ``get_tools_json``, ``update_tools``, ``load_csv``,
+            ``run_training_pipeline``, ``trigger_stop``,
+            ``refresh_data_and_model`` and ``get_zip_path``.
+    Returns:
+        The constructed ``gr.Blocks`` app; this function does not launch it.
+    """
+    with gr.Blocks(title="FunctionGemma Modkit") as demo:
+        gr.Markdown("# 🤖 FunctionGemma Modkit: Fine-Tuning")
+        gr.Markdown("Fine-tune FunctionGemma to understand your custom functions.")
+        with gr.Tabs():
+            # --- TAB 1: PREPARING DATASET ---
+            with gr.TabItem("1. Preparing Dataset"):
+                gr.Markdown("### 🛠️ Tool Schema & Data Import")
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        gr.Markdown("**Step 1: Define Functions**\n\nEdit the JSON schema below to define the tools the model should learn.")
+                        # The editor is pre-filled once, at build time, from the engine's current tools.
+                        tools_editor = gr.Code(
+                            value=engine.get_tools_json(),
+                            language="json",
+                            label="Tool Definitions (JSON Schema)",
+                            lines=15
+                        )
+                        update_tools_btn = gr.Button("💾 Update Tool Schema")
+                        tools_status = gr.Markdown("")
+                    with gr.Column(scale=1):
+                        gr.Markdown("**Step 2: Upload Data (Optional)**\n\nUpload a CSV file to replace the default dataset.\nFormat: `[User Prompt, Tool Name, Tool Args JSON]`")
+                        import_file = gr.File(
+                            label="Upload Dataset (.csv)",
+                            file_types=[".csv"],
+                            height=100
+                        )
+                        import_status = gr.Markdown("")
+            # --- TAB 2: TRAINING ---
+            with gr.TabItem("2. Training"):
+                gr.Markdown("### 🚀 Fine-Tuning Configuration")
+                with gr.Group():
+                    gr.Markdown("**Hyperparameters**")
+                    with gr.Row():
+                        param_epochs = gr.Slider(
+                            minimum=1, maximum=20, value=5, step=1,
+                            label="Epochs", info="Total training passes"
+                        )
+                        param_lr = gr.Number(
+                            value=5e-5,
+                            label="Learning Rate",
+                            info="e.g. 5e-5"
+                        )
+                        param_test_size = gr.Slider(
+                            minimum=0.1, maximum=0.9, value=0.2, step=0.05,
+                            label="Test Split", info="Validation data ratio. Typical value is 0.2 (80% for training, 20% for testing)"
+                        )
+                        param_shuffle = gr.Checkbox(
+                            value=True,
+                            label="Shuffle Data",
+                            info="Randomize before split"
+                        )
+                with gr.Row():
+                    run_training_btn = gr.Button("🚀 Run Fine-Tuning", variant="primary", scale=2)
+                    # Stop starts hidden; the training chain below swaps it with Run.
+                    stop_training_btn = gr.Button("🛑 Stop", variant="stop", visible=False, scale=1)
+                    clear_reload_btn = gr.Button("🔄 Reset", variant="secondary", scale=1)
+                with gr.Row():
+                    # Left column: Text Logs
+                    output_display = gr.Textbox(
+                        lines=20,
+                        label="Logs & Results",
+                        value="Ready.",
+                        interactive=False,
+                        autoscroll=True
+                    )
+                    # Right column: Plot (NEW)
+                    loss_plot = gr.Plot(label="Training Metrics")
+            # --- TAB 3: EXPORT ---
+            with gr.TabItem("3. Export"):
+                gr.Markdown("### 📦 Export Trained Model")
+                gr.Markdown("Download the fine-tuned LoRA adapters or full model weights (depending on configuration) as a ZIP file.")
+                with gr.Row():
+                    zip_btn = gr.Button("⬇️ Prepare Model ZIP", variant="primary", scale=1)
+                    download_file = gr.File(label="Download Archive", interactive=False, scale=2)
+        # --- EVENT WIRING ---
+        # Tab 1: Tools
+        update_tools_btn.click(
+            fn=engine.update_tools,
+            inputs=[tools_editor],
+            outputs=[tools_status]
+        )
+        # Tab 1: File Import
+        import_file.upload(
+            fn=engine.load_csv,
+            inputs=[import_file],
+            outputs=[import_status]
+        )
+        # Tab 2: Training
+        # Three chained steps: swap the buttons, run the pipeline (which feeds
+        # the log box and the plot), then restore the buttons.
+        # NOTE(review): the restore step uses .then(), presumably so it also runs
+        # when the pipeline step fails; confirm against Gradio's .then()/.success() docs.
+        run_training_btn.click(
+            fn=lambda: (
+                gr.update(visible=False), # Hide Run
+                gr.update(interactive=False), # Disable Reset
+                gr.update(visible=True)   # Show Stop
+            ),
+            outputs=[run_training_btn, clear_reload_btn, stop_training_btn]
+        ).then(
+            fn=engine.run_training_pipeline,
+            inputs=[param_epochs, param_lr, param_test_size, param_shuffle],
+            outputs=[output_display, loss_plot],
+        ).then(
+            fn=lambda: (
+                gr.update(visible=True),  # Show Run
+                gr.update(interactive=True), # Enable Reset
+                gr.update(visible=False)  # Hide Stop
+            ),
+            outputs=[run_training_btn, clear_reload_btn, stop_training_btn]
+        )
+        # Tab 2: Stop
+        # The tuple exists only to call engine.trigger_stop() inside a lambda;
+        # index [1] makes the lambda return "Stopping...".
+        # NOTE(review): outputs=None, so that string is not bound to any component; confirm this is intended.
+        stop_training_btn.click(
+            fn=lambda: (engine.trigger_stop(), "Stopping...")[1],
+            outputs=None
+        )
+        # Tab 2: Reset
+        clear_reload_btn.click(
+            fn=engine.refresh_data_and_model,
+            outputs=[output_display]
+        )
+        # Tab 3: Download
+        def handle_zip():
+            # Show the archive when the engine returns a path; otherwise clear and hide the file component.
+            path = engine.get_zip_path()
+            if path:
+                return gr.update(value=path, visible=True)
+            return gr.update(value=None, visible=False)
+        zip_btn.click(
+            fn=handle_zip,
+            outputs=[download_file]
+        )
+    return demo

utils.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import os
+import csv
+import json
+import shutil
+from typing import Optional, List, Any
+from huggingface_hub import login
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from tools import DEFAULT_SYSTEM_MSG
+# Note: We do NOT import TOOLS here anymore to avoid stale data
+def authenticate_hf(token: Optional[str]) -> None:
+    """Logs into the Hugging Face Hub."""
+    if token:
+        print("Logging into Hugging Face Hub...")
+        login(token=token)
+    else:
+        print("Skipping Hugging Face login: HF_TOKEN not set.")
+def load_model_and_tokenizer(model_name: str):
+    print(f"Loading Transformer model: {model_name}")
+    try:
+        target_model = model_name
+        if model_name.startswith("..") and not os.path.exists(model_name):
+            print(f"Warning: Local path {model_name} not found. Falling back to default hub model.")
+            target_model = "google/gemma-2b-it"
+        tokenizer = AutoTokenizer.from_pretrained(target_model)
+        model = AutoModelForCausalLM.from_pretrained(target_model)
+        print("Model loaded successfully.")
+        return model, tokenizer
+    except Exception as e:
+        print(f"Error loading Transformer model {target_model}: {e}")
+        raise e
+# UPDATED: Now accepts tools_list as an argument
+def create_conversation_format(sample, tools_list):
+    """Formats a dataset row into the conversational format required for SFT."""
+    try:
+        tool_args = json.loads(sample["tool_arguments"])
+    except (json.JSONDecodeError, TypeError):
+        tool_args = {}
+    return {
+        "messages": [
+            {"role": "developer", "content": DEFAULT_SYSTEM_MSG},
+            {"role": "user", "content": sample["user_content"]},
+            {"role": "assistant", "tool_calls": [{"type": "function", "function": {"name": sample["tool_name"], "arguments": tool_args}}]},
+        ],
+        "tools": tools_list # Injects the dynamic tools
+    }
+def parse_csv_dataset(file_path: str) -> List[List[str]]:
+    """Parses an uploaded CSV file."""
+    dataset = []
+    if not file_path:
+        return dataset
+    with open(file_path, 'r', newline='', encoding='utf-8') as f:
+        reader = csv.reader(f)
+        try:
+            header = next(reader)
+            if not (header and "user_content" in header[0].lower()):
+                f.seek(0)
+        except StopIteration:
+            return dataset
+        for row in reader:
+            if len(row) >= 3:
+                dataset.append([s.strip() for s in row[:3]])
+    return dataset
+def zip_directory(source_dir: str, output_name_base: str) -> str:
+    """Zips a directory."""
+    return shutil.make_archive(
+        base_name=output_name_base,
+        format='zip',
+        root_dir=source_dir,
+    )