"""
System Prompt Configuration Module
=====================================
Manage global system prompts, prepend to samples, and preview formatted chat JSON.
"""

from dataclasses import dataclass
from typing import List, Dict, Any, Optional
import json

import pandas as pd


@dataclass
class SystemPromptConfig:
    """Configuration for system prompt handling."""

    # Text used as the content of the "system" message.
    system_prompt: str = "You are a helpful AI assistant."
    # NOTE(review): not read anywhere in this module; presumably consumed by
    # callers to decide whether every sample gets the system prompt -- confirm.
    prepend_to_all: bool = True


def build_chat_json(
    instruction: str,
    output: str,
    system_prompt: str = "",
    context: str = "",
) -> Dict[str, Any]:
    """
    Build a single chat-format JSON entry.

    Args:
        instruction: Text of the user turn.
        output: Text of the assistant turn.
        system_prompt: Optional system message; omitted entirely when empty.
        context: Optional extra context; when non-empty it is appended to the
            user turn as ``"\\n\\nContext: <context>"``.

    Returns:
        ``{"messages": [{"role": ..., "content": ...}, ...]}`` with the
        messages ordered system (if any), user, assistant.
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})

    user_content = instruction
    if context:
        user_content += f"\n\nContext: {context}"

    messages.append({"role": "user", "content": user_content})
    messages.append({"role": "assistant", "content": output})
    return {"messages": messages}


def _cell_to_text(value: Any) -> str:
    """Convert a DataFrame cell to text, mapping missing values to ``""``."""
    # Only scalars are tested for missingness: pd.isna() on a list-like cell
    # returns an array, which has no single truth value.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ""
    return str(value)


def preview_formatted(
    df: pd.DataFrame,
    system_prompt: str,
    instruction_col: str,
    output_col: str,
    input_col: Optional[str] = None,
    n: int = 3,
) -> List[Dict[str, Any]]:
    """
    Generate a preview of n formatted chat-JSON samples.

    Args:
        df: Source data; rows are taken with ``df.head(n)``.
        system_prompt: System message passed to every sample ("" to omit).
        instruction_col: Column holding the user instruction.
        output_col: Column holding the assistant output.
        input_col: Optional column holding extra context; ignored when it is
            falsy or not a column of ``df``.
        n: Number of leading rows to preview.

    Returns:
        One ``build_chat_json`` entry per previewed row. Missing cells
        (``None``/``NaN``/``NA``) are rendered as empty strings, so a missing
        context adds no "Context:" suffix to the user message.
    """
    # The column check does not depend on the row, so evaluate it once.
    use_context = bool(input_col) and input_col in df.columns

    previews = []
    for _, row in df.head(n).iterrows():
        instruction = _cell_to_text(row.get(instruction_col, ''))
        output = _cell_to_text(row.get(output_col, ''))
        context = _cell_to_text(row.get(input_col, '')) if use_context else ''
        previews.append(
            build_chat_json(instruction, output, system_prompt, context)
        )
    return previews


def preview_formatted_json(
    df: pd.DataFrame,
    system_prompt: str,
    instruction_col: str,
    output_col: str,
    input_col: Optional[str] = None,
    n: int = 3,
) -> str:
    """
    Return a pretty-printed JSON string of n sample entries.

    Takes the same arguments as ``preview_formatted``; the result is
    serialised with a 2-space indent and non-ASCII characters left unescaped.
    """
    samples = preview_formatted(
        df, system_prompt, instruction_col, output_col, input_col, n
    )
    return json.dumps(samples, indent=2, ensure_ascii=False)