Spaces:
Configuration error
Configuration error
| """ | |
| System Prompt Configuration Module | |
| ===================================== | |
| Manage global system prompts, prepend to samples, | |
| and preview formatted chat JSON. | |
| """ | |
| from dataclasses import dataclass | |
| from typing import List, Dict, Any, Optional | |
| import json | |
| import pandas as pd | |
@dataclass
class SystemPromptConfig:
    """Configuration for system prompt handling.

    Declared as a dataclass so the fields below become per-instance
    attributes that can be overridden at construction time, e.g.
    ``SystemPromptConfig(system_prompt="Be terse.")``. Calling
    ``SystemPromptConfig()`` with no arguments yields the defaults.
    """

    # Text placed in the "system" message of a chat sample.
    system_prompt: str = "You are a helpful AI assistant."
    # NOTE(review): presumably toggles adding the system prompt to every
    # sample; no function visible in this module reads it -- confirm
    # against the callers.
    prepend_to_all: bool = True
def build_chat_json(
    instruction: str,
    output: str,
    system_prompt: str = "",
    context: str = "",
) -> Dict[str, Any]:
    """
    Assemble one sample in chat-message form.

    The result is ``{"messages": [...]}`` where the list holds, in order:
    an optional "system" turn (only when ``system_prompt`` is non-empty),
    a "user" turn carrying ``instruction`` (with ``context`` appended
    after a blank line and a "Context: " label when it is non-empty),
    and an "assistant" turn carrying ``output``.
    """
    # An empty context leaves the instruction untouched.
    user_text = instruction + f"\n\nContext: {context}" if context else instruction

    # Zero or one leading system turns, depending on the prompt.
    leading = [{"role": "system", "content": system_prompt}] if system_prompt else []

    return {
        "messages": [
            *leading,
            {"role": "user", "content": user_text},
            {"role": "assistant", "content": output},
        ]
    }
def _cell_to_text(value: Any) -> str:
    """Render one DataFrame cell as text, mapping missing values to ''."""
    # Only scalars are tested: pd.isna() returns an array for list-like
    # cells, which cannot be used directly in a boolean context.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ""
    return str(value)


def preview_formatted(
    df: pd.DataFrame,
    system_prompt: str,
    instruction_col: str,
    output_col: str,
    input_col: Optional[str] = None,
    n: int = 3,
) -> List[Dict[str, Any]]:
    """
    Generate a preview of n formatted chat-JSON samples.

    Takes the first ``n`` rows of ``df`` (via ``df.head(n)``) and converts
    each one with ``build_chat_json``.

    Args:
        df: Source table.
        system_prompt: Passed through to every sample unchanged.
        instruction_col: Column supplying the user instruction.
        output_col: Column supplying the assistant reply.
        input_col: Optional column supplying extra context. Ignored when
            empty/None or when the column is not present in ``df``.
        n: Number of leading rows to preview.

    Returns:
        One chat-format dict per previewed row, in row order.

    Cells are converted with ``str()``, except that missing values
    (NaN/None/NA) become ``""`` rather than the literal text "nan"/"None".
    Without that, an empty context cell would produce a bogus
    "Context: nan" suffix. A column name that is absent from ``df`` also
    yields ``""`` for that field.
    """
    # The column check does not depend on the row, so do it once.
    use_context = bool(input_col) and input_col in df.columns

    previews = []
    for _, row in df.head(n).iterrows():
        context = _cell_to_text(row.get(input_col, "")) if use_context else ""
        previews.append(
            build_chat_json(
                _cell_to_text(row.get(instruction_col, "")),
                _cell_to_text(row.get(output_col, "")),
                system_prompt,
                context,
            )
        )
    return previews
def preview_formatted_json(
    df: pd.DataFrame,
    system_prompt: str,
    instruction_col: str,
    output_col: str,
    input_col: Optional[str] = None,
    n: int = 3,
) -> str:
    """
    Render the first n formatted samples as an indented JSON string.

    All arguments are forwarded to ``preview_formatted``; the resulting
    list is serialized with a two-space indent and with non-ASCII
    characters written as-is rather than escaped.
    """
    return json.dumps(
        preview_formatted(
            df=df,
            system_prompt=system_prompt,
            instruction_col=instruction_col,
            output_col=output_col,
            input_col=input_col,
            n=n,
        ),
        ensure_ascii=False,
        indent=2,
    )