Spaces:
Configuration error
Configuration error
File size: 2,259 Bytes
d4398e6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | """
System Prompt Configuration Module
=====================================
Manage global system prompts, prepend to samples,
and preview formatted chat JSON.
"""
from dataclasses import dataclass
from typing import List, Dict, Any, Optional
import json
import pandas as pd
@dataclass
class SystemPromptConfig:
    """Settings describing the global system prompt.

    Both fields have defaults, so ``SystemPromptConfig()`` is a valid,
    ready-to-use configuration.
    """

    # Text of the system message.
    system_prompt: str = "You are a helpful AI assistant."
    # NOTE(review): presumably toggles adding the prompt to every sample;
    # nothing in the visible code reads this flag -- confirm with callers.
    prepend_to_all: bool = True
def build_chat_json(
    instruction: str,
    output: str,
    system_prompt: str = "",
    context: str = "",
) -> Dict[str, Any]:
    """Assemble one chat-format training sample.

    Args:
        instruction: Text of the user turn.
        output: Text of the assistant turn.
        system_prompt: Optional system message; no system turn is emitted
            when it is empty.
        context: Optional extra input; when non-empty it is appended to the
            user turn after a blank line, prefixed with ``Context: ``.

    Returns:
        ``{"messages": [...]}`` where the list holds the optional system
        turn, then the user turn, then the assistant turn, each shaped as
        ``{"role": ..., "content": ...}``.
    """
    # The system turn is optional, so model it as a zero-or-one element list
    # that can be spliced in front of the mandatory turns.
    system_turn = (
        [{"role": "system", "content": system_prompt}] if system_prompt else []
    )

    user_text = instruction
    if context:
        user_text += f"\n\nContext: {context}"

    return {
        "messages": [
            *system_turn,
            {"role": "user", "content": user_text},
            {"role": "assistant", "content": output},
        ]
    }
def _cell_text(value: Any) -> str:
    """Render one DataFrame cell as text, mapping missing values to ''."""
    # pd.isna returns an array (not a bool) for list-like cells, so only
    # consult it for scalars; anything else is stringified as-is.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ''
    return str(value)


def preview_formatted(
    df: pd.DataFrame,
    system_prompt: str,
    instruction_col: str,
    output_col: str,
    input_col: Optional[str] = None,
    n: int = 3,
) -> List[Dict[str, Any]]:
    """Generate a preview of up to n formatted chat-JSON samples.

    Args:
        df: Source data; only the rows selected by ``df.head(n)`` are read.
        system_prompt: System message passed to every sample (an empty
            string produces samples without a system turn).
        instruction_col: Column holding the user instruction.
        output_col: Column holding the assistant response.
        input_col: Optional column holding extra context. It is ignored
            when it is falsy or not present in ``df.columns``.
        n: Number of leading rows to preview.

    Returns:
        One ``build_chat_json`` result per previewed row, in row order.

    Cells are converted with ``str``. Missing cells (NaN / None / pd.NA)
    and absent columns become the empty string, so a row with no context
    value does not produce a literal ``Context: nan`` suffix.
    """
    # Whether a context column applies is the same for every row.
    use_context = bool(input_col) and input_col in df.columns

    previews = []
    for _, row in df.head(n).iterrows():
        instruction = _cell_text(row.get(instruction_col, ''))
        output = _cell_text(row.get(output_col, ''))
        context = _cell_text(row.get(input_col, '')) if use_context else ''
        previews.append(
            build_chat_json(instruction, output, system_prompt, context)
        )
    return previews
def preview_formatted_json(
    df: pd.DataFrame,
    system_prompt: str,
    instruction_col: str,
    output_col: str,
    input_col: Optional[str] = None,
    n: int = 3,
) -> str:
    """Serialize a preview of n chat samples as indented JSON text.

    All arguments are forwarded unchanged to ``preview_formatted``. The
    resulting list is dumped with a two-space indent and with non-ASCII
    characters written literally rather than as ``\\u`` escapes.
    """
    return json.dumps(
        preview_formatted(
            df,
            system_prompt,
            instruction_col,
            output_col,
            input_col=input_col,
            n=n,
        ),
        indent=2,
        ensure_ascii=False,
    )
|