from typing import Dict, Optional, List
from dataclasses import dataclass
from haystack.dataclasses import ChatMessage
import os
import logging

logger = logging.getLogger(__name__)


def load_prompt_template(filename: str) -> str:
    """Load and return the stripped contents of a prompt template file.

    Templates are looked up in the ``prompts`` directory that sits one
    level above this module's directory.

    Args:
        filename: Bare template file name, e.g. ``"seven-wonders.txt"``.

    Returns:
        The template text with leading/trailing whitespace stripped.

    Raises:
        OSError: If the template file is missing or unreadable.
    """
    prompt_path = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), "prompts", filename
    )
    # Lazy %-style args: formatting only happens if INFO is enabled.
    # (Fix: previously logged a literal placeholder instead of the filename.)
    logger.info("Loading prompt template: %s from %s", filename, prompt_path)
    with open(prompt_path, "r", encoding="utf-8") as f:
        content = f.read().strip()
    logger.info(
        "✅ Successfully loaded prompt template: %s (%d chars)",
        filename,
        len(content),
    )
    return content


@dataclass
class DatasetConfig:
    """Configuration describing how to load and map one Hugging Face dataset."""

    name: str                                # Hugging Face dataset identifier (e.g. "fka/awesome-chatgpt-prompts")
    split: str = "train"                     # Dataset split to load
    content_field: str = "content"           # Dataset field used as the document content
    fields: Optional[Dict[str, str]] = None  # Mapping of logical name -> dataset field name
    prompt_template: Optional[str] = None    # Prompt template text used with this dataset


# Default configurations for the supported datasets.
# NOTE: each prompt template is read from disk at import time, so importing
# this module raises OSError if any template file under prompts/ is missing.
DATASET_CONFIGS = {
    "awesome-chatgpt-prompts": DatasetConfig(
        name="fka/awesome-chatgpt-prompts",
        content_field="prompt",
        fields={
            "role": "act",
            "prompt": "prompt",
        },
        prompt_template=load_prompt_template("awesome-chatgpt-prompts.txt"),
    ),
    "settings-dataset": DatasetConfig(
        name="syntaxhacker/rag_pipeline",
        content_field="context",
        fields={
            "question": "question",
            "answer": "answer",
            "context": "context",
        },
        prompt_template=load_prompt_template("settings-dataset.txt"),
    ),
    "seven-wonders": DatasetConfig(
        name="bilgeyucel/seven-wonders",
        content_field="content",
        fields={},  # No additional fields needed
        prompt_template=load_prompt_template("seven-wonders.txt"),
    ),
    "psychology-dataset": DatasetConfig(
        name="jkhedri/psychology-dataset",
        split="train",
        content_field="question",  # Assuming we want to use the question as the content
        fields={
            "response_j": "response_j",  # Response from one model
            "response_k": "response_k",  # Response from another model
        },
        prompt_template=load_prompt_template("psychology-dataset.txt"),
    ),
    "developer-portfolio": DatasetConfig(
        name="syntaxhacker/developer-portfolio-rag",
        split="train",
        content_field="answer",
        fields={
            "question": "question",
            "answer": "answer",
            "context": "context",
        },
        prompt_template=load_prompt_template("developer-portfolio.txt"),
    ),
}

# Default configuration for the embedding model.
MODEL_CONFIG = {
    "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
}