File size: 2,727 Bytes
91f974c b924bc1 01ec3c1 b924bc1 f9322c0 b924bc1 f9322c0 91f974c 0a1d4cf 91f974c b924bc1 91f974c b924bc1 91f974c b924bc1 91f974c b924bc1 91f974c b924bc1 91f974c 0a1d4cf 91f974c 2e540d6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
from typing import Dict, Optional, List
from dataclasses import dataclass
from haystack.dataclasses import ChatMessage
import os
import logging
logger = logging.getLogger(__name__)


def load_prompt_template(filename: str) -> str:
    """Load a prompt template from the project-level ``prompts`` directory.

    Args:
        filename: Base name of the template file, e.g. ``"seven-wonders.txt"``.

    Returns:
        The template text with leading/trailing whitespace stripped.

    Raises:
        FileNotFoundError: If the template file does not exist.
    """
    # Templates live in <project root>/prompts, one level above this module.
    prompt_path = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), "prompts", filename
    )
    # Fix: the previous messages logged the literal "(unknown)" instead of the
    # actual template name; lazy %-args also defer formatting until emitted.
    logger.info("Loading prompt template: %s from %s", filename, prompt_path)
    with open(prompt_path, "r", encoding="utf-8") as f:
        content = f.read().strip()
    logger.info(
        "✅ Successfully loaded prompt template: %s (%d chars)",
        filename,
        len(content),
    )
    return content
@dataclass
class DatasetConfig:
    """Configuration describing how to load and index one Hugging Face dataset."""

    # Hugging Face dataset identifier, e.g. "fka/awesome-chatgpt-prompts".
    name: str
    # Dataset split to load.
    split: str = "train"
    # Name of the dataset column whose text becomes the document content.
    content_field: str = "content"
    # Optional mapping of metadata field name -> dataset column name.
    fields: Optional[Dict[str, str]] = None  # Dictionary of field mappings
    # Prompt template text for this dataset (loaded at import time), if any.
    prompt_template: Optional[str] = None
# Registry of per-dataset ingestion settings, keyed by a short alias.
# Each entry records the Hugging Face dataset id, which column supplies the
# document text, any extra column mappings, and the prompt template read from
# the local ./prompts directory when this module is imported.
DATASET_CONFIGS = {
    "awesome-chatgpt-prompts": DatasetConfig(
        name="fka/awesome-chatgpt-prompts",
        content_field="prompt",
        fields=dict(role="act", prompt="prompt"),
        prompt_template=load_prompt_template("awesome-chatgpt-prompts.txt"),
    ),
    "settings-dataset": DatasetConfig(
        name="syntaxhacker/rag_pipeline",
        content_field="context",
        fields=dict(question="question", answer="answer", context="context"),
        prompt_template=load_prompt_template("settings-dataset.txt"),
    ),
    "seven-wonders": DatasetConfig(
        name="bilgeyucel/seven-wonders",
        content_field="content",
        fields=dict(),  # no extra column mappings needed
        prompt_template=load_prompt_template("seven-wonders.txt"),
    ),
    "psychology-dataset": DatasetConfig(
        name="jkhedri/psychology-dataset",
        split="train",
        # The question text serves as the document content.
        content_field="question",
        # Two alternative model responses kept as metadata.
        fields=dict(response_j="response_j", response_k="response_k"),
        prompt_template=load_prompt_template("psychology-dataset.txt"),
    ),
    "developer-portfolio": DatasetConfig(
        name="syntaxhacker/developer-portfolio-rag",
        split="train",
        content_field="answer",
        fields=dict(question="question", answer="answer", context="context"),
        prompt_template=load_prompt_template("developer-portfolio.txt"),
    ),
}
# Embedding model shared by all pipelines; change it here to swap globally.
MODEL_CONFIG = dict(
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
)
|