File size: 2,727 Bytes
91f974c
 
 
b924bc1
01ec3c1
 
 
b924bc1
 
f9322c0
b924bc1
f9322c0
 
 
 
 
91f974c
 
 
 
 
 
0a1d4cf
91f974c
 
 
 
 
 
 
 
 
 
 
b924bc1
91f974c
 
 
 
 
 
 
 
 
b924bc1
91f974c
 
 
 
 
b924bc1
91f974c
 
 
 
 
 
 
 
 
b924bc1
91f974c
 
 
 
 
 
 
 
 
 
b924bc1
91f974c
 
 
0a1d4cf
91f974c
 
2e540d6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from typing import Dict, Optional, List
from dataclasses import dataclass
from haystack.dataclasses import ChatMessage
import os
import logging

logger = logging.getLogger(__name__)

def load_prompt_template(filename: str) -> str:
    """Load a prompt template from the package-level ``prompts`` directory.

    Args:
        filename: Base name of the template file, e.g. ``"seven-wonders.txt"``.

    Returns:
        The template file contents with leading/trailing whitespace stripped.

    Raises:
        OSError: If the template file cannot be opened or read
            (e.g. ``FileNotFoundError`` when the file is missing).
    """
    # Templates live in <package root>/prompts, one level above this module's directory.
    prompt_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "prompts", filename)
    # Fix: the original messages logged the literal "(unknown)" instead of the
    # actual filename. Also use lazy %-style args so formatting only happens
    # when the log level is enabled.
    logger.info("Loading prompt template: %s from %s", filename, prompt_path)
    with open(prompt_path, "r", encoding="utf-8") as f:
        content = f.read().strip()
    logger.info("✅ Successfully loaded prompt template: %s (%d chars)", filename, len(content))
    return content

@dataclass
class DatasetConfig:
    """Configuration describing how one dataset is loaded and indexed.

    Instances are registered in ``DATASET_CONFIGS`` below; the ``name`` values
    there are in ``org/dataset`` form, so ``name`` appears to be a Hugging Face
    dataset identifier — confirm against the loader that consumes this config.
    """
    name: str  # Dataset identifier, e.g. "fka/awesome-chatgpt-prompts"
    split: str = "train"  # Which dataset split to load
    content_field: str = "content"  # Record field whose text serves as the document content
    fields: Optional[Dict[str, str]] = None  # Dictionary of field mappings
    prompt_template: Optional[str] = None  # Template text, typically from load_prompt_template()

# Default configurations for different datasets.
# NOTE: each entry calls load_prompt_template() eagerly, so importing this
# module reads every template file from disk; a missing template file makes
# the import itself raise.
DATASET_CONFIGS = {
    # Persona prompts: each record maps an "act" (role) to a prompt string.
    "awesome-chatgpt-prompts": DatasetConfig(
        name="fka/awesome-chatgpt-prompts",
        content_field="prompt",
        fields={
            "role": "act",
            "prompt": "prompt"
        },
        prompt_template=load_prompt_template("awesome-chatgpt-prompts.txt")
    ),
    # QA-style records with a supporting context passage used as content.
    "settings-dataset": DatasetConfig(
        name="syntaxhacker/rag_pipeline",
        content_field="context",
        fields={
            "question": "question",
            "answer": "answer",
            "context": "context"
        },
        prompt_template=load_prompt_template("settings-dataset.txt")
    ),
    # Plain-text passages; the default "content" field is used directly.
    "seven-wonders": DatasetConfig(
        name="bilgeyucel/seven-wonders",
        content_field="content",
        fields={},  # No additional fields needed
        prompt_template=load_prompt_template("seven-wonders.txt")
    ),
    "psychology-dataset": DatasetConfig(
        name="jkhedri/psychology-dataset",
        split="train",
        content_field="question",  # Assuming we want to use the question as the content
        fields={
            "response_j": "response_j",  # Response from one model
            "response_k": "response_k"   # Response from another model
        },
        prompt_template=load_prompt_template("psychology-dataset.txt")
    ),
    # Portfolio QA pairs; the answer text is indexed as the document content.
    "developer-portfolio": DatasetConfig(
        name="syntaxhacker/developer-portfolio-rag",
        split="train",
        content_field="answer",
        fields={
            "question": "question",
            "answer": "answer",
            "context": "context"
        },
        prompt_template=load_prompt_template("developer-portfolio.txt")
    ),
}

# Default configuration for embedding model.
MODEL_CONFIG = {
    # Sentence-transformers model name used for document/query embeddings.
    "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
}