# rag12-analytics / config.py
# npuliga's picture
# updated files
# b6f27fa
"""
Configuration file for RAG Analytics Application
"""
import os
# Location of the data folder. The DATA_FOLDER environment variable wins
# when it is set; otherwise fall back to "./data".
DATA_FOLDER = os.getenv("DATA_FOLDER", "./data")
# Columns that are required once column names have been normalized.
REQUIRED_COLUMNS = {"test_id", "config_purpose", "dataset_name"}
# Metric columns; their values need numeric conversion to float.
METRIC_COLUMNS = [
    "rmse_relevance",
    "rmse_utilization",
    "rmse_completeness",
    "f1_score",
    "aucroc",
]
# Numeric configuration columns; these need float conversion as well.
# NOTE(review): "test_id" is listed here, so it is treated like the other
# numeric columns — confirm that float conversion of the id is intended.
NUMERIC_CONFIG_COLUMNS = [
    "chunk_size",
    "overlap",
    "stride",
    "alpha",
    "retr_k",
    "final_k",
    "summ_max",
    "summ_min",
    "test_id",
]
# Mapping used for column-name normalization: source column key -> the
# canonical name used by the rest of this configuration.
# NOTE(review): the keys look like raw headers lowercased with spaces and
# punctuation stripped — confirm against the code that applies this map.
COLUMN_MAP = {
    # Identification
    "test": "test_id",
    "configurationpurpose": "config_purpose",
    "subsets": "dataset_name",
    # Models
    "embeddingmodel": "embedding_model",
    "rerankermodel": "reranker_model",
    "summarizationmodel": "summarization_model",
    "summarization": "summarization",  # newer column name (enabled/disabled)
    # Chunking
    "chunkingstrategy": "chunking_strategy",
    "chunksize": "chunk_size",
    "overlap": "overlap",
    "stride": "stride",
    # Retrieval
    "retreivalstrategy": "retrieval_strategy",  # misspelled header variant
    "retrievalstrategy": "retrieval_strategy",  # correctly spelled variant
    "alpha": "alpha",
    "retrk": "retr_k",
    "finalk": "final_k",
    "repacking": "repacking",
    # Summarization limits and label
    "summmax": "summ_max",
    "summmin": "summ_min",
    "8bgptlabel": "gpt_label",
    # Metrics
    "rmsetracerelevance": "rmse_relevance",
    "rmsetraceutilization": "rmse_utilization",
    "rmsetracecompleteness": "rmse_completeness",
    "aucroc": "aucroc",
    "f1score": "f1_score",
    "failedtotalsamples": "failed_samples",
}
# Metadata columns; these are excluded from constant/variable analysis.
METADATA_COLUMNS = [
    # metrics
    "rmse_relevance",
    "rmse_utilization",
    "rmse_completeness",
    "aucroc",
    "f1_score",
    # identification
    "test_id",
    "config_purpose",
    "dataset_name",
]
# Debug-mode switch; currently hard-coded on.
DEBUG: bool = True