""" Configuration file for RAG Analytics Application """ import os # Data folder configuration DATA_FOLDER = os.environ.get("DATA_FOLDER", "./data") # Required columns after normalization REQUIRED_COLUMNS = { 'test_id', 'config_purpose', 'dataset_name' } # Metric columns that need numeric conversion to float METRIC_COLUMNS = [ 'rmse_relevance', 'rmse_utilization', 'rmse_completeness', 'f1_score', 'aucroc' ] # Numeric configuration columns (also need float conversion) NUMERIC_CONFIG_COLUMNS = [ 'chunk_size', 'overlap', 'stride', 'alpha', 'retr_k', 'final_k', 'summ_max', 'summ_min', 'test_id' ] # Column mapping for normalization COLUMN_MAP = { 'test': 'test_id', 'configurationpurpose': 'config_purpose', 'subsets': 'dataset_name', 'embeddingmodel': 'embedding_model', 'rerankermodel': 'reranker_model', 'summarizationmodel': 'summarization_model', 'summarization': 'summarization', # New column name (enabled/disabled) 'chunkingstrategy': 'chunking_strategy', 'chunksize': 'chunk_size', 'overlap': 'overlap', 'stride': 'stride', 'retreivalstrategy': 'retrieval_strategy', 'retrievalstrategy': 'retrieval_strategy', # Catch typo 'alpha': 'alpha', 'retrk': 'retr_k', 'finalk': 'final_k', 'repacking': 'repacking', 'summmax': 'summ_max', 'summmin': 'summ_min', '8bgptlabel': 'gpt_label', # Metrics 'rmsetracerelevance': 'rmse_relevance', 'rmsetraceutilization': 'rmse_utilization', 'rmsetracecompleteness': 'rmse_completeness', 'aucroc': 'aucroc', 'f1score': 'f1_score', 'failedtotalsamples': 'failed_samples' } # Metadata columns (excluded from constant/variable analysis) METADATA_COLUMNS = [ 'rmse_relevance', 'rmse_utilization', 'rmse_completeness', 'aucroc', 'f1_score', 'test_id', 'config_purpose', 'dataset_name' ] # Debug mode DEBUG = True