# llm-chat-project / rag / config.py
# Author: DunasAnastasiia — initial commit 7c2e31a
from __future__ import annotations
from dataclasses import dataclass
@dataclass(frozen=True)
class Settings:
    """Immutable configuration for the RAG pipeline.

    Groups the Hugging Face dataset identifiers, chunking parameters,
    retrieval fan-out sizes, model names, and artifact file names in a
    single frozen dataclass. ``frozen=True`` makes instances hashable
    and prevents accidental mutation of shared configuration.
    """

    # Hugging Face dataset: both the text corpus and the question-answer
    # pairs are configs of the same dataset repository.
    dataset_name: str = "rag-datasets/rag-mini-wikipedia"
    corpus_config: str = "text-corpus"
    qa_config: str = "question-answer"
    # Chunking: character-based window size and the overlap carried
    # between consecutive chunks.
    chunk_chars: int = 900
    overlap_chars: int = 150
    # Retrieval: candidates taken from BM25 and dense retrieval, and the
    # final number of chunks kept (presumably after merging/reranking —
    # TODO confirm against the retriever code).
    top_k_bm25: int = 8
    top_k_dense: int = 8
    top_k_final: int = 6
    # Dense model: sentence-transformers encoder used for embeddings.
    embed_model: str = "sentence-transformers/all-MiniLM-L6-v2"
    # Optional reranker: cross-encoder model; rerank_top_n is how many
    # candidates are passed to it for scoring.
    rerank_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
    rerank_top_n: int = 20  # candidates to rerank
    # OpenAI: default chat model name.
    default_openai_model: str = "gpt-4o-mini"
    # Artifacts: directory and file names for persisted chunks and
    # their embedding matrix.
    artifacts_dir: str = "artifacts"
    chunks_jsonl: str = "chunks.jsonl"
    embeddings_npy: str = "embeddings.npy"
# Shared module-level settings instance; frozen, so it is safe to import
# and read from anywhere in the project.
SETTINGS = Settings()