Spaces:

YuITC
/

arXivRAG-Multimodal-Conversational-RAG-System

Runtime error

YuITC

quick fix

4fb4db2 7 months ago

1.21 kB

	"""
	Configuration settings for the arXivCSRAG application.
	"""
	import os
	import torch
	from pathlib import Path
	from dotenv import load_dotenv
	from huggingface_hub import whoami


	# Load environment variables
	# load_dotenv() runs with its defaults at import time, so simply importing
	# this module triggers it.
	# NOTE(review): presumably this reads a `.env` file so that secrets such as
	# HF_TOKEN become visible through os.getenv — confirm against deployment.
	load_dotenv()
	# Disabled Hugging Face identity lookup, kept for reference:
	# user = whoami(token=os.getenv('HF_TOKEN'))


	# Base paths
	# ROOT_DIR is two directory levels above this file, i.e. the parent of the
	# directory that contains it. os.path.abspath makes the path absolute before
	# the parents are taken.
	ROOT_DIR = Path(os.path.abspath(__file__)).parent.parent
	TEMP_DIR = ROOT_DIR / 'temp'
	# mkdir(exist_ok=True) is already a no-op when the directory exists, so a
	# separate exists() pre-check is redundant and only adds a check-then-create
	# race. If 'temp' exists but is not a directory this now raises
	# FileExistsError at import instead of being silently skipped.
	TEMP_DIR.mkdir(parents=True, exist_ok=True)


	# PDF extraction settings
	# NOTE(review): the keys look like keyword arguments for a PDF partitioning
	# call (e.g. unstructured's `partition_pdf`); the call site is not in this
	# file — confirm there before renaming or removing any entry.
	PDF_EXTRACTION_CONFIG = {
		# Parsing
		'infer_table_structure': True,
		'strategy': 'hi_res',
		# Image handling
		'extract_image_block_types': ['Image'],
		'extract_image_block_to_payload': True,
		# Chunking (size limits are expressed in characters, per the key names)
		'chunking_strategy': 'by_title',
		'max_characters': 10000,
		'combine_text_under_n_chars': 2000,
		'new_after_n_chars': 6000,
	}


	# LLM & embedding model settings
	# NOTE(review): MODEL_NAME is a dated preview identifier — confirm it is
	# still served by the provider.
	MODEL_NAME = 'gemini-2.5-flash-lite-preview-06-17'
	# Multi-lingual BGE model. 'BAAI/bge-base-en-v1.5' is the commented-out
	# alternative that used to sit next to this setting.
	EMBEDDING_MODEL = 'BAAI/bge-m3'
	# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
	if torch.cuda.is_available():
		DEVICE = 'cuda'
	else:
		DEVICE = 'cpu'


	# Vector Store Configuration
	# Name of the collection used in the vector store.
	# NOTE(review): the store client that consumes this constant is not visible
	# in this file — confirm how it is used before changing the value.
	COLLECTION_NAME = 'arXiv_CS_RAG'