Spaces:

edubotics
/

cs111_assistant

Sleeping

App Files Files Community

cs111_assistant / apps /ai_tutor /config /config.yml

faridkarimli

Initial setup from EduBotics App

b2841f9 verified about 1 year ago

raw

history blame contribute delete

2.07 kB

	log_dir: "storage/logs" # str
	log_chunk_dir: "storage/logs/chunks" # str
	device: "cpu" # str [cuda, cpu]

	vectorstore:
	load_from_HF: false # bool
	reparse_files: true # bool
	data_path: "storage/data" # str
	url_file_path: "storage/data/urls.txt" # str
	expand_urls: false # bool
	db_option: "FAISS" # str [FAISS, Chroma, RAGatouille, RAPTOR]
	db_path: "vectorstores" # str
	model: "text-embedding-ada-002" # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002']
	search_top_k: 5 # int
	score_threshold: 0.3 # float

	faiss_params: # Not used as of now
	index_path: "vectorstores/faiss.index" # str
	index_type: "Flat" # str [Flat, HNSW, IVF]
	index_dimension: 384 # int
	index_nlist: 100 # int
	index_nprobe: 10 # int

	colbert_params:
	index_name: "new_idx" # str

	llm_params:
	llm_arch: "langchain" # [langchain]
	use_history: true # bool
	generate_follow_up: false # bool
	memory_window: 3 # int
	llm_style: "Normal" # str [Normal, ELI5]
	llm_loader: "gpt-4o-mini" # str [local_llm, gpt-3.5-turbo-1106, gpt-4, gpt-4o-mini]
	openai_params:
	temperature: 0.7 # float
	local_llm_params:
	temperature: 0.7 # float
	repo_id: "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" # HuggingFace repo id
	filename: "tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Specific name of gguf file in the repo
	model_path: "storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf" # Path to the model file
	stream: false # bool
	pdf_reader: "gpt" # str [llama, pymupdf, gpt]

	chat_logging:
	log_chat: true # bool
	platform: "literalai"
	callbacks: true # bool

	splitter_options:
	use_splitter: true # bool
	split_by_token: true # bool
	remove_leftover_delimiters: true # bool
	remove_chunks: false # bool
	chunking_mode: "semantic" # str [fixed, semantic]
	chunk_size: 300 # int
	chunk_overlap: 30 # int
	chunk_separators: ["\n\n", "\n", " ", ""] # list of strings
	front_chunks_to_remove: # int or None
	last_chunks_to_remove: # int or None
	delimiters_to_remove: ['\t', '\n', " ", " "] # list of strings