KPatelis's picture
Upload 26 files
dfd1417 verified
Raw
History Blame Contribute Delete
1.99 kB
# Project: LangGraph HF Agent for GAIA
# config.yaml
# Path to the GAIA documents dataset.
data: 'data/metadata.jsonl'
# Document retrieval settings.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost. Left flat, the two `k` keys collide and
# `retrievers` parses as null. Confirm that `vector_store` and `bm25`
# belong under `retrievers` in the consuming code.
retrievers:
  enable_vector_search: true  # Enable vector-based document retrieval
  enable_keyword_search: true  # Enable keyword-based document retrieval
  # Number of top documents to consider after reciprocal rank fusion
  final_rrf_k: 3
  vector_store:
    # Name of the table holding the documents (presumably in the vector
    # store backend -- TODO confirm).
    table: "gaia_documents"
    query: "match_documents"  # Method to query the vector store
    k: 10  # Number of top documents to retrieve
    threshold: 0.5  # Similarity threshold for document retrieval
  bm25:
    k: 5  # Number of top documents to retrieve using keyword search
# Model selection. Each sub-section names one Hugging Face model.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost. Left flat, the five `model_name` keys collide
# and only the last one survives in most parsers.
models:
  cache_folder: "./models/hf_cache"  # Directory to cache Hugging Face models
  embeddings:
    # Hugging Face embedding model ID
    model_name: "Alibaba-NLP/gte-modernbert-base"
  reranker:
    # Hugging Face model ID for reranking
    model_name: "Alibaba-NLP/gte-reranker-modernbert-base"
  llm:
    model_name: "Qwen/Qwen3-32B"  # Hugging Face model ID
    # NOTE(review): every scalar key that followed `parameters:` is kept
    # under it. It is possible that `provider`, `thinking_enabled`,
    # `timeout` and `max_new_tokens` are meant to sit directly under `llm`
    # instead -- verify against the code that reads this section.
    parameters:
      temperature: 0.6
      repetition_penalty: 1.3
      provider: "auto"
      thinking_enabled: false
      # Read timeout (s) for the HF Inference call. Default 120 is too short
      # under load.
      timeout: 300
      # Output cap. Default 512 truncates long responses and breaks tool
      # calls.
      max_new_tokens: 4096
  vlm:
    model_name: "Qwen/Qwen3-VL-32B-Instruct"  # Hugging Face model ID
  asr:
    # Hugging Face model ID — must have a provider on HF Inference Providers
    model_name: "openai/whisper-large-v3"
# Graph execution settings.
# `recursion_limit` is nested under `graph`; written flat, `graph` would
# parse as null and `recursion_limit` would become a top-level key.
graph:
  recursion_limit: 40  # Max graph-node visits before bailing.
# Settings for the external API.
# NOTE(review): nesting reconstructed from a copy whose indentation had
# been lost; confirm that `files_dir` belongs under `api` rather than at
# the top level.
api:
  base_url: "https://agents-course-unit4-scoring.hf.space"
  files_dir: "./data/task_files"  # Local directory for downloaded task files