---
# Project: LangGraph HF Agent for GAIA
# config.yaml
#
# NOTE(review): this file was recovered from a copy in which every newline and
# indent had been collapsed onto one line (which YAML reads as a single
# comment, i.e. an empty document). The nesting below is reconstructed from
# key order and the original comments -- confirm it against the code that
# loads this file.

# Path to the GAIA documents dataset
data: "data/metadata.jsonl"

retrievers:
  enable_vector_search: true   # Enable vector-based document retrieval
  enable_keyword_search: true  # Enable keyword-based document retrieval
  # Number of top documents to consider after reciprocal rank fusion
  final_rrf_k: 3

  vector_store:
    # Name of the vector-store table that holds the documents
    table: "gaia_documents"
    # Name of the query used to search the vector store
    # NOTE(review): looks like a stored match function name -- confirm
    query: "match_documents"
    k: 10           # Number of top documents to retrieve
    threshold: 0.5  # Similarity threshold for document retrieval

  bm25:
    k: 5  # Number of top documents to retrieve using keyword search

models:
  # Directory to cache Hugging Face models
  cache_folder: "./models/hf_cache"

  embeddings:
    # Hugging Face embedding model ID
    model_name: "Alibaba-NLP/gte-modernbert-base"

  reranker:
    # Hugging Face model ID for reranking
    model_name: "Alibaba-NLP/gte-reranker-modernbert-base"

  llm:
    model_name: "Qwen/Qwen3-32B"  # Hugging Face model ID
    # NOTE(review): every key below is assumed to belong to `parameters`
    # (they directly follow it in the original); verify against the loader,
    # in particular for `provider` and `thinking_enabled`.
    parameters:
      temperature: 0.6
      repetition_penalty: 1.3
      provider: "auto"
      thinking_enabled: false
      # Read timeout (s) for the HF Inference call.
      # Default 120 is too short under load.
      timeout: 300
      # Output cap. Default 512 truncates long responses and breaks
      # tool calls.
      max_new_tokens: 4096

  vlm:
    model_name: "Qwen/Qwen3-VL-32B-Instruct"  # Hugging Face model ID

  asr:
    # Hugging Face model ID -- must have a provider on HF Inference Providers
    model_name: "openai/whisper-large-v3"

graph:
  recursion_limit: 40  # Max graph-node visits before bailing.

api:
  base_url: "https://agents-course-unit4-scoring.hf.space"
  # Local directory for downloaded task files
  # NOTE(review): assumed to sit under `api` -- confirm against the loader
  files_dir: "./data/task_files"