Commit 06c0480
1 Parent(s): 91e1308
...
app.py CHANGED

@@ -81,7 +81,8 @@ DEFAULT_RAG_CONFIG = {
    'processing': {
        'trusted_sources': {'wikipedia.org': 0.8, 'reuters.com': 0.75, 'apnews.com': 0.75},
        'evidence_categories': {'GENERAL': ['information', 'details', 'facts', 'explanation']},
-        'scoring_weights': {'source': 0.5, 'temporal': 0.3, 'category_match': 0.2}
    },
    'enrichment': {
        'enabled': True, 'workers': 3, 'timeout': 10,
@@ -270,6 +271,12 @@ class CacheManager:
    def __contains__(self, key): return key in self._cache and (time.time()-self._timestamps.get(key,0)<self.ttl)

class SearchProvider(ABC):
    @property
    @abstractmethod
    def provider_name(self) -> str:
@@ -363,33 +370,59 @@ class TavilyProvider(SearchProvider):
        else:
            self._enabled = False
            gaia_logger.warning(f"✗ {self.provider_name} API key missing or TavilyClient not available in config.")

    def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
-        if not self._enabled:
        try:
            response = self._client.search(query=query, max_results=max_results, search_depth=self._search_depth)
            hits = response.get('results', [])
-            if not hits:

class DuckDuckGoProvider(SearchProvider):
    @property
    def provider_name(self) -> str:
        return "DuckDuckGo"

    def __init__(self, config_dict: Dict):
        super().__init__(config_dict)
        if DDGS:
-            try:
        self._quota_limit = float('inf')

    def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
-        if not self._enabled:
        try:
            hits = list(self._client.text(query, region='wt-wt', max_results=max_results))[:max_results]
-            if not hits:

class CompositeSearchClient:
    def __init__(self, config_dict: Dict):
@@ -398,47 +431,66 @@ class CompositeSearchClient:
        self.providers = self._init_providers(config_dict)
        self.cache = CacheManager(
            ttl=config_dict.get('caching', {}).get('search_cache_ttl', 300),
-            max_size=config_dict.get('caching', {}).get('search_cache_size', 50),
        self._retry_att = self._search_config.get("retry_attempts", 2)
        self._retry_del = self._search_config.get("retry_delay", 2)
        self._def_max_r = self._search_config.get("default_max_results", 3)

    def _init_providers(self, config_dict: Dict) -> List[SearchProvider]:
        providers: List[SearchProvider] = []
        if TAVILY_API_KEY and TavilyClient:
-            tavily_prov = TavilyProvider(config_dict)
-            if tavily_prov.available():
        if GOOGLE_CUSTOM_SEARCH_API_KEY and GOOGLE_CUSTOM_SEARCH_CSE_ID:
-            google_prov = GoogleProvider(config_dict)
-            if google_prov.available():
        if DDGS:
-            ddgs_prov = DuckDuckGoProvider(config_dict)
-            if ddgs_prov.available():
        return providers

    def search(self, query: str, max_results: Optional[int] = None, force_refresh: bool = False) -> List[Dict]:
        q, actual_r = query.strip(), max_results if max_results is not None else self._def_max_r
-        if not q:
        cache_key = (q, actual_r)
-        if not force_refresh and (cached := self.cache.get(cache_key)) is not None:
        for prov in self.providers:
            for attempt in range(self._retry_att + 1):
-                if not prov.available():
                try:
                    results = prov.search(q, actual_r)
-                    if results is not None:
                    gaia_logger.warning(f"[{prov.provider_name}] search None: '{q[:50]}' (att {attempt+1})")
-                    if attempt < self._retry_att:
                except Exception as e:
                    gaia_logger.error(f"[{prov.provider_name}] Ex during search '{q[:50]}': {e}", exc_info=True)
-                    if attempt < self._retry_att:
        gaia_logger.error(f"RAG: All providers failed for query: '{q[:50]}'.")
-        self.cache.set(cache_key, [])

class GaiaQueryBuilder:
    def __init__(self, base_query: str, config_dict: Dict):
        self.base_query = base_query.strip()
        gaia_logger.debug(f"GaiaQueryBuilder init: '{self.base_query[:100]}'")

    def get_queries(self) -> Dict[str, List[Tuple[str, str]]]:
        queries = {'primary': [(self.base_query, 'GENERAL')]} if self.base_query else {'primary': []}
        gaia_logger.debug(f"RAG Generated queries: {queries}")
@@ -451,105 +503,174 @@ class ResultProcessor:
        self.seen_urls: Set[str] = set()
        self.date_pattern = DEFAULT_RAG_CONFIG['processing'].get('date_pattern', r'\b\d{4}\b')
        gaia_logger.debug("RAG ResultProcessor initialized.")

    def process_batch(self, results: List[Dict], query_tag: str, initial_cat: str='GENERAL') -> List[Dict]:
        processed: List[Dict] = []
-        if not results:
        for r in results:
            url = r.get('href')
-            if not url or self._normalize_url(url) in self.seen_urls:
            self.seen_urls.add(self._normalize_url(url))
-            res_data = {
        gaia_logger.debug(f"[RAG Proc] Batch: {len(processed)} new results from '{query_tag}'")
        return processed

    def _score_result(self, result: Dict):
-        url,body,title = result.get('href',''),result.get('body',''),result.get('title','')
        source_q = 0.5
-        if domain_match := re.search(r'https?://(?:www\.)?([^/]+)', url or ""):

class ContentEnricher:
    def __init__(self, config_dict: Dict):
        self.enrich_config = config_dict.get('enrichment', {})
        self._enabled = self.enrich_config.get('enabled', False) and bool(BeautifulSoup)
-        if not self._enabled:
-        self.
        self.cache = CacheManager(
-            ttl=config_dict.get('caching',{}).get('enrich_cache_ttl',600),
-            max_size=config_dict.get('caching',{}).get('enrich_cache_size',25),
        gaia_logger.info(f"RAG ContentEnricher Initialized. Enabled: {self._enabled}")
        updated_res = []
        with ThreadPoolExecutor(max_workers=self._max_w) as executor:
            future_map = {executor.submit(self._fetch_single, r, force_refresh): r for r in results}
-            for future in as_completed(future_map):
        return updated_res

    def _fetch_single(self, result: Dict, force_refresh: bool) -> Dict:
-        url = result.get('href')
        if not force_refresh and (cached := self.cache.get(url)) is not None:
-            if cached:
        try:
            headers = {'User-Agent': 'Mozilla/5.0 GaiaRAGAgent/1.0'}
-            response = requests.get(url, headers=headers, timeout=self._timeout, allow_redirects=True)
            soup = BeautifulSoup(response.text, 'lxml')
-            for el_name in ["script","style","nav","header","footer","aside","form","iframe","img","svg",".ad",".advertisement"]:
-                for el in soup.select(el_name):
            main_el = soup.select_one('article, main, [role="main"], .entry-content, .post-content, #content, #main') or soup.body
-            text = main_el.get_text(separator='\n',strip=True) if main_el else ""
            text = re.sub(r'(\s*\n\s*){2,}', '\n\n', text).strip()
            if len(text) >= self._min_l:
                result['body'] = text[:self._max_l] + ("..." if len(text) > self._max_l else "")
-                result['enriched'] = True
                gaia_logger.info(f"[Enrich] OK: {url} ({len(result['body'])} chars).")
-            else:
        return result

class GeneralRAGPipeline:
    def __init__(self, config_dict: Optional[Dict] = None):
        self.config = config_dict if config_dict is not None else DEFAULT_RAG_CONFIG
        self.search_client = CompositeSearchClient(self.config)
-        enrich_cfg = self.config.get('enrichment', {})
        gaia_logger.info("GeneralRAGPipeline initialized.")

    def analyze(self, query: str, force_refresh: bool = False) -> List[Dict]:
-        q=query.strip()
-        if not q:
        gaia_logger.info(f"[RAG Analyze] Stage '{stage}': Search '{query_s[:70]}'")
-        s_res=self.search_client.search(query_s,max_results=max_r_pq,force_refresh=force_refresh)
        all_res.extend(res_proc.process_batch(s_res or [], query_s, initial_cat=cat))
-        all_res.sort(key=lambda x: x.get('combined_score',0), reverse=True)
        if enrich_en and self.enricher and all_res:
-            to_enrich=[r for r in all_res[:enrich_cnt] if r.get('href')]
            gaia_logger.info(f"[RAG Analyze] Enriching {len(to_enrich)} items...")
-            enriched_map

class GaiaLevel1Agent:
    def __init__(self, api_url: str = DEFAULT_API_URL):
@@ -563,10 +684,13 @@ class GaiaLevel1Agent:
                model_name = 'gemini-1.0-pro'
                self.llm_model = genai.GenerativeModel(model_name)
                gaia_logger.info(f"Gemini LLM ('{model_name}') initialized.")
-            except Exception as e:

-        if not self.llm_model:
        gaia_logger.info(f"GaiaLevel1Agent (RAG & FileProcessor) initialized. API: {self.api_url}")

    @lru_cache(maxsize=32)
@@ -582,7 +706,8 @@ class GaiaLevel1Agent:
            content_disposition = response.headers.get('Content-Disposition')
            if content_disposition:
                header_filename = FileProcessor._get_filename_from_url(content_disposition)
-                if header_filename != "unknown_file":

            content_type = response.headers.get("Content-Type", "")
@@ -591,21 +716,27 @@ class GaiaLevel1Agent:
            return processed_content

        except requests.exceptions.HTTPError as e:
-            if e.response.status_code == 404:
            gaia_logger.warning(f"HTTP error fetching file {task_id}: {e}")
        except requests.exceptions.Timeout:
            gaia_logger.warning(f"Timeout fetching file {task_id}")
-            if attempt < 1:
        except Exception as e:
            gaia_logger.error(f"Error fetching/processing file {task_id} ({file_url}): {e}", exc_info=True)
-            if attempt < 1:
        return None

    def _formulate_answer_with_llm(self, question: str, file_context: Optional[str], web_context: Optional[str]) -> str:
        if not self.llm_model:
            gaia_logger.warning("LLM model (Gemini) not available for answer formulation.")
-            if web_context:
            return "I am currently unable to process this request fully as the LLM is not available."

        prompt_parts = [
@@ -626,8 +757,8 @@ class GaiaLevel1Agent:
                truncated_web_context = web_context[:available_len_for_web] + "\n... (web context truncated)"
                gaia_logger.info(f"Truncated web context from {len(web_context)} to {len(truncated_web_context)} chars.")
            elif available_len_for_web <= 0 and web_context:

            prompt_parts.extend(["\n\nContext from Web Search Results:\n---", truncated_web_context, "---"])
            combined_context_len += len(truncated_web_context)
@@ -652,7 +783,7 @@ class GaiaLevel1Agent:
                top_p=0.95,
                max_output_tokens=2048
            )
-            safety_set = [{"category":c,"threshold":"BLOCK_MEDIUM_AND_ABOVE"} for c in ["HARM_CATEGORY_HARASSMENT","HARM_CATEGORY_HATE_SPEECH","HARM_CATEGORY_SEXUALLY_EXPLICIT","HARM_CATEGORY_DANGEROUS_CONTENT"]]

            response = self.llm_model.generate_content(
                final_prompt,
@@ -672,7 +803,8 @@ class GaiaLevel1Agent:
            return llm_answer
        except Exception as e:
            gaia_logger.error(f"Error calling Gemini API: {e}", exc_info=True)
-            if "429" in str(e) or "ResourceExhausted" in str(type(e).__name__):
            return "Error generating LLM answer."

    def __call__(self, question: str, task_id: Optional[str] = None) -> str:
@@ -697,7 +829,9 @@ class GaiaLevel1Agent:
            if not any(kw in q_lower for kw in web_still_needed_kws):
                needs_web = False
                gaia_logger.info("Substantial file context present and question doesn't strongly imply web search. Skipping web search.")
-        if "don't search" in q_lower or "do not search" in q_lower:

        if needs_web:
            search_q = question.replace("?", "").strip()
@@ -707,14 +841,15 @@ class GaiaLevel1Agent:
            snippets = []
            for i, res_item in enumerate(rag_res):
                title, body, href = res_item.get('title','N/A'), res_item.get('body',''), res_item.get('href','#')
-                provider = res_item.get('query_tag','WebSearch')
                prefix = "Enriched" if res_item.get('enriched') else "Snippet"
                body_str = str(body) if body is not None else ""
                body_prompt = body_str[:(MAX_CONTEXT_LENGTH_LLM // (len(rag_res) if rag_res else 1)) - 200] + "..." if len(body_str) > 2800 else body_str
                snippets.append(f"Source [{i+1} - {provider}]: {title}\nURL: {href}\n{prefix}: {body_prompt}\n---")
            web_ctx_str = "\n\n".join(snippets)
            gaia_logger.info(f"RAG results: {len(web_ctx_str)} chars from {len(rag_res)} sources.")
-        else:

        answer = self._formulate_answer_with_llm(question, file_ctx_str, web_ctx_str)
        gaia_logger.info(f"Final answer (first 70): {answer[:70]}...")
@@ -722,50 +857,73 @@ class GaiaLevel1Agent:

def run_and_submit_all(profile: gr.OAuthProfile | None):
    space_id = os.getenv("SPACE_ID")
-    if profile:
    questions_url, submit_url = f"{DEFAULT_API_URL}/questions", f"{DEFAULT_API_URL}/submit"
    try:
        agent = GaiaLevel1Agent(api_url=DEFAULT_API_URL)
        gaia_logger.info("GaiaLevel1Agent (RAG & FileProcessor) initialized for evaluation.")
-    except Exception as e:
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Code link unavailable"
    gaia_logger.info(f"Agent code link: {agent_code}")
    try:
-        response=requests.get(questions_url,timeout=15)
        gaia_logger.info(f"Fetched {len(questions_data)} questions.")
-    except Exception as e:
    gaia_logger.info(f"LLM Rate: {GEMINI_RPM_LIMIT} RPM. Sleep ~{sleep_llm:.2f}s between LLM calls.")
    gaia_logger.info(f"Running agent on {len(questions_data)} questions...")
-    for i,item in enumerate(questions_data):
-        task_id,q_text=item.get("task_id"),item.get("question")
-        if not task_id or q_text is None:
        gaia_logger.info(f"Q {i+1}/{len(questions_data)} - Task: {task_id}")
        try:
-            answer=agent(question=q_text,task_id=task_id)
-            answers_payload.append({"task_id":task_id,"submitted_answer":answer})
-            results_log.append({"Task ID":task_id,"Question":q_text,"Submitted Answer":answer})
        except Exception as e:
-        if i<len(questions_data)-1:
    gaia_logger.info(f"Submitting {len(answers_payload)} answers for '{username}'...")
    try:
-        response=requests.post(submit_url,json=submission,timeout=60)
    except requests.exceptions.HTTPError as e:
-        err_detail=f"Server: {e.response.status_code}. Detail: {e.response.text[:200]}"
-        gaia_logger.error(f"Submission Fail HTTP: {err_detail}",exc_info=False)
-        return f"Submission Failed: {err_detail}",pd.DataFrame(results_log)
-    except Exception as e:

with gr.Blocks(title="GAIA RAG Agent - Advanced") as demo:
    gr.Markdown("# Gaia Level 1 Agent (RAG & FileProcessor) Evaluation Runner")
@@ -794,15 +952,23 @@ if __name__ == "__main__":
    }
    missing_keys = [key_name for key_name, key_val in required_env.items() if not key_val]
    for key_name in required_env:
-        if required_env[key_name]:
-    if not

    if missing_keys:
        print(f"\n--- PLEASE SET THE FOLLOWING MISSING ENVIRONMENT VARIABLES FOR FULL FUNCTIONALITY: {', '.join(missing_keys)} ---\n")
@@ -81,7 +81,8 @@ DEFAULT_RAG_CONFIG = {
    'processing': {
        'trusted_sources': {'wikipedia.org': 0.8, 'reuters.com': 0.75, 'apnews.com': 0.75},
        'evidence_categories': {'GENERAL': ['information', 'details', 'facts', 'explanation']},
+        'scoring_weights': {'source': 0.5, 'temporal': 0.3, 'category_match': 0.2},
+        'date_pattern': r'\b\d{4}\b'
    },
    'enrichment': {
        'enabled': True, 'workers': 3, 'timeout': 10,
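As a quick illustration (not part of this commit), the new 'date_pattern' key is the fallback pattern ResultProcessor reads when scoring temporal relevance; a bare-year match earns the middle score:

# Hedged sketch, illustrative only.
import re
pattern = DEFAULT_RAG_CONFIG['processing'].get('date_pattern', r'\b\d{4}\b')
print(bool(re.search(pattern, "the library was released in 2023")))  # True -> temporal_relevance 0.5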
@@ -270,6 +271,12 @@ class CacheManager:
    def __contains__(self, key): return key in self._cache and (time.time()-self._timestamps.get(key,0)<self.ttl)

class SearchProvider(ABC):
+    def __init__(self, config_dict: Dict):
+        self.provider_config = config_dict.get('search', {})
+        self._enabled = False
+        self._quota_limit = self.provider_config.get("quota_limit", float('inf'))
+        self._quota_used = 0
+
    @property
    @abstractmethod
    def provider_name(self) -> str:
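A concrete provider only has to supply provider_name and _perform_search on top of this constructor. A minimal sketch of a hypothetical provider (not in this commit; the base class's available() and public search() wrappers are not shown in the diff):

# Hedged sketch, assuming the hit-dict shape used by the real providers.
class StaticListProvider(SearchProvider):
    @property
    def provider_name(self) -> str:
        return "StaticList"

    def __init__(self, config_dict: Dict):
        super().__init__(config_dict)
        self._enabled = True  # nothing external to initialize

    def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
        if not self._enabled:
            return None
        return [{'href': 'https://example.org', 'title': query, 'body': 'stub result'}][:max_results]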
@@ -363,33 +370,59 @@ class TavilyProvider(SearchProvider):
        else:
            self._enabled = False
            gaia_logger.warning(f"✗ {self.provider_name} API key missing or TavilyClient not available in config.")
+
    def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
+        if not self._enabled:
+            return None
        try:
            response = self._client.search(query=query, max_results=max_results, search_depth=self._search_depth)
            hits = response.get('results', [])
+            if not hits:
+                gaia_logger.info(f"[{self.provider_name}] No results: '{query[:70]}'")
+                return []
+            return [{
+                'href': h.get('url'),
+                'title': h.get('title',''),
+                'body': h.get('content','')
+            } for h in hits]
+        except Exception as e:
+            gaia_logger.warning(f"[{self.provider_name}] Search fail: '{query[:70]}': {e}")
+            return None

class DuckDuckGoProvider(SearchProvider):
    @property
    def provider_name(self) -> str:
        return "DuckDuckGo"
+
    def __init__(self, config_dict: Dict):
        super().__init__(config_dict)
        if DDGS:
+            try:
+                self._client = DDGS(timeout=10)
+                self._enabled = True
+                gaia_logger.info(f"✓ {self.provider_name} Search initialized.")
+            except Exception as e:
+                gaia_logger.warning(f"✗ {self.provider_name} init fail: {e}", exc_info=False)
+        else:
+            gaia_logger.warning(f"✗ {self.provider_name}: DDGS lib missing.")
        self._quota_limit = float('inf')
+
    def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
+        if not self._enabled:
+            return None
        try:
            hits = list(self._client.text(query, region='wt-wt', max_results=max_results))[:max_results]
+            if not hits:
+                gaia_logger.info(f"[{self.provider_name}] No results: '{query[:70]}'")
+                return []
+            return [{
+                'href': r.get('href'),
+                'title': r.get('title',''),
+                'body': r.get('body','')
+            } for r in hits]
+        except Exception as e:
+            gaia_logger.warning(f"[{self.provider_name}] Search fail: '{query[:70]}': {e}")
+            return None

class CompositeSearchClient:
    def __init__(self, config_dict: Dict):
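Both providers normalize their raw hits into one dict shape ('href', 'title', 'body') before anything downstream sees them; a hedged check of that contract (not in the commit):

sample_hit = {'href': 'https://en.wikipedia.org/wiki/Paris', 'title': 'Paris', 'body': 'Paris is the capital of France...'}
assert set(sample_hit) == {'href', 'title', 'body'}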
@@ -398,47 +431,66 @@ class CompositeSearchClient:
        self.providers = self._init_providers(config_dict)
        self.cache = CacheManager(
            ttl=config_dict.get('caching', {}).get('search_cache_ttl', 300),
+            max_size=config_dict.get('caching', {}).get('search_cache_size', 50),
+            name="SearchClientCache"
+        )
        self._retry_att = self._search_config.get("retry_attempts", 2)
        self._retry_del = self._search_config.get("retry_delay", 2)
        self._def_max_r = self._search_config.get("default_max_results", 3)
+
    def _init_providers(self, config_dict: Dict) -> List[SearchProvider]:
        providers: List[SearchProvider] = []
        if TAVILY_API_KEY and TavilyClient:
+            tavily_prov = TavilyProvider(config_dict)
+            if tavily_prov.available():
+                providers.append(tavily_prov)
        if GOOGLE_CUSTOM_SEARCH_API_KEY and GOOGLE_CUSTOM_SEARCH_CSE_ID:
+            google_prov = GoogleProvider(config_dict)
+            if google_prov.available():
+                providers.append(google_prov)
        if DDGS:
+            ddgs_prov = DuckDuckGoProvider(config_dict)
+            if ddgs_prov.available():
+                providers.append(ddgs_prov)
+        if not providers:
+            gaia_logger.error("RAG: No search providers initialized!")
+        else:
+            gaia_logger.info(f"RAG Providers: {[p.provider_name for p in providers]}")
        return providers
+
    def search(self, query: str, max_results: Optional[int] = None, force_refresh: bool = False) -> List[Dict]:
        q, actual_r = query.strip(), max_results if max_results is not None else self._def_max_r
+        if not q:
+            return []
        cache_key = (q, actual_r)
+        if not force_refresh and (cached := self.cache.get(cache_key)) is not None:
+            return cached
        for prov in self.providers:
            for attempt in range(self._retry_att + 1):
+                if not prov.available():
+                    break
                try:
                    results = prov.search(q, actual_r)
+                    if results is not None:
+                        self.cache.set(cache_key, results)
+                        return results
                    gaia_logger.warning(f"[{prov.provider_name}] search None: '{q[:50]}' (att {attempt+1})")
+                    if attempt < self._retry_att:
+                        time.sleep(self._retry_del)
                except Exception as e:
                    gaia_logger.error(f"[{prov.provider_name}] Ex during search '{q[:50]}': {e}", exc_info=True)
+                    if attempt < self._retry_att:
+                        time.sleep(self._retry_del)
        gaia_logger.error(f"RAG: All providers failed for query: '{q[:50]}'.")
+        self.cache.set(cache_key, [])
+        return []

class GaiaQueryBuilder:
    def __init__(self, base_query: str, config_dict: Dict):
        self.base_query = base_query.strip()
+        self.config = config_dict # Fixed: store config_dict for potential future use.
        gaia_logger.debug(f"GaiaQueryBuilder init: '{self.base_query[:100]}'")
+
    def get_queries(self) -> Dict[str, List[Tuple[str, str]]]:
        queries = {'primary': [(self.base_query, 'GENERAL')]} if self.base_query else {'primary': []}
        gaia_logger.debug(f"RAG Generated queries: {queries}")
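A hedged usage sketch of the composite client (not in the commit), assuming DEFAULT_RAG_CONFIG is the module-level config shown above:

client = CompositeSearchClient(DEFAULT_RAG_CONFIG)
hits = client.search("capital of France", max_results=3)   # walks Tavily, Google, DuckDuckGo with retries
again = client.search("capital of France", max_results=3)  # served from the CacheManager within its TTL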
@@ -451,105 +503,174 @@ class ResultProcessor:
        self.seen_urls: Set[str] = set()
        self.date_pattern = DEFAULT_RAG_CONFIG['processing'].get('date_pattern', r'\b\d{4}\b')
        gaia_logger.debug("RAG ResultProcessor initialized.")
+
    def process_batch(self, results: List[Dict], query_tag: str, initial_cat: str='GENERAL') -> List[Dict]:
        processed: List[Dict] = []
+        if not results:
+            return processed
        for r in results:
            url = r.get('href')
+            if not url or self._normalize_url(url) in self.seen_urls:
+                continue
            self.seen_urls.add(self._normalize_url(url))
+            res_data = {
+                'title': r.get('title',''),
+                'body': r.get('body',''),
+                'href': url,
+                'query_tag': query_tag,
+                'category': initial_cat,
+                'source_quality': 0.5,
+                'temporal_relevance': 0.1,
+                'combined_score': 0.0
+            }
+            self._score_result(res_data)
+            processed.append(res_data)
        gaia_logger.debug(f"[RAG Proc] Batch: {len(processed)} new results from '{query_tag}'")
        return processed
+
+    def _normalize_url(self, url: str) -> str:
+        return re.sub(r'^https?://(?:www\.)?', '', str(url)).rstrip('/') if url else ""
+
    def _score_result(self, result: Dict):
+        url, body, title = result.get('href', ''), result.get('body', ''), result.get('title', '')
        source_q = 0.5
+        if domain_match := re.search(r'https?://(?:www\.)?([^/]+)', url or ""):
+            source_q = self.trusted_sources.get(domain_match.group(1), 0.5)
+        result['source_quality'] = source_q
+        temporal_r = 0.1
+        text_combo = (str(title) + ' ' + str(body)).lower()
+        if any(k in text_combo for k in ['today', 'current', 'latest']) or re.search(r'\b\d+\s+hours?\s+ago', text_combo):
+            temporal_r = 0.9
+        elif re.search(self.date_pattern, text_combo):
+            temporal_r = 0.5
+        result['temporal_relevance'] = temporal_r
+        result['combined_score'] = (source_q * 0.6 + temporal_r * 0.4)

class ContentEnricher:
    def __init__(self, config_dict: Dict):
        self.enrich_config = config_dict.get('enrichment', {})
        self._enabled = self.enrich_config.get('enabled', False) and bool(BeautifulSoup)
+        if not self._enabled:
+            gaia_logger.warning("RAG ContentEnricher disabled (BeautifulSoup missing or config).")
+            return
+        self._timeout = self.enrich_config.get('timeout', 10)
+        self._max_w = self.enrich_config.get('workers', 3)
+        self._min_l, self._max_l = self.enrich_config.get('min_text_length', 200), self.enrich_config.get('max_text_length', 8000)
+        self._skip_ext = tuple(self.enrich_config.get('skip_extensions', []))
        self.cache = CacheManager(
+            ttl=config_dict.get('caching', {}).get('enrich_cache_ttl', 600),
+            max_size=config_dict.get('caching', {}).get('enrich_cache_size', 25),
+            name="EnrichCache"
+        )
        gaia_logger.info(f"RAG ContentEnricher Initialized. Enabled: {self._enabled}")
+
+    def enrich_batch(self, results: List[Dict], force_refresh: bool = False) -> List[Dict]:
+        if not self._enabled or not results:
+            return results
        updated_res = []
        with ThreadPoolExecutor(max_workers=self._max_w) as executor:
            future_map = {executor.submit(self._fetch_single, r, force_refresh): r for r in results}
+            for future in as_completed(future_map):
+                updated_res.append(future.result())
        return updated_res
+
    def _fetch_single(self, result: Dict, force_refresh: bool) -> Dict:
+        url = result.get('href')
+        result.setdefault('enriched', False)
+        result.setdefault('enrichment_failed', None)
+        result.setdefault('enrichment_skipped_type', None)
+        if not url:
+            result['enrichment_skipped_type'] = 'no_url'
+            return result
        if not force_refresh and (cached := self.cache.get(url)) is not None:
+            if cached:
+                result.update(cached)
+                gaia_logger.debug(f"[Enrich] Cache hit: {url}")
+            return result
+        if url.lower().endswith(self._skip_ext):
+            result['enrichment_skipped_type'] = 'extension'
+            return result
        try:
            headers = {'User-Agent': 'Mozilla/5.0 GaiaRAGAgent/1.0'}
+            response = requests.get(url, headers=headers, timeout=self._timeout, allow_redirects=True)
+            response.raise_for_status()
+            if 'text/html' not in response.headers.get('Content-Type', '').lower():
+                result['enrichment_skipped_type'] = 'non-html'
+                return result
            soup = BeautifulSoup(response.text, 'lxml')
+            for el_name in ["script", "style", "nav", "header", "footer", "aside", "form", "iframe", "img", "svg", ".ad", ".advertisement"]:
+                for el in soup.select(el_name):
+                    el.decompose()
            main_el = soup.select_one('article, main, [role="main"], .entry-content, .post-content, #content, #main') or soup.body
+            text = main_el.get_text(separator='\n', strip=True) if main_el else ""
            text = re.sub(r'(\s*\n\s*){2,}', '\n\n', text).strip()
            if len(text) >= self._min_l:
                result['body'] = text[:self._max_l] + ("..." if len(text) > self._max_l else "")
+                result['enriched'] = True
+                self.cache.set(url, {'body': result['body'], 'enriched': True})
                gaia_logger.info(f"[Enrich] OK: {url} ({len(result['body'])} chars).")
+            else:
+                result['enrichment_failed'] = 'too_short'
+        except Exception as e:
+            result['enrichment_failed'] = type(e).__name__
+            gaia_logger.warning(f"[Enrich] Fail: {url}: {e}", exc_info=False)
        return result

class GeneralRAGPipeline:
    def __init__(self, config_dict: Optional[Dict] = None):
        self.config = config_dict if config_dict is not None else DEFAULT_RAG_CONFIG
        self.search_client = CompositeSearchClient(self.config)
+        enrich_cfg = self.config.get('enrichment', {})
+        self.enricher = ContentEnricher(self.config) if enrich_cfg.get('enabled', False) and BeautifulSoup else None
+        if not self.enricher:
+            gaia_logger.info("RAG Content Enrichment is disabled (no BeautifulSoup or config).")
+        self.pipeline_cache = CacheManager(
+            ttl=self.config.get('caching', {}).get('analyzer_cache_ttl', 3600),
+            max_size=self.config.get('caching', {}).get('analyzer_cache_size', 30),
+            name="RAGPipelineCache"
+        )
        gaia_logger.info("GeneralRAGPipeline initialized.")
+
    def analyze(self, query: str, force_refresh: bool = False) -> List[Dict]:
+        q = query.strip()
+        if not q:
+            return []
+        cfg_res, cfg_search = self.config.get('results', {}), self.config.get('search', {})
+        total_lim, enrich_cnt = cfg_res.get('total_limit', 3), cfg_res.get('enrich_count', 2)
+        enrich_en = self.config.get('enrichment', {}).get('enabled', False) and bool(self.enricher)
+        max_r_pq = cfg_search.get('default_max_results', 3)
+        cache_key = (q, max_r_pq, total_lim, enrich_en, enrich_cnt)
+        if not force_refresh and (cached := self.pipeline_cache.get(cache_key)) is not None:
+            gaia_logger.info(f"[RAG Analyze] Cache hit: '{q[:50]}'")
+            return cached
+        if force_refresh:
+            self.search_client.cache.clear()
+            if self.enricher:
+                self.enricher.cache.clear()
+        all_res, res_proc = [], ResultProcessor(self.config)
+        staged_qs = GaiaQueryBuilder(q, self.config).get_queries()
+        for stage, qs_in_stage in staged_qs.items():
+            for query_s, cat in qs_in_stage:
+                if len(all_res) >= total_lim * 2:
+                    break
                gaia_logger.info(f"[RAG Analyze] Stage '{stage}': Search '{query_s[:70]}'")
+                s_res = self.search_client.search(query_s, max_results=max_r_pq, force_refresh=force_refresh)
                all_res.extend(res_proc.process_batch(s_res or [], query_s, initial_cat=cat))
+        all_res.sort(key=lambda x: x.get('combined_score', 0), reverse=True)
        if enrich_en and self.enricher and all_res:
+            to_enrich = [r for r in all_res[:enrich_cnt] if r.get('href')]
            gaia_logger.info(f"[RAG Analyze] Enriching {len(to_enrich)} items...")
+            enriched_map = {
+                item['href']: item for item in self.enricher.enrich_batch(to_enrich, force_refresh=force_refresh)
+                if item.get('href')
+            }
+            temp_results = [enriched_map.get(r['href'], r) if r.get('href') else r for r in all_res]
+            all_res = temp_results
+            all_res.sort(key=lambda x: x.get('combined_score', 0), reverse=True)
+        final_results = all_res[:total_lim]
+        gaia_logger.info(f"[RAG Analyze] Done. {len(final_results)} results for '{q[:50]}'")
+        self.pipeline_cache.set(cache_key, final_results)
+        return final_results

class GaiaLevel1Agent:
    def __init__(self, api_url: str = DEFAULT_API_URL):
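To make the scoring arithmetic concrete (using the weights hard-coded in _score_result): a reuters.com hit whose snippet says "today" scores 0.75*0.6 + 0.9*0.4 = 0.81, while an untrusted domain with only a bare year scores 0.5*0.6 + 0.5*0.4 = 0.55. A hedged end-to-end sketch of driving the pipeline (not part of the commit):

pipeline = GeneralRAGPipeline(DEFAULT_RAG_CONFIG)
docs = pipeline.analyze("latest stable Python release")
for d in docs:
    print(round(d['combined_score'], 2), d['href'], d['body'][:80])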
@@ -563,10 +684,13 @@ class GaiaLevel1Agent:
                model_name = 'gemini-1.0-pro'
                self.llm_model = genai.GenerativeModel(model_name)
                gaia_logger.info(f"Gemini LLM ('{model_name}') initialized.")
+            except Exception as e:
+                gaia_logger.error(f"Error initializing Gemini LLM: {e}", exc_info=True)
+        else:
+            gaia_logger.warning("Gemini LLM dependencies or API key missing.")

+        if not self.llm_model:
+            gaia_logger.warning("LLM (Gemini) unavailable. Limited capabilities.")
        gaia_logger.info(f"GaiaLevel1Agent (RAG & FileProcessor) initialized. API: {self.api_url}")

    @lru_cache(maxsize=32)
@@ -582,7 +706,8 @@ class GaiaLevel1Agent:
            content_disposition = response.headers.get('Content-Disposition')
            if content_disposition:
                header_filename = FileProcessor._get_filename_from_url(content_disposition)
+                if header_filename != "unknown_file":
+                    filename = header_filename

            content_type = response.headers.get("Content-Type", "")
@@ -591,21 +716,27 @@ class GaiaLevel1Agent:
            return processed_content

        except requests.exceptions.HTTPError as e:
+            if e.response.status_code == 404:
+                gaia_logger.warning(f"File not found: {file_url}")
+                return None
            gaia_logger.warning(f"HTTP error fetching file {task_id}: {e}")
        except requests.exceptions.Timeout:
            gaia_logger.warning(f"Timeout fetching file {task_id}")
+            if attempt < 1:
+                time.sleep(1)
        except Exception as e:
            gaia_logger.error(f"Error fetching/processing file {task_id} ({file_url}): {e}", exc_info=True)
+            if attempt < 1:
+                time.sleep(1)
        return None

    def _formulate_answer_with_llm(self, question: str, file_context: Optional[str], web_context: Optional[str]) -> str:
        if not self.llm_model:
            gaia_logger.warning("LLM model (Gemini) not available for answer formulation.")
+            if web_context:
+                return f"Based on web search (LLM unavailable): {web_context.splitlines()[0] if web_context.splitlines() else 'No specific snippet found.'}"
+            if file_context:
+                return f"Based on the provided document (LLM unavailable, first 100 chars of processed content): {file_context[:100]}..."
            return "I am currently unable to process this request fully as the LLM is not available."

        prompt_parts = [
@@ -626,8 +757,8 @@ class GaiaLevel1Agent:
                truncated_web_context = web_context[:available_len_for_web] + "\n... (web context truncated)"
                gaia_logger.info(f"Truncated web context from {len(web_context)} to {len(truncated_web_context)} chars.")
            elif available_len_for_web <= 0 and web_context:
+                truncated_web_context = "\n...(web context omitted due to length constraints with file context)"
+                gaia_logger.warning("Web context completely omitted due to length constraints with file context.")

            prompt_parts.extend(["\n\nContext from Web Search Results:\n---", truncated_web_context, "---"])
            combined_context_len += len(truncated_web_context)
@@ -652,7 +783,7 @@ class GaiaLevel1Agent:
                top_p=0.95,
                max_output_tokens=2048
            )
+            safety_set = [{"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"} for c in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH", "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]]

            response = self.llm_model.generate_content(
                final_prompt,
@@ -672,7 +803,8 @@ class GaiaLevel1Agent:
            return llm_answer
        except Exception as e:
            gaia_logger.error(f"Error calling Gemini API: {e}", exc_info=True)
+            if "429" in str(e) or "ResourceExhausted" in str(type(e).__name__):
+                return "LLM temporarily unavailable (rate limit)."
            return "Error generating LLM answer."

    def __call__(self, question: str, task_id: Optional[str] = None) -> str:
@@ -697,7 +829,9 @@ class GaiaLevel1Agent:
            if not any(kw in q_lower for kw in web_still_needed_kws):
                needs_web = False
                gaia_logger.info("Substantial file context present and question doesn't strongly imply web search. Skipping web search.")
+        if "don't search" in q_lower or "do not search" in q_lower:
+            needs_web = False
+            gaia_logger.info("Web search disabled by prompt.")

        if needs_web:
            search_q = question.replace("?", "").strip()
@@ -707,14 +841,15 @@ class GaiaLevel1Agent:
            snippets = []
            for i, res_item in enumerate(rag_res):
                title, body, href = res_item.get('title','N/A'), res_item.get('body',''), res_item.get('href','#')
+                provider = res_item.get('query_tag','WebSearch')
                prefix = "Enriched" if res_item.get('enriched') else "Snippet"
                body_str = str(body) if body is not None else ""
                body_prompt = body_str[:(MAX_CONTEXT_LENGTH_LLM // (len(rag_res) if rag_res else 1)) - 200] + "..." if len(body_str) > 2800 else body_str
                snippets.append(f"Source [{i+1} - {provider}]: {title}\nURL: {href}\n{prefix}: {body_prompt}\n---")
            web_ctx_str = "\n\n".join(snippets)
            gaia_logger.info(f"RAG results: {len(web_ctx_str)} chars from {len(rag_res)} sources.")
+        else:
+            gaia_logger.warning("RAG pipeline yielded no web results for the query.")

        answer = self._formulate_answer_with_llm(question, file_ctx_str, web_ctx_str)
        gaia_logger.info(f"Final answer (first 70): {answer[:70]}...")
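For reference, each web hit is folded into the LLM prompt in this shape (illustrative values; query_tag doubles as the provider label):

Source [1 - capital of France]: Paris - Wikipedia
URL: https://en.wikipedia.org/wiki/Paris
Enriched: Paris is the capital and most populous city of France...
---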
@@ -722,50 +857,73 @@ class GaiaLevel1Agent:

def run_and_submit_all(profile: gr.OAuthProfile | None):
    space_id = os.getenv("SPACE_ID")
+    if profile:
+        username = f"{profile.username}"
+        gaia_logger.info(f"User logged in: {username}")
+    else:
+        gaia_logger.warning("User not logged in.")
+        return "Please Login to Hugging Face.", None
    questions_url, submit_url = f"{DEFAULT_API_URL}/questions", f"{DEFAULT_API_URL}/submit"
    try:
        agent = GaiaLevel1Agent(api_url=DEFAULT_API_URL)
        gaia_logger.info("GaiaLevel1Agent (RAG & FileProcessor) initialized for evaluation.")
+    except Exception as e:
+        gaia_logger.error(f"Error instantiating agent: {e}", exc_info=True)
+        return f"Error initializing agent: {e}", None
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Code link unavailable"
    gaia_logger.info(f"Agent code link: {agent_code}")
    try:
+        response = requests.get(questions_url, timeout=15)
+        response.raise_for_status()
+        questions_data = response.json()
+        if not questions_data or not isinstance(questions_data, list):
+            gaia_logger.error(f"Fetched questions list empty/invalid: {questions_data}")
+            return "Questions list empty/invalid.", None
        gaia_logger.info(f"Fetched {len(questions_data)} questions.")
+    except Exception as e:
+        gaia_logger.error(f"Error fetching questions: {e}", exc_info=True)
+        return f"Error fetching questions: {e}", None
+    results_log, answers_payload = [], []
+    GEMINI_RPM_LIMIT = 60
+    sleep_llm = (60.0 / GEMINI_RPM_LIMIT) + 0.8 if GEMINI_RPM_LIMIT > 0 else 0.5
    gaia_logger.info(f"LLM Rate: {GEMINI_RPM_LIMIT} RPM. Sleep ~{sleep_llm:.2f}s between LLM calls.")
    gaia_logger.info(f"Running agent on {len(questions_data)} questions...")
+    for i, item in enumerate(questions_data):
+        task_id, q_text = item.get("task_id"), item.get("question")
+        if not task_id or q_text is None:
+            results_log.append({"Task ID": task_id, "Question": q_text, "Submitted Answer": "SKIPPED"})
+            continue
        gaia_logger.info(f"Q {i+1}/{len(questions_data)} - Task: {task_id}")
        try:
+            answer = agent(question=q_text, task_id=task_id)
+            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
+            results_log.append({"Task ID": task_id, "Question": q_text, "Submitted Answer": answer})
        except Exception as e:
+            gaia_logger.error(f"Error agent task {task_id}: {e}", exc_info=True)
+            results_log.append({"Task ID": task_id, "Question": q_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+        if i < len(questions_data) - 1:
+            gaia_logger.info(f"Sleep {sleep_llm:.2f}s for LLM rate limit.")
+            time.sleep(sleep_llm)
+    if not answers_payload:
+        return "Agent produced no answers.", pd.DataFrame(results_log or [{"Info": "No questions processed"}])
+    submission = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    gaia_logger.info(f"Submitting {len(answers_payload)} answers for '{username}'...")
    try:
+        response = requests.post(submit_url, json=submission, timeout=60)
+        response.raise_for_status()
+        result_data = response.json()
+        status = (f"Submission Successful!\nUser: {result_data.get('username')}\nScore: {result_data.get('score','N/A')}% "
+                  f"({result_data.get('correct_count','?')}/{result_data.get('total_attempted','?')} correct)\n"
+                  f"Msg: {result_data.get('message','No message.')}")
+        gaia_logger.info("Submission successful.")
+        return status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
+        err_detail = f"Server: {e.response.status_code}. Detail: {e.response.text[:200]}"
+        gaia_logger.error(f"Submission Fail HTTP: {err_detail}", exc_info=False)
+        return f"Submission Failed: {err_detail}", pd.DataFrame(results_log)
+    except Exception as e:
+        gaia_logger.error(f"Submission Fail: {e}", exc_info=True)
+        return f"Submission Failed: {e}", pd.DataFrame(results_log)

with gr.Blocks(title="GAIA RAG Agent - Advanced") as demo:
    gr.Markdown("# Gaia Level 1 Agent (RAG & FileProcessor) Evaluation Runner")
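With the constants shown, the pause between questions works out to 60.0/60 + 0.8 = 1.8 seconds; a hedged sanity check (not in the commit):

GEMINI_RPM_LIMIT = 60
sleep_llm = (60.0 / GEMINI_RPM_LIMIT) + 0.8 if GEMINI_RPM_LIMIT > 0 else 0.5
assert abs(sleep_llm - 1.8) < 1e-9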
@@ -794,15 +952,23 @@ if __name__ == "__main__":
    }
    missing_keys = [key_name for key_name, key_val in required_env.items() if not key_val]
    for key_name in required_env:
+        if required_env[key_name]:
+            print(f"✅ {key_name} found.")
+        else:
+            print(f"⚠️ WARNING: {key_name} not set.")
+
+    if not DDGS:
+        print("⚠️ WARNING: duckduckgo_search lib missing (for RAG DDG).")
+    else:
+        print("✅ duckduckgo_search lib found (for RAG DDG).")
+    if not BeautifulSoup:
+        print("⚠️ WARNING: BeautifulSoup lib missing (for RAG Enricher).")
+    else:
+        print("✅ BeautifulSoup lib found (for RAG Enricher).")
+    if not genai:
+        print("⚠️ WARNING: google-generativeai lib missing (for LLM).")
+    else:
+        print("✅ google-generativeai lib found (for LLM).")

    if missing_keys:
        print(f"\n--- PLEASE SET THE FOLLOWING MISSING ENVIRONMENT VARIABLES FOR FULL FUNCTIONALITY: {', '.join(missing_keys)} ---\n")