Final_Assignment_Template

Sleeping

App Files Files Community

ChillThrills committed on Jun 2, 2025

Commit

521480c

1 Parent(s): 1098883

gemini-2.5-flash-preview-05-20

Browse files

Files changed (1) hide show

app.py +59 -108

app.py CHANGED Viewed

@@ -15,7 +15,7 @@ from abc import ABC, abstractmethod
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from concurrent.futures import TimeoutError as FuturesTimeoutError
 from collections import defaultdict
-import tempfile # Added for robust temporary directory management
 try:
     import google.generativeai as genai
@@ -23,10 +23,10 @@ try:
 except ImportError:
     genai = None
     GenerationConfig = None
-    HarmCategory = None # Added for safety settings/finish reason details
-    HarmBlockThreshold = None # Added for safety settings
-    FinishReason = None # Added for checking candidate finish reason
-    HarmProbability = None # Added for checking safety ratings probability
     print("WARNING: google-generativeai library not found. Install with: pip install google-generativeai")
 try:
@@ -167,9 +167,8 @@ def _get_video_object_detector():
     global video_object_detector_pipeline, VIDEO_ANALYSIS_DEVICE
     if video_object_detector_pipeline is None and hf_transformers_pipeline and torch:
         try:
-            # Simplified device selection, consistent with FileProcessor's ASR
             device_id = 0 if torch.cuda.is_available() else -1
-            if VIDEO_ANALYSIS_DEVICE == -1 : VIDEO_ANALYSIS_DEVICE = device_id # Set global if not user-overridden
             target_device = VIDEO_ANALYSIS_DEVICE if VIDEO_ANALYSIS_DEVICE != -1 else device_id
@@ -184,7 +183,7 @@ def _get_video_object_detector():
             return None
     return video_object_detector_pipeline
-def _get_video_vqa_pipeline(): # Renamed and changed to load VQA
     global video_vqa_pipeline, VIDEO_ANALYSIS_DEVICE
     if video_vqa_pipeline is None and hf_transformers_pipeline and torch:
         try:
@@ -194,8 +193,8 @@ def _get_video_vqa_pipeline(): # Renamed and changed to load VQA
             target_device = VIDEO_ANALYSIS_DEVICE if VIDEO_ANALYSIS_DEVICE != -1 else device_id
             video_vqa_pipeline = hf_transformers_pipeline(
-                "visual-question-answering", # Task is VQA
-                model=VIDEO_ANALYSIS_VQA_MODEL, # VQA model
                 device=target_device
             )
             gaia_logger.info(f"Video VQA pipeline ('{VIDEO_ANALYSIS_VQA_MODEL}') initialized on {'cuda' if target_device==0 else 'cpu'}.")
@@ -371,7 +370,7 @@ class FileProcessor:
                     if not df_list_for_fallback and xls:
                          for sheet_name in xls.sheet_names:
                             df_list_for_fallback.append((sheet_name, xls.parse(sheet_name)))
-                    elif not xls and not df_list_for_fallback: # Ensure df_list_for_fallback is populated if xls parsing failed early
                         temp_xls = pd.ExcelFile(io.BytesIO(content), engine='openpyxl')
                         for sheet_name in temp_xls.sheet_names:
                             df_list_for_fallback.append((sheet_name, temp_xls.parse(sheet_name)))
@@ -406,7 +405,7 @@ class FileProcessor:
                         page_text = page.extract_text()
                         if page_text:
                             text_content += page_text + "\n"
-                        if len(text_content) > MAX_FILE_CONTEXT_LENGTH * 1.2: # Check slightly over to allow truncation logic to handle it
                             break
             if not text_content:
                 return f"PDF Document: '{filename}'. No text could be extracted or PDF is empty."
@@ -495,13 +494,13 @@ class CacheManager:
             self.delete(key)
         return None
     def set(self, key: Any, value: Any):
-        if key in self._cache: self.delete(key) # Remove to update timestamp and order
         while len(self._cache) >= self.max_size and self._access_order:
             old_key = self._access_order.pop(0)
-            if old_key in self._cache: # Should always be true
                 del self._cache[old_key]; del self._timestamps[old_key]
         try: self._cache[key] = copy.deepcopy(value)
-        except TypeError: self._cache[key] = value # For non-deep-copyable items
         self._timestamps[key] = time.time(); self._access_order.append(key)
     def delete(self, key: Any):
         if key in self._cache:
@@ -741,13 +740,13 @@ class GeneralRAGPipeline:
         max_r_pq = cfg_search.get('default_max_results', 3)
         cache_key = (q, max_r_pq, total_lim, enrich_en, enrich_cnt)
         if not force_refresh and (cached := self.pipeline_cache.get(cache_key)) is not None: return cached
-        if force_refresh: self.search_client.cache.clear(); # Clears underlying search client cache
-        if self.enricher and force_refresh: self.enricher.cache.clear() # Clear enricher cache if force_refresh
         all_res, res_proc = [], ResultProcessor(self.config)
         staged_qs = GaiaQueryBuilder(q, self.config).get_queries()
         for stage, qs_in_stage in staged_qs.items():
             for query_s, cat in qs_in_stage:
-                if len(all_res) >= total_lim * 2: break # Fetch more initially to allow for better selection
                 s_res = self.search_client.search(query_s, max_results=max_r_pq, force_refresh=force_refresh)
                 all_res.extend(res_proc.process_batch(s_res or [], query_s, initial_cat=cat))
         all_res.sort(key=lambda x: x.get('combined_score', 0), reverse=True)
@@ -769,20 +768,11 @@ class GaiaLevel1Agent:
             try:
                 genai.configure(api_key=GOOGLE_GEMINI_API_KEY)
                 model_name = 'gemini-2.5-flash-preview-05-20'
                 self.llm_model = genai.GenerativeModel(model_name)
                 gaia_logger.info(f"Gemini LLM ('{model_name}') initialized.")
             except Exception as e:
-                gaia_logger.error(f"Error initializing Gemini LLM: {e}", exc_info=True)
-                # Attempt fallback if specific model fails (e.g. not available in region, or name typo)
-                try:
-                    gaia_logger.info("Attempting fallback to 'gemini-1.0-pro' for LLM.")
-                    self.llm_model = genai.GenerativeModel('gemini-1.0-pro') # A common, generally available model
-                    gaia_logger.info("Gemini LLM ('gemini-1.0-pro') initialized as fallback.")
-                except Exception as e_fallback:
-                    gaia_logger.error(f"Fallback LLM initialization also failed: {e_fallback}", exc_info=True)
         else:
             gaia_logger.warning("Gemini LLM dependencies or API key missing.")
@@ -798,12 +788,12 @@ class GaiaLevel1Agent:
     def _fetch_and_process_file_content(self, task_id: str) -> Optional[str]:
         file_url = f"{self.api_url}/files/{task_id}"
-        for attempt in range(2): # Retry once
             try:
                 response = requests.get(file_url, timeout=AGENT_DEFAULT_TIMEOUT)
                 response.raise_for_status()
-                filename = FileProcessor._get_filename_from_url(response.url) # Fallback from URL
                 content_disposition = response.headers.get('Content-Disposition')
                 if content_disposition:
                     header_filename = FileProcessor._get_filename_from_url(content_disposition)
@@ -816,7 +806,7 @@ class GaiaLevel1Agent:
             except requests.exceptions.HTTPError as e:
                 if e.response.status_code == 404:
                     gaia_logger.warning(f"File not found for task {task_id}: {file_url}")
-                    return None # No point retrying 404
                 gaia_logger.warning(f"HTTP error fetching file {task_id}: {e}")
             except requests.exceptions.Timeout:
                 gaia_logger.warning(f"Timeout fetching file {task_id}")
@@ -832,7 +822,6 @@ class GaiaLevel1Agent:
         cleaned = answer_text.lower().strip()
-        # Remove common prefixes
         prefixes_to_remove = [
             "a type of ", "a variety of ", "it's a ", "it is a ", "an ", "a ", "the ",
             "this is a ", "this bird is a ", "it appears to be a ", "looks like a ",
@@ -842,26 +831,19 @@ class GaiaLevel1Agent:
             if cleaned.startswith(prefix):
                 cleaned = cleaned[len(prefix):]
-        # Remove common suffixes
         suffixes_to_remove = [" bird", " species"]
         for suffix in suffixes_to_remove:
             if cleaned.endswith(suffix):
                 cleaned = cleaned[:-len(suffix)]
-        # Remove parenthetical content or descriptive clauses if simple
-        cleaned = re.sub(r"\s*\(.*\)\s*$", "", cleaned).strip() # e.g. "robin (american)" -> "robin"
-        cleaned = re.sub(r",\s*which is.*$", "", cleaned).strip() # e.g. "sparrow, which is small" -> "sparrow"
-        # Basic character filtering (allow letters, numbers for things like "Type 2", spaces, hyphens)
         cleaned = re.sub(r"[^a-z0-9\s\-]", "", cleaned).strip()
-        # Normalize whitespace
         cleaned = " ".join(cleaned.split())
-        # Filter out very generic or uncertain answers post-cleaning
         uncertain_terms = ["unknown", "not sure", "unclear", "difficult to say", "generic", "common bird", "no bird", "not a bird"]
         if any(term in cleaned for term in uncertain_terms) or len(cleaned) < VIDEO_VQA_MIN_ANSWER_LENGTH:
-            return "" # Return empty if too generic or short
         return cleaned
@@ -895,27 +877,19 @@ class GaiaLevel1Agent:
                 'quiet': True,
                 'max_filesize': 75 * 1024 * 1024,
                 'overwrites': True, 'noprogress': True, 'noplaylist': True, 'socket_timeout': 20,
-                'merge_output_format': 'mp4', # Encourage mp4 output if merging
-                # Removed 'postprocessors': [{'key': 'FFmpegExtractAudio', ...}]
             }
             gaia_logger.info(f"Attempting to download video: {video_url}")
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                 info_dict = ydl.extract_info(video_url, download=True)
-                video_file_path = ydl.prepare_filename(info_dict) # Get the final path
-                # Check if downloaded file is indeed a video format recognised by OpenCV
-                # Common video extensions that OpenCV usually handles well.
-                # This check is made more robust by also trying to open it.
                 if not video_file_path or not any(video_file_path.lower().endswith(ext) for ext in ['.mp4', '.webm', '.avi', '.mkv', '.mov', '.flv']):
                     gaia_logger.warning(f"Downloaded file '{video_file_path}' might not be a standard video format or download failed to produce one. Will attempt to open.")
-                    # Try to find a plausible video file if the main one looks suspicious
                     possible_video_files = [f for f in os.listdir(temp_dir) if f.startswith(info_dict.get('id','')) and any(f.lower().endswith(ext) for ext in ['.mp4', '.webm'])]
                     if possible_video_files:
                         video_file_path = os.path.join(temp_dir, possible_video_files[0])
                         gaia_logger.info(f"Using alternative video file from temp_dir: {video_file_path}")
-                    # else: # The cap.isOpened() check below will handle if it's truly unusable
-                        # gaia_logger.error(f"No suitable video file found in temp_dir for {info_dict.get('id','')}")
-                        # return "Video download resulted in a non-video or unusable file."
             if not video_file_path or not os.path.exists(video_file_path):
@@ -935,9 +909,9 @@ class GaiaLevel1Agent:
             total_frames_video = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
             fps = cap.get(cv2.CAP_PROP_FPS)
-            if not fps or fps <= 0: fps = 25 # Default fps if detection fails or is zero
-            frame_interval = max(1, int(fps)) # Process ~1 frame per second
             frames_analyzed_count = 0
             current_frame_num = 0
@@ -945,11 +919,11 @@ class GaiaLevel1Agent:
             gaia_logger.info(f"Video Info: ~{total_frames_video // fps if fps > 0 else total_frames_video:.0f}s, {fps:.2f} FPS. Analyzing ~1 frame/sec up to {VIDEO_MAX_FRAMES_TO_PROCESS} frames.")
             while cap.isOpened() and frames_analyzed_count < VIDEO_MAX_FRAMES_TO_PROCESS:
-                cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame_num) # Jump to frame
                 ret, frame_data = cap.read()
                 if not ret: break
-                timestamp_sec = current_frame_num / fps if fps > 0 else frames_analyzed_count # Fallback timestamp if fps is bad
                 gaia_logger.info(f"Processing frame {current_frame_num} (analyzed {frames_analyzed_count+1}/{VIDEO_MAX_FRAMES_TO_PROCESS}) at ~{timestamp_sec:.1f}s")
                 try:
@@ -962,11 +936,9 @@ class GaiaLevel1Agent:
                 detected_objects = detector(pil_image)
                 bird_crops_this_frame = []
                 for obj in detected_objects:
-                    # Check label case-insensitively
                     if obj['label'].lower() == 'bird' and obj['score'] > VIDEO_CONFIDENCE_THRESHOLD_BIRD:
                         box = obj['box']
                         xmin, ymin, xmax, ymax = box['xmin'], box['ymin'], box['xmax'], box['ymax']
-                        # Ensure box coordinates are valid
                         if not (0 <= xmin < xmax <= pil_image.width and 0 <= ymin < ymax <= pil_image.height):
                             gaia_logger.debug(f"Invalid box for bird: {box}, img size: {pil_image.size}")
                             continue
@@ -991,7 +963,7 @@ class GaiaLevel1Agent:
                         vqa_answer_list = vqa_model(bird_crop_img, question=vqa_question, top_k=1)
                         raw_vqa_answer_text = ""
-                        vqa_confidence = VIDEO_VQA_CONFIDENCE_THRESHOLD # Default
                         if isinstance(vqa_answer_list, list) and vqa_answer_list:
                             raw_vqa_answer_text = vqa_answer_list[0].get('answer', "")
@@ -1023,7 +995,6 @@ class GaiaLevel1Agent:
                 current_frame_num += frame_interval
                 frames_analyzed_count += 1
-            # cap.release() should be in finally
             context_str = (f"Video analysis result: The highest number of distinct bird species types inferred simultaneously "
                            f"in the analyzed portion of the video (up to {VIDEO_MAX_FRAMES_TO_PROCESS} frames) was {max_simultaneous_species}. "
@@ -1034,7 +1005,7 @@ class GaiaLevel1Agent:
         except yt_dlp.utils.DownloadError as e:
             gaia_logger.error(f"yt-dlp download error for {video_url}: {str(e)}")
             msg_str = str(e)
-            clean_msg = msg_str # Default to full message
             if "Unsupported URL" in msg_str: clean_msg = "Unsupported video URL."
             elif "video unavailable" in msg_str.lower(): clean_msg = "Video is unavailable."
             elif "private video" in msg_str.lower(): clean_msg = "Video is private."
@@ -1043,8 +1014,7 @@ class GaiaLevel1Agent:
                  clean_msg = "Video download failed due to YouTube restrictions (e.g., sign-in, cookies, or authentication required)."
             elif "HTTP Error 403" in msg_str or "Forbidden" in msg_str : clean_msg = "Access to video denied (Forbidden/403)."
             elif "HTTP Error 404" in msg_str or "Not Found" in msg_str : clean_msg = "Video not found (404)."
-            # Keep the message relatively concise for the LLM
-            return f"Video download failed: {clean_msg[:250] + '...' if len(clean_msg) > 250 else clean_msg}" # Limit length of detailed message
         except Exception as e:
             gaia_logger.error(f"Error during video analysis for {video_url}: {e}", exc_info=True)
@@ -1054,7 +1024,7 @@ class GaiaLevel1Agent:
                 cap.release()
                 gaia_logger.info("Video capture released.")
             if temp_dir_obj:
-                temp_dir_path_for_log = temp_dir_obj.name # Store before cleanup for logging
                 try:
                     temp_dir_obj.cleanup()
                     gaia_logger.info(f"Successfully cleaned up temp video directory: {temp_dir_path_for_log}")
@@ -1073,10 +1043,9 @@ class GaiaLevel1Agent:
             reasoning_trace = parts[0].strip()
             model_answer = parts[1].strip()
         else:
-            reasoning_trace = llm_text # If sentinel not found, assume whole output is reasoning
             lines = llm_text.strip().split('\n')
-            # Try to take the last non-empty line as answer, or a default if all reasoning
-            model_answer = "Could not parse answer" # Default if no clear answer found
             for line in reversed(lines):
                 if line.strip():
                     model_answer = line.strip()
@@ -1089,11 +1058,10 @@ class GaiaLevel1Agent:
         default_model_answer = "Information not available in provided context"
         default_reasoning = "LLM processing failed or context insufficient."
-        if not self.llm_model or not genai or not GenerationConfig or not FinishReason or not HarmCategory or not HarmBlockThreshold: # Added more checks
             gaia_logger.warning("LLM model (Gemini) or necessary enums/configs not available for answer formulation.")
             reasoning = "LLM model (Gemini) or its configuration components not available for answer formulation."
             answer_val = default_model_answer
-            # Provide some context indication even if LLM is down
             if web_context and file_context:
                 reasoning += " Context from file and web was found but not processed by LLM."
             elif web_context:
@@ -1125,7 +1093,7 @@ class GaiaLevel1Agent:
             file_header = "\n\nContext from Provided Document:\n---"
             file_footer = "\n---"
             len_web_ctx = len(web_context) if web_context else 0
-            max_len_for_file = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len_web_ctx - len(file_header) - len(file_footer) - 500 # Buffer
             if max_len_for_file > 100 :
                 truncated_file_context = file_context[:max_len_for_file]
@@ -1139,11 +1107,10 @@ class GaiaLevel1Agent:
         if web_context:
             header_text = "\n\nContext from External Sources (Web/Video):\n---"
-            if "Video analysis result:" in web_context and "Source [" not in web_context: # Only video
                 header_text = "\n\nContext from Video Analysis:\n---"
-            elif "Source [" in web_context and "Video analysis result:" not in web_context: # Only web
                 header_text = "\n\nContext from Web Search Results:\n---"
-            # If both, the generic "External Sources" is fine.
             web_footer = "\n---"
             available_len_for_web = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len(header_text) - len(web_footer) - 300
@@ -1186,11 +1153,10 @@ class GaiaLevel1Agent:
                 return {"model_answer": "LLM Error: No response", "reasoning_trace": "LLM did not provide any response candidates."}
             candidate = response.candidates[0]
-            # Check candidate's finish_reason
             if candidate.finish_reason != FinishReason.STOP:
                 reason_name = candidate.finish_reason.name if hasattr(candidate.finish_reason, 'name') else str(candidate.finish_reason)
                 safety_ratings_str = ""
-                if candidate.safety_ratings: # Check if safety_ratings exist
                     relevant_ratings = [
                         f"{sr.category.name.split('_')[-1] if hasattr(sr.category, 'name') else 'CAT?'}: {(sr.probability.name if hasattr(sr.probability, 'name') else 'PROB?')}"
                         for sr in candidate.safety_ratings if (hasattr(sr,'blocked') and sr.blocked) or (hasattr(sr,'probability') and HarmProbability and sr.probability.value >= HarmProbability.MEDIUM.value)
@@ -1198,7 +1164,7 @@ class GaiaLevel1Agent:
                     if relevant_ratings: safety_ratings_str = "; ".join(relevant_ratings)
                 gaia_logger.warning(f"Gemini candidate did not finish successfully. Reason: {reason_name}. Safety Ratings: {safety_ratings_str if safety_ratings_str else 'N/A'}")
                 user_message = "LLM Error: Response incomplete"
                 if candidate.finish_reason == FinishReason.SAFETY: user_message = "LLM Error: Response blocked for safety"
                 elif candidate.finish_reason == FinishReason.MAX_TOKENS: user_message = "LLM Error: Response truncated (max tokens)"
@@ -1209,19 +1175,19 @@ class GaiaLevel1Agent:
                     "reasoning_trace": f"LLM generation stopped. Reason: {reason_name}. " + (f"Details: {safety_ratings_str}" if safety_ratings_str else "")
                 }
-            llm_answer_text = response.text # Safe to access now
             gaia_logger.info(f"LLM Raw Full Answer (first 200 chars): {llm_answer_text[:200]}...")
             return self._parse_llm_output(llm_answer_text)
         except ValueError as ve:
             if "finish_reason" in str(ve).lower() and ("part" in str(ve).lower() or "candidate" in str(ve).lower()):
-                gaia_logger.error(f"ValueError accessing Gemini response.text, likely due to non-STOP finish_reason not caught explicitly: {ve}", exc_info=False) # exc_info=False as it's handled
                 fr_from_ex = "Unknown (from ValueError)"
-                match_fr = re.search(r"finish_reason.*?is\s*(\w+)", str(ve), re.IGNORECASE) # Try to get name or number
                 if match_fr: fr_from_ex = match_fr.group(1)
                 return {"model_answer": "LLM Error: Invalid response state",
                         "reasoning_trace": f"Could not parse LLM response. Finish reason possibly {fr_from_ex}. Details: {str(ve)[:150]}"}
-            else: # Other ValueErrors
                 gaia_logger.error(f"ValueError during Gemini call or processing: {ve}", exc_info=True)
                 return {"model_answer": "LLM Error: Value error", "reasoning_trace": f"A value error occurred: {str(ve)}"}
         except Exception as e:
@@ -1240,7 +1206,6 @@ class GaiaLevel1Agent:
             elif "InternalServerError" in error_type_name or "500" in error_message :
                 answer_val = "LLM server error"
                 reasoning = "Error: LLM experienced an internal server error."
-            # Add specific handling for google.api_core.exceptions.ServiceUnavailable (503) if it occurs
             elif "ServiceUnavailable" in error_type_name or "503" in error_message:
                 answer_val = "LLM service unavailable"
                 reasoning = "Error: LLM service is temporarily unavailable (503)."
@@ -1253,15 +1218,13 @@ class GaiaLevel1Agent:
         q_lower = question.lower().strip()
         video_context_str: Optional[str] = None
-        # Regex for YouTube URLs (watch, short, and youtu.be forms)
         video_url_match = re.search(r"(https?://(?:www\.)?(?:youtube\.com/(?:watch\?v=|shorts/)|youtu\.be/)[\w\-=&%]+)", question)
-        video_keywords = ["video", "youtube.com", "youtu.be", "clip", "recording"] # Broader keywords
         species_keywords = ["species", "bird", "birds", "type of bird", "kinds of bird", "different birds"]
         action_keywords = ["count", "how many", "number of", "simultaneously", "at the same time", "on camera", "identify", "list"]
-        # Trigger video analysis if a URL is found AND relevant keywords are present
         if video_url_match and \
            any(vk in q_lower for vk in video_keywords) and \
            any(sk in q_lower for sk in species_keywords) and \
@@ -1286,27 +1249,22 @@ class GaiaLevel1Agent:
         web_rag_ctx_str: Optional[str] = None
         needs_web_rag = True
-        # Logic to decide if RAG web search is needed
         if video_context_str:
-            # If video analysis seems to directly answer a counting/identification question from video
             if "Video analysis result:" in video_context_str and not "download failed" in video_context_str.lower() and not "skipped" in video_context_str.lower():
                  if (("count" in q_lower or "how many" in q_lower or "number of" in q_lower) and ("simultaneously" in q_lower or "at the same time" in q_lower or "distinct" in q_lower)) and any(sk_q in q_lower for sk_q in species_keywords):
-                    needs_web_rag = False # Video analysis likely sufficient
                     gaia_logger.info("Video context seems primary for a specific video counting question; web RAG may be skipped.")
-        if file_ctx_str and len(file_ctx_str) > 100 and not video_context_str: # Only consider file if no video context
-            # Keywords suggesting the answer is likely within the document
             doc_can_answer_kws = ["summarize", "according to the document", "in the provided text", "based on the file content", "from this file", "in this data"]
-            # Keywords suggesting external info is needed despite file
             web_still_needed_kws = ["what is the current", "latest news on", "public opinion of", "search for more about", "compare this to", "what happened after"]
             if any(kw in q_lower for kw in doc_can_answer_kws) and not any(kw in q_lower for kw in web_still_needed_kws):
                 needs_web_rag = False
                 gaia_logger.info("File context seems primary; web RAG may be skipped.")
-            # Less strong heuristic: if it's a statement or simple file query not asking for external comparison/update
             elif not any(kw in q_lower for kw in web_still_needed_kws) and not question.strip().endswith("?"):
-                 if not any(qk in q_lower for qk in ["why is", "how does", "explain the impact of", "what if"]): # Questions often needing broader context
                      needs_web_rag = False
                      gaia_logger.info("File context seems sufficient for non-complex query; web RAG may be skipped.")
@@ -1317,8 +1275,6 @@ class GaiaLevel1Agent:
         if needs_web_rag:
             search_q = question.replace("?", "").strip()
-            # If video context failed, the question might still be about the video's topic, so RAG is useful.
-            # If file context is present but RAG is still needed, LLM will have to reconcile.
             rag_res = self.rag_pipeline.analyze(query=search_q, force_refresh=False)
             if rag_res:
                 snippets = []
@@ -1326,7 +1282,7 @@ class GaiaLevel1Agent:
                     title = res_item.get('title','N/A')
                     body = res_item.get('body','')
                     href = res_item.get('href','#')
-                    provider_info = res_item.get('query_tag','WebSearch') # Can be refined if RAG provides more source details
                     source_type = "EnrichedContent" if res_item.get('enriched') else "Snippet"
                     body_preview = (body[:1500] + "...") if len(body) > 1500 else body
                     snippets.append(f"Source [{i+1} - {provider_info}]: {title}\nURL: {href}\n{source_type}: {body_preview}\n---")
@@ -1336,10 +1292,8 @@ class GaiaLevel1Agent:
         final_llm_external_context_parts = []
         if video_context_str:
-            final_llm_external_context_parts.append(f"{video_context_str}") # Header already in video_context_str
         if web_rag_ctx_str:
-            # No separate header needed if video_context_str already has "Video Analysis Context:"
-            # and web_rag_ctx_str is structured with "Source [n]:"
             final_llm_external_context_parts.append(f"{web_rag_ctx_str}")
         final_llm_external_context = "\n\n---\n\n".join(final_llm_external_context_parts).strip() if final_llm_external_context_parts else None
@@ -1364,9 +1318,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     except Exception as e: return f"Error fetching questions: {e}", None
     results_log_for_gradio, answers_for_api_submission = [], []
-    # Use a more conservative default RPM if not set, matching free tier common limits.
-    GEMINI_RPM_LIMIT = int(os.getenv("GEMINI_RPM_LIMIT", "10")) # Default to 10 RPM if not set, as per common free tier
-    # Add a small buffer to sleep time
     sleep_llm = (60.0 / GEMINI_RPM_LIMIT) + 0.5 if GEMINI_RPM_LIMIT > 0 else 0.2
     gaia_logger.info(f"Using Gemini RPM limit: {GEMINI_RPM_LIMIT}, LLM call sleep: {sleep_llm:.2f}s")
@@ -1422,7 +1374,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(results_log_for_gradio)
 with gr.Blocks(title="GAIA RAG Agent - Advanced") as demo:
-    gr.Markdown("# GAIA Agent")
     gr.Markdown(
         """
         **Instructions:**
@@ -1435,11 +1387,11 @@ with gr.Blocks(title="GAIA RAG Agent - Advanced") as demo:
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
     status_output = gr.Textbox(label="Status / Submission Result", lines=5, interactive=False)
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, max_rows=20, height=500) # Added max_rows and height
     run_button.click(fn=run_and_submit_all, inputs=[], outputs=[status_output, results_table])
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " GAIA Agent - RAG, FileProc, Video Analysis " + "-"*30)
     required_env = {
         "GOOGLE_GEMINI_API_KEY": GOOGLE_GEMINI_API_KEY,
         "GOOGLE_API_KEY": GOOGLE_CUSTOM_SEARCH_API_KEY,
@@ -1461,11 +1413,10 @@ if __name__ == "__main__":
     if missing_keys: print(f"\n--- PLEASE SET MISSING ENV VARS FOR FULL FUNCTIONALITY: {', '.join(missing_keys)} ---\n")
     else: print("\n--- All major API Key Environment Variables found. ---")
-    # Log the Gemini RPM limit being used
     gemini_rpm = os.getenv("GEMINI_RPM_LIMIT", "10 (defaulted)")
     print(f"--- Using GEMINI_RPM_LIMIT: {gemini_rpm} (Ensure this matches your Gemini API plan limits) ---")
-    print("-"*(60 + len(" GAIA Agent - RAG, FileProc, Video Analysis ")) + "\n")
     demo.launch(server_name="0.0.0.0", server_port=7860, debug=False, share=False)

 from concurrent.futures import ThreadPoolExecutor, as_completed
 from concurrent.futures import TimeoutError as FuturesTimeoutError
 from collections import defaultdict
+import tempfile
 try:
     import google.generativeai as genai
 except ImportError:
     genai = None
     GenerationConfig = None
+    HarmCategory = None
+    HarmBlockThreshold = None
+    FinishReason = None
+    HarmProbability = None
     print("WARNING: google-generativeai library not found. Install with: pip install google-generativeai")
 try:
     global video_object_detector_pipeline, VIDEO_ANALYSIS_DEVICE
     if video_object_detector_pipeline is None and hf_transformers_pipeline and torch:
         try:
             device_id = 0 if torch.cuda.is_available() else -1
+            if VIDEO_ANALYSIS_DEVICE == -1 : VIDEO_ANALYSIS_DEVICE = device_id
             target_device = VIDEO_ANALYSIS_DEVICE if VIDEO_ANALYSIS_DEVICE != -1 else device_id
             return None
     return video_object_detector_pipeline
+def _get_video_vqa_pipeline():
     global video_vqa_pipeline, VIDEO_ANALYSIS_DEVICE
     if video_vqa_pipeline is None and hf_transformers_pipeline and torch:
         try:
             target_device = VIDEO_ANALYSIS_DEVICE if VIDEO_ANALYSIS_DEVICE != -1 else device_id
             video_vqa_pipeline = hf_transformers_pipeline(
+                "visual-question-answering",
+                model=VIDEO_ANALYSIS_VQA_MODEL,
                 device=target_device
             )
             gaia_logger.info(f"Video VQA pipeline ('{VIDEO_ANALYSIS_VQA_MODEL}') initialized on {'cuda' if target_device==0 else 'cpu'}.")
                     if not df_list_for_fallback and xls:
                          for sheet_name in xls.sheet_names:
                             df_list_for_fallback.append((sheet_name, xls.parse(sheet_name)))
+                    elif not xls and not df_list_for_fallback:
                         temp_xls = pd.ExcelFile(io.BytesIO(content), engine='openpyxl')
                         for sheet_name in temp_xls.sheet_names:
                             df_list_for_fallback.append((sheet_name, temp_xls.parse(sheet_name)))
                         page_text = page.extract_text()
                         if page_text:
                             text_content += page_text + "\n"
+                        if len(text_content) > MAX_FILE_CONTEXT_LENGTH * 1.2:
                             break
             if not text_content:
                 return f"PDF Document: '{filename}'. No text could be extracted or PDF is empty."
             self.delete(key)
         return None
     def set(self, key: Any, value: Any):
         """Store ``value`` under ``key``, evicting the oldest entries when full.

         If ``key`` is already cached, ``self.delete(key)`` is called first so
         the key is re-inserted fresh. Entries are then evicted from the front
         of ``self._access_order`` until the cache holds fewer than
         ``self.max_size`` items or the order list is empty. The value is
         stored as a deep copy when possible; otherwise the original object is
         stored by reference.

         Args:
             key: Hashable cache key.
             value: Object to cache.
         """
         if key in self._cache:
             self.delete(key)

         # Evict oldest-first until there is room for the new entry.
         while len(self._cache) >= self.max_size and self._access_order:
             old_key = self._access_order.pop(0)
             # pop(..., None) instead of del: an order entry whose cache or
             # timestamp record is already gone must not raise KeyError here.
             self._cache.pop(old_key, None)
             self._timestamps.pop(old_key, None)

         try:
             self._cache[key] = copy.deepcopy(value)
         except Exception:
             # Caching is best-effort: deepcopy can fail with more than
             # TypeError (custom __deepcopy__, copy.Error, RecursionError).
             # Fall back to storing the object itself rather than crashing.
             self._cache[key] = value

         self._timestamps[key] = time.time()
         self._access_order.append(key)
     def delete(self, key: Any):
         if key in self._cache:
         max_r_pq = cfg_search.get('default_max_results', 3)
         cache_key = (q, max_r_pq, total_lim, enrich_en, enrich_cnt)
         if not force_refresh and (cached := self.pipeline_cache.get(cache_key)) is not None: return cached
+        if force_refresh: self.search_client.cache.clear();
+        if self.enricher and force_refresh: self.enricher.cache.clear()
         all_res, res_proc = [], ResultProcessor(self.config)
         staged_qs = GaiaQueryBuilder(q, self.config).get_queries()
         for stage, qs_in_stage in staged_qs.items():
             for query_s, cat in qs_in_stage:
+                if len(all_res) >= total_lim * 2: break
                 s_res = self.search_client.search(query_s, max_results=max_r_pq, force_refresh=force_refresh)
                 all_res.extend(res_proc.process_batch(s_res or [], query_s, initial_cat=cat))
         all_res.sort(key=lambda x: x.get('combined_score', 0), reverse=True)
             try:
                 genai.configure(api_key=GOOGLE_GEMINI_API_KEY)
                 model_name = 'gemini-2.5-flash-preview-05-20'
                 self.llm_model = genai.GenerativeModel(model_name)
                 gaia_logger.info(f"Gemini LLM ('{model_name}') initialized.")
             except Exception as e:
+                gaia_logger.error(f"Error initializing Gemini LLM ('{model_name}'): {e}", exc_info=True)
+                # No fallback, as per user instruction.
         else:
             gaia_logger.warning("Gemini LLM dependencies or API key missing.")
     def _fetch_and_process_file_content(self, task_id: str) -> Optional[str]:
         file_url = f"{self.api_url}/files/{task_id}"
+        for attempt in range(2):
             try:
                 response = requests.get(file_url, timeout=AGENT_DEFAULT_TIMEOUT)
                 response.raise_for_status()
+                filename = FileProcessor._get_filename_from_url(response.url)
                 content_disposition = response.headers.get('Content-Disposition')
                 if content_disposition:
                     header_filename = FileProcessor._get_filename_from_url(content_disposition)
             except requests.exceptions.HTTPError as e:
                 if e.response.status_code == 404:
                     gaia_logger.warning(f"File not found for task {task_id}: {file_url}")
+                    return None
                 gaia_logger.warning(f"HTTP error fetching file {task_id}: {e}")
             except requests.exceptions.Timeout:
                 gaia_logger.warning(f"Timeout fetching file {task_id}")
         cleaned = answer_text.lower().strip()
         prefixes_to_remove = [
             "a type of ", "a variety of ", "it's a ", "it is a ", "an ", "a ", "the ",
             "this is a ", "this bird is a ", "it appears to be a ", "looks like a ",
             if cleaned.startswith(prefix):
                 cleaned = cleaned[len(prefix):]
         suffixes_to_remove = [" bird", " species"]
         for suffix in suffixes_to_remove:
             if cleaned.endswith(suffix):
                 cleaned = cleaned[:-len(suffix)]
+        cleaned = re.sub(r"\s*\(.*\)\s*$", "", cleaned).strip()
+        cleaned = re.sub(r",\s*which is.*$", "", cleaned).strip()
         cleaned = re.sub(r"[^a-z0-9\s\-]", "", cleaned).strip()
         cleaned = " ".join(cleaned.split())
         uncertain_terms = ["unknown", "not sure", "unclear", "difficult to say", "generic", "common bird", "no bird", "not a bird"]
         if any(term in cleaned for term in uncertain_terms) or len(cleaned) < VIDEO_VQA_MIN_ANSWER_LENGTH:
+            return ""
         return cleaned
                 'quiet': True,
                 'max_filesize': 75 * 1024 * 1024,
                 'overwrites': True, 'noprogress': True, 'noplaylist': True, 'socket_timeout': 20,
+                'merge_output_format': 'mp4',
             }
             gaia_logger.info(f"Attempting to download video: {video_url}")
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                 info_dict = ydl.extract_info(video_url, download=True)
+                video_file_path = ydl.prepare_filename(info_dict)
                 if not video_file_path or not any(video_file_path.lower().endswith(ext) for ext in ['.mp4', '.webm', '.avi', '.mkv', '.mov', '.flv']):
                     gaia_logger.warning(f"Downloaded file '{video_file_path}' might not be a standard video format or download failed to produce one. Will attempt to open.")
                     possible_video_files = [f for f in os.listdir(temp_dir) if f.startswith(info_dict.get('id','')) and any(f.lower().endswith(ext) for ext in ['.mp4', '.webm'])]
                     if possible_video_files:
                         video_file_path = os.path.join(temp_dir, possible_video_files[0])
                         gaia_logger.info(f"Using alternative video file from temp_dir: {video_file_path}")
             if not video_file_path or not os.path.exists(video_file_path):
             total_frames_video = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
             fps = cap.get(cv2.CAP_PROP_FPS)
+            if not fps or fps <= 0: fps = 25
+            frame_interval = max(1, int(fps))
             frames_analyzed_count = 0
             current_frame_num = 0
             gaia_logger.info(f"Video Info: ~{total_frames_video // fps if fps > 0 else total_frames_video:.0f}s, {fps:.2f} FPS. Analyzing ~1 frame/sec up to {VIDEO_MAX_FRAMES_TO_PROCESS} frames.")
             while cap.isOpened() and frames_analyzed_count < VIDEO_MAX_FRAMES_TO_PROCESS:
+                cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame_num)
                 ret, frame_data = cap.read()
                 if not ret: break
+                timestamp_sec = current_frame_num / fps if fps > 0 else frames_analyzed_count
                 gaia_logger.info(f"Processing frame {current_frame_num} (analyzed {frames_analyzed_count+1}/{VIDEO_MAX_FRAMES_TO_PROCESS}) at ~{timestamp_sec:.1f}s")
                 try:
                 detected_objects = detector(pil_image)
                 bird_crops_this_frame = []
                 for obj in detected_objects:
                     if obj['label'].lower() == 'bird' and obj['score'] > VIDEO_CONFIDENCE_THRESHOLD_BIRD:
                         box = obj['box']
                         xmin, ymin, xmax, ymax = box['xmin'], box['ymin'], box['xmax'], box['ymax']
                         if not (0 <= xmin < xmax <= pil_image.width and 0 <= ymin < ymax <= pil_image.height):
                             gaia_logger.debug(f"Invalid box for bird: {box}, img size: {pil_image.size}")
                             continue
                         vqa_answer_list = vqa_model(bird_crop_img, question=vqa_question, top_k=1)
                         raw_vqa_answer_text = ""
+                        vqa_confidence = VIDEO_VQA_CONFIDENCE_THRESHOLD
                         if isinstance(vqa_answer_list, list) and vqa_answer_list:
                             raw_vqa_answer_text = vqa_answer_list[0].get('answer', "")
                 current_frame_num += frame_interval
                 frames_analyzed_count += 1
             context_str = (f"Video analysis result: The highest number of distinct bird species types inferred simultaneously "
                            f"in the analyzed portion of the video (up to {VIDEO_MAX_FRAMES_TO_PROCESS} frames) was {max_simultaneous_species}. "
         except yt_dlp.utils.DownloadError as e:
             gaia_logger.error(f"yt-dlp download error for {video_url}: {str(e)}")
             msg_str = str(e)
+            clean_msg = msg_str
             if "Unsupported URL" in msg_str: clean_msg = "Unsupported video URL."
             elif "video unavailable" in msg_str.lower(): clean_msg = "Video is unavailable."
             elif "private video" in msg_str.lower(): clean_msg = "Video is private."
                  clean_msg = "Video download failed due to YouTube restrictions (e.g., sign-in, cookies, or authentication required)."
             elif "HTTP Error 403" in msg_str or "Forbidden" in msg_str : clean_msg = "Access to video denied (Forbidden/403)."
             elif "HTTP Error 404" in msg_str or "Not Found" in msg_str : clean_msg = "Video not found (404)."
+            return f"Video download failed: {clean_msg[:250] + '...' if len(clean_msg) > 250 else clean_msg}"
         except Exception as e:
             gaia_logger.error(f"Error during video analysis for {video_url}: {e}", exc_info=True)
                 cap.release()
                 gaia_logger.info("Video capture released.")
             if temp_dir_obj:
+                temp_dir_path_for_log = temp_dir_obj.name
                 try:
                     temp_dir_obj.cleanup()
                     gaia_logger.info(f"Successfully cleaned up temp video directory: {temp_dir_path_for_log}")
             reasoning_trace = parts[0].strip()
             model_answer = parts[1].strip()
         else:
+            reasoning_trace = llm_text
             lines = llm_text.strip().split('\n')
+            model_answer = "Could not parse answer"
             for line in reversed(lines):
                 if line.strip():
                     model_answer = line.strip()
         default_model_answer = "Information not available in provided context"
         default_reasoning = "LLM processing failed or context insufficient."
+        if not self.llm_model or not genai or not GenerationConfig or not FinishReason or not HarmCategory or not HarmBlockThreshold:
             gaia_logger.warning("LLM model (Gemini) or necessary enums/configs not available for answer formulation.")
             reasoning = "LLM model (Gemini) or its configuration components not available for answer formulation."
             answer_val = default_model_answer
             if web_context and file_context:
                 reasoning += " Context from file and web was found but not processed by LLM."
             elif web_context:
             file_header = "\n\nContext from Provided Document:\n---"
             file_footer = "\n---"
             len_web_ctx = len(web_context) if web_context else 0
+            max_len_for_file = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len_web_ctx - len(file_header) - len(file_footer) - 500
             if max_len_for_file > 100 :
                 truncated_file_context = file_context[:max_len_for_file]
         if web_context:
             header_text = "\n\nContext from External Sources (Web/Video):\n---"
+            if "Video analysis result:" in web_context and "Source [" not in web_context:
                 header_text = "\n\nContext from Video Analysis:\n---"
+            elif "Source [" in web_context and "Video analysis result:" not in web_context:
                 header_text = "\n\nContext from Web Search Results:\n---"
             web_footer = "\n---"
             available_len_for_web = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len(header_text) - len(web_footer) - 300
                 return {"model_answer": "LLM Error: No response", "reasoning_trace": "LLM did not provide any response candidates."}
             candidate = response.candidates[0]
             if candidate.finish_reason != FinishReason.STOP:
                 reason_name = candidate.finish_reason.name if hasattr(candidate.finish_reason, 'name') else str(candidate.finish_reason)
                 safety_ratings_str = ""
+                if candidate.safety_ratings:
                     relevant_ratings = [
                         f"{sr.category.name.split('_')[-1] if hasattr(sr.category, 'name') else 'CAT?'}: {(sr.probability.name if hasattr(sr.probability, 'name') else 'PROB?')}"
                         for sr in candidate.safety_ratings if (hasattr(sr,'blocked') and sr.blocked) or (hasattr(sr,'probability') and HarmProbability and sr.probability.value >= HarmProbability.MEDIUM.value)
                     if relevant_ratings: safety_ratings_str = "; ".join(relevant_ratings)
                 gaia_logger.warning(f"Gemini candidate did not finish successfully. Reason: {reason_name}. Safety Ratings: {safety_ratings_str if safety_ratings_str else 'N/A'}")
                 user_message = "LLM Error: Response incomplete"
                 if candidate.finish_reason == FinishReason.SAFETY: user_message = "LLM Error: Response blocked for safety"
                 elif candidate.finish_reason == FinishReason.MAX_TOKENS: user_message = "LLM Error: Response truncated (max tokens)"
                     "reasoning_trace": f"LLM generation stopped. Reason: {reason_name}. " + (f"Details: {safety_ratings_str}" if safety_ratings_str else "")
                 }
+            llm_answer_text = response.text
             gaia_logger.info(f"LLM Raw Full Answer (first 200 chars): {llm_answer_text[:200]}...")
             return self._parse_llm_output(llm_answer_text)
         except ValueError as ve:
             if "finish_reason" in str(ve).lower() and ("part" in str(ve).lower() or "candidate" in str(ve).lower()):
+                gaia_logger.error(f"ValueError accessing Gemini response.text, likely due to non-STOP finish_reason not caught explicitly: {ve}", exc_info=False)
                 fr_from_ex = "Unknown (from ValueError)"
+                match_fr = re.search(r"finish_reason.*?is\s*(\w+)", str(ve), re.IGNORECASE)
                 if match_fr: fr_from_ex = match_fr.group(1)
                 return {"model_answer": "LLM Error: Invalid response state",
                         "reasoning_trace": f"Could not parse LLM response. Finish reason possibly {fr_from_ex}. Details: {str(ve)[:150]}"}
+            else:
                 gaia_logger.error(f"ValueError during Gemini call or processing: {ve}", exc_info=True)
                 return {"model_answer": "LLM Error: Value error", "reasoning_trace": f"A value error occurred: {str(ve)}"}
         except Exception as e:
             elif "InternalServerError" in error_type_name or "500" in error_message :
                 answer_val = "LLM server error"
                 reasoning = "Error: LLM experienced an internal server error."
             elif "ServiceUnavailable" in error_type_name or "503" in error_message:
                 answer_val = "LLM service unavailable"
                 reasoning = "Error: LLM service is temporarily unavailable (503)."
         q_lower = question.lower().strip()
         video_context_str: Optional[str] = None
         video_url_match = re.search(r"(https?://(?:www\.)?(?:youtube\.com/(?:watch\?v=|shorts/)|youtu\.be/)[\w\-=&%]+)", question)
+        video_keywords = ["video", "youtube.com", "youtu.be", "clip", "recording"]
         species_keywords = ["species", "bird", "birds", "type of bird", "kinds of bird", "different birds"]
         action_keywords = ["count", "how many", "number of", "simultaneously", "at the same time", "on camera", "identify", "list"]
         if video_url_match and \
            any(vk in q_lower for vk in video_keywords) and \
            any(sk in q_lower for sk in species_keywords) and \
         web_rag_ctx_str: Optional[str] = None
         needs_web_rag = True
         if video_context_str:
             if "Video analysis result:" in video_context_str and not "download failed" in video_context_str.lower() and not "skipped" in video_context_str.lower():
                  if (("count" in q_lower or "how many" in q_lower or "number of" in q_lower) and ("simultaneously" in q_lower or "at the same time" in q_lower or "distinct" in q_lower)) and any(sk_q in q_lower for sk_q in species_keywords):
+                    needs_web_rag = False
                     gaia_logger.info("Video context seems primary for a specific video counting question; web RAG may be skipped.")
+        if file_ctx_str and len(file_ctx_str) > 100 and not video_context_str:
             doc_can_answer_kws = ["summarize", "according to the document", "in the provided text", "based on the file content", "from this file", "in this data"]
             web_still_needed_kws = ["what is the current", "latest news on", "public opinion of", "search for more about", "compare this to", "what happened after"]
             if any(kw in q_lower for kw in doc_can_answer_kws) and not any(kw in q_lower for kw in web_still_needed_kws):
                 needs_web_rag = False
                 gaia_logger.info("File context seems primary; web RAG may be skipped.")
             elif not any(kw in q_lower for kw in web_still_needed_kws) and not question.strip().endswith("?"):
+                 if not any(qk in q_lower for qk in ["why is", "how does", "explain the impact of", "what if"]):
                      needs_web_rag = False
                      gaia_logger.info("File context seems sufficient for non-complex query; web RAG may be skipped.")
         if needs_web_rag:
             search_q = question.replace("?", "").strip()
             rag_res = self.rag_pipeline.analyze(query=search_q, force_refresh=False)
             if rag_res:
                 snippets = []
                     title = res_item.get('title','N/A')
                     body = res_item.get('body','')
                     href = res_item.get('href','#')
+                    provider_info = res_item.get('query_tag','WebSearch')
                     source_type = "EnrichedContent" if res_item.get('enriched') else "Snippet"
                     body_preview = (body[:1500] + "...") if len(body) > 1500 else body
                     snippets.append(f"Source [{i+1} - {provider_info}]: {title}\nURL: {href}\n{source_type}: {body_preview}\n---")
         final_llm_external_context_parts = []
         if video_context_str:
+            final_llm_external_context_parts.append(f"{video_context_str}")
         if web_rag_ctx_str:
             final_llm_external_context_parts.append(f"{web_rag_ctx_str}")
         final_llm_external_context = "\n\n---\n\n".join(final_llm_external_context_parts).strip() if final_llm_external_context_parts else None
     except Exception as e: return f"Error fetching questions: {e}", None
     results_log_for_gradio, answers_for_api_submission = [], []
+    GEMINI_RPM_LIMIT = int(os.getenv("GEMINI_RPM_LIMIT", "10"))
     sleep_llm = (60.0 / GEMINI_RPM_LIMIT) + 0.5 if GEMINI_RPM_LIMIT > 0 else 0.2
     gaia_logger.info(f"Using Gemini RPM limit: {GEMINI_RPM_LIMIT}, LLM call sleep: {sleep_llm:.2f}s")
     except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(results_log_for_gradio)
 with gr.Blocks(title="GAIA RAG Agent - Advanced") as demo:
+    gr.Markdown("# GAIA Level 1 Agent")
     gr.Markdown(
         """
         **Instructions:**
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
     status_output = gr.Textbox(label="Status / Submission Result", lines=5, interactive=False)
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, height=500) # Removed max_rows
     run_button.click(fn=run_and_submit_all, inputs=[], outputs=[status_output, results_table])
 if __name__ == "__main__":
+    print("\n" + "-"*30 + " GAIA Level 1 Agent - RAG, FileProc, Video Analysis " + "-"*30)
     required_env = {
         "GOOGLE_GEMINI_API_KEY": GOOGLE_GEMINI_API_KEY,
         "GOOGLE_API_KEY": GOOGLE_CUSTOM_SEARCH_API_KEY,
     if missing_keys: print(f"\n--- PLEASE SET MISSING ENV VARS FOR FULL FUNCTIONALITY: {', '.join(missing_keys)} ---\n")
     else: print("\n--- All major API Key Environment Variables found. ---")
     gemini_rpm = os.getenv("GEMINI_RPM_LIMIT", "10 (defaulted)")
     print(f"--- Using GEMINI_RPM_LIMIT: {gemini_rpm} (Ensure this matches your Gemini API plan limits) ---")
+    print("-"*(60 + len(" GAIA Level 1 Agent - RAG, FileProc, Video Analysis ")) + "\n")
     demo.launch(server_name="0.0.0.0", server_port=7860, debug=False, share=False)