Final_Assignment_Template

Sleeping

App Files Files Community

ChillThrills committed on May 13, 2025

Commit

86c6b45

1 Parent(s): 64b2383

........

Browse files

Files changed (1) hide show

app.py +48 -100

app.py CHANGED Viewed

@@ -723,7 +723,6 @@ class GaiaLevel1Agent:
                 content_type = response.headers.get("Content-Type", "")
                 processed_content = FileProcessor.process(response.content, filename, content_type)
                 return processed_content
             except requests.exceptions.HTTPError as e:
                 if e.response.status_code == 404:
                     gaia_logger.warning(f"File not found for task {task_id}: {file_url}")
@@ -740,30 +739,29 @@ class GaiaLevel1Agent:
     def _parse_llm_output(self, llm_text: str) -> Dict[str, str]:
         reasoning_trace = ""
         model_answer = ""
         final_answer_sentinel = "FINAL ANSWER:"
         parts = llm_text.split(final_answer_sentinel, 1)
         if len(parts) == 2:
             reasoning_trace = parts[0].strip()
             model_answer = parts[1].strip()
         else:
-            reasoning_trace = llm_text # Fallback: all text is reasoning
             lines = llm_text.strip().split('\n')
-            model_answer = lines[-1].strip() if lines else "Could not parse answer" # Fallback: last line is answer
-            gaia_logger.warning(f"LLM output did not contain '{final_answer_sentinel}'. Using fallback parsing.")
         return {"model_answer": model_answer, "reasoning_trace": reasoning_trace}
     def _formulate_answer_with_llm(self, question: str, file_context: Optional[str], web_context: Optional[str]) -> Dict[str, str]:
-        default_error_answer = "Information not available in provided context"
         default_reasoning = "LLM processing failed or context insufficient."
         if not self.llm_model:
             gaia_logger.warning("LLM model (Gemini) not available for answer formulation.")
             reasoning = "LLM model (Gemini) not available for answer formulation."
-            answer = default_error_answer
             if web_context and file_context:
                 reasoning += " Context from file and web was found but not processed by LLM."
             elif web_context:
@@ -772,7 +770,7 @@ class GaiaLevel1Agent:
                 reasoning += f" File context found: {file_context[:100]}..."
             else:
                  reasoning += " No context found."
-            return {"model_answer": answer, "reasoning_trace": reasoning}
         prompt_parts = [
             "You are a general AI assistant. Your primary goal is to answer the user's question accurately and concisely based *only* on the provided context (from a document and/or web search results).",
@@ -788,13 +786,11 @@ class GaiaLevel1Agent:
         ]
         current_prompt_text_len = sum(len(p) for p in prompt_parts)
         context_added = False
         if file_context:
             file_header = "\n\nContext from Provided Document:\n---"
             file_footer = "\n---"
             max_len_for_file = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - (len(web_context) if web_context else 0) - len(file_header) - len(file_footer) - 500
             if max_len_for_file > 100 :
                 truncated_file_context = file_context[:max_len_for_file]
                 if len(file_context) > len(truncated_file_context):
@@ -802,35 +798,26 @@ class GaiaLevel1Agent:
                 prompt_parts.extend([file_header, truncated_file_context, file_footer])
                 current_prompt_text_len += len(file_header) + len(truncated_file_context) + len(file_footer)
                 context_added = True
-            else:
-                gaia_logger.warning("Not enough space for file context in LLM prompt.")
         if web_context:
             web_header = "\n\nContext from Web Search Results:\n---"
             web_footer = "\n---"
             available_len_for_web = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len(web_header) - len(web_footer) - 300
             if available_len_for_web > 100:
                 truncated_web_context = web_context
                 if len(web_context) > available_len_for_web:
                     truncated_web_context = web_context[:available_len_for_web] + "\n... (web context truncated)"
                     gaia_logger.info(f"Truncated web context from {len(web_context)} to {len(truncated_web_context)} chars for LLM.")
                 prompt_parts.extend([web_header, truncated_web_context, web_footer])
                 context_added = True
-            else:
-                gaia_logger.warning("Not enough space for web context in LLM prompt, or web context itself is empty.")
-        if not context_added:
-            prompt_parts.append("\n\nNo document or web context could be provided due to length constraints or availability.")
         prompt_parts.append("\n\nReasoning and Final Answer:")
         final_prompt = "\n".join(prompt_parts)
         gaia_logger.info(f"LLM Prompt (first 300): {final_prompt[:300]}...")
-        gaia_logger.info(f"LLM Prompt (last 300): ...{final_prompt[-300:]}")
         gaia_logger.info(f"LLM Total prompt length: {len(final_prompt)} chars.")
         if not GenerationConfig:
@@ -838,40 +825,29 @@ class GaiaLevel1Agent:
             return {"model_answer": "LLM configuration error", "reasoning_trace": "GenerationConfig not available."}
         try:
-            gen_config = GenerationConfig(
-                temperature=0.1,
-                top_p=0.95,
-                max_output_tokens=2048
-            )
             safety_set = [{"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"} for c in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH", "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]]
-            response = self.llm_model.generate_content(
-                final_prompt,
-                generation_config=gen_config,
-                safety_settings=safety_set
-            )
             if not response.candidates or (hasattr(response, 'prompt_feedback') and response.prompt_feedback.block_reason):
                 reason_text = "Unknown"
-                if hasattr(response, 'prompt_feedback') and response.prompt_feedback.block_reason:
-                    reason_text = response.prompt_feedback.block_reason.name
                 gaia_logger.warning(f"Gemini response blocked. Reason: {reason_text}.")
                 return {"model_answer": "Error processing request", "reasoning_trace": f"My response was blocked (Reason: {reason_text})."}
             llm_answer_text = response.text
             gaia_logger.info(f"LLM Raw Full Answer (first 200): {llm_answer_text[:200]}...")
             return self._parse_llm_output(llm_answer_text)
         except Exception as e:
             gaia_logger.error(f"Error calling Gemini API: {e}", exc_info=True)
             error_type_name = type(e).__name__
             reasoning = f"Error calling Gemini API: {error_type_name} - {str(e)}"
-            answer = "LLM API error"
             if "429" in str(e) or "ResourceExhausted" in error_type_name:
-                answer = "LLM rate limit"
                 reasoning = "Error: LLM temporarily unavailable (rate limit)."
-            return {"model_answer": answer, "reasoning_trace": reasoning}
     def __call__(self, question: str, task_id: Optional[str] = None) -> Dict[str, str]:
         gaia_logger.info(f"Agent processing: '{question[:70]}...', TaskID: {task_id}")
@@ -880,58 +856,42 @@ class GaiaLevel1Agent:
         if "what is your name" in q_lower or "who are you" in q_lower:
             return {"model_answer": "general AI assistant", "reasoning_trace": "User asked for my identity."}
         file_ctx_str: Optional[str] = None
         file_kws = ["document", "file", "text", "provide", "attach", "read", "content", "table", "data", "excel", "pdf", "audio", "code", "script", "log"]
         if task_id and (any(kw in q_lower for kw in file_kws) or "this task involves a file" in q_lower):
             file_ctx_str = self._fetch_and_process_file_content(task_id)
-            if file_ctx_str:
-                gaia_logger.info(f"Processed file context ({len(file_ctx_str)} chars) for task {task_id}")
-            else:
-                gaia_logger.warning(f"No file content or failed to process for task {task_id}")
         web_ctx_str: Optional[str] = None
         needs_web = True
         if file_ctx_str and len(file_ctx_str) > 300:
-            web_still_needed_kws = [
-                "what is", "who is", "current", "latest", "news", "public opinion",
-                "recent events", "search for", "find information on", "browse", "look up"
-            ]
             doc_can_answer_kws = ["summarize", "according to the document", "in the provided text"]
             if any(kw in q_lower for kw in doc_can_answer_kws) and not any(kw in q_lower for kw in web_still_needed_kws):
                 needs_web = False
-                gaia_logger.info("Question seems focused on document context, and substantial file context exists. Tentatively skipping web search.")
             elif not any(kw in q_lower for kw in web_still_needed_kws):
                 needs_web = False
-                gaia_logger.info("Substantial file context present and question doesn't strongly imply web search. Skipping web search.")
         if "don't search" in q_lower or "do not search" in q_lower or "without searching" in q_lower:
             needs_web = False
-            gaia_logger.info("Web search explicitly disabled by prompt.")
         if needs_web:
             search_q = question.replace("?", "").strip()
-            gaia_logger.info(f"RAG Pipeline initiated for query: {search_q[:70]}")
             rag_res = self.rag_pipeline.analyze(query=search_q, force_refresh=False)
             if rag_res:
                 snippets = []
                 for i, res_item in enumerate(rag_res):
-                    title = res_item.get('title','N/A')
-                    body = res_item.get('body','')
-                    href = res_item.get('href','#')
-                    provider = res_item.get('query_tag','WebSearch')
-                    prefix = "EnrichedContent" if res_item.get('enriched') else "Snippet"
                     body_preview = (body[:1500] + "...") if len(body) > 1500 else body
                     snippets.append(f"Source [{i+1} - {provider}]: {title}\nURL: {href}\n{prefix}: {body_preview}\n---")
                 web_ctx_str = "\n\n".join(snippets)
-                gaia_logger.info(f"RAG processed {len(rag_res)} sources, total web context length for LLM (pre-truncation): {len(web_ctx_str)} chars.")
-            else:
-                gaia_logger.warning("RAG pipeline yielded no web results for the query.")
         agent_response_dict = self._formulate_answer_with_llm(question, file_ctx_str, web_ctx_str)
         gaia_logger.info(f"LLM-based model_answer (first 70): {agent_response_dict.get('model_answer', '')[:70]}...")
         return agent_response_dict
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
@@ -947,57 +907,46 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         if not questions_data or not isinstance(questions_data, list): return "Questions list empty/invalid.", None
     except Exception as e: return f"Error fetching questions: {e}", None
-    results_log, answers_payload_for_submission = [], []
     GEMINI_RPM_LIMIT = int(os.getenv("GEMINI_RPM_LIMIT", "60"))
     sleep_llm = (60.0 / GEMINI_RPM_LIMIT) + 0.5 if GEMINI_RPM_LIMIT > 0 else 0.2
     for i, item in enumerate(questions_data):
         task_id, q_text = item.get("task_id"), item.get("question")
         if not task_id or q_text is None:
-            results_log.append({"Task ID": task_id, "Question": q_text, "Model Answer": "SKIPPED", "Reasoning Trace": ""})
             continue
         gaia_logger.info(f"Q {i+1}/{len(questions_data)} - Task: {task_id}")
-        model_answer_val = "AGENT ERROR"
-        reasoning_trace_val = "Agent error occurred."
         try:
             agent_response_dict = agent(question=q_text, task_id=task_id)
-            model_answer_val = agent_response_dict.get("model_answer", "Error: No model_answer key")
-            reasoning_trace_val = agent_response_dict.get("reasoning_trace", "")
-            answers_payload_for_submission.append({
-                "task_id": task_id,
-                "model_answer": model_answer_val,
-                "reasoning_trace": reasoning_trace_val
-            })
-            results_log.append({"Task ID": task_id, "Question": q_text, "Model Answer": model_answer_val, "Reasoning Trace": reasoning_trace_val[:500] + "..." if len(reasoning_trace_val)>500 else reasoning_trace_val})
         except Exception as e:
-            reasoning_trace_val = f"AGENT ERROR: {type(e).__name__} - {e}"
-            answers_payload_for_submission.append({
-                "task_id": task_id,
-                "model_answer": model_answer_val, # "AGENT ERROR"
-                "reasoning_trace": reasoning_trace_val
-            })
-            results_log.append({"Task ID": task_id, "Question": q_text, "Model Answer": model_answer_val, "Reasoning Trace": reasoning_trace_val})
         if i < len(questions_data) - 1: time.sleep(sleep_llm)
-    if not answers_payload_for_submission: return "Agent produced no answers.", pd.DataFrame(results_log or [{"Info": "No questions processed"}])
-    submission_content_lines = []
-    for ans_item in answers_payload_for_submission:
-        submission_entry = {"task_id": ans_item["task_id"], "model_answer": ans_item["model_answer"]}
-        if ans_item.get("reasoning_trace"): # Add reasoning_trace only if it exists and is not empty
-            submission_entry["reasoning_trace"] = ans_item["reasoning_trace"]
-        submission_content_lines.append(json.dumps(submission_entry))
-    submission_json_lines = "\n".join(submission_content_lines)
     submission_payload_for_api = {
         "username": username.strip(),
         "agent_code": agent_code,
-        "answers_jsonl_string": submission_json_lines
     }
-    gaia_logger.info(f"Submitting {len(answers_payload_for_submission)} answers for '{username}'...")
-    gaia_logger.debug(f"Submission payload sample for API: {json.dumps(submission_payload_for_api)[:500]}")
     try:
         response = requests.post(submit_url, json=submission_payload_for_api, timeout=60);
@@ -1006,11 +955,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         status = (f"Submission Successful!\nUser: {result_data.get('username')}\nScore: {result_data.get('score','N/A')}% "
                   f"({result_data.get('correct_count','?')}/{result_data.get('total_attempted','?')} correct)\n"
                   f"Msg: {result_data.get('message','No message.')}")
-        return status, pd.DataFrame(results_log)
     except requests.exceptions.HTTPError as e:
         err_detail = f"Server: {e.response.status_code}. Detail: {e.response.text[:200]}"
-        return f"Submission Failed: {err_detail}", pd.DataFrame(results_log)
-    except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(results_log)
 with gr.Blocks(title="GAIA RAG Agent - Advanced") as demo:
     gr.Markdown("# Gaia Level 1 Agent (RAG & FileProcessor) Evaluation Runner")
@@ -1037,7 +986,6 @@ if __name__ == "__main__":
     for k, v in required_env.items(): print(f"✅ {k} found." if v else f"⚠️ WARNING: {k} not set.")
     for lib_name, lib_var in [("transformers", hf_transformers_pipeline), ("torch", torch), ("librosa", librosa), ("openpyxl", openpyxl), ("pdfplumber", pdfplumber)]:
         print(f"✅ {lib_name} lib found." if lib_var else f"⚠️ WARNING: {lib_name} lib missing (some file types may not be processed).")
-    print("👉 REMEMBER TO INSTALL 'tabulate' if you haven't: pip install tabulate")
     if missing_keys: print(f"\n--- PLEASE SET MISSING ENV VARS: {', '.join(missing_keys)} ---\n")
     print("-"*(60 + len(" RAG & FileProcessor Agent App Starting ")) + "\n")
     demo.launch(server_name="0.0.0.0", server_port=7860, debug=False, share=False)

                 content_type = response.headers.get("Content-Type", "")
                 processed_content = FileProcessor.process(response.content, filename, content_type)
                 return processed_content
             except requests.exceptions.HTTPError as e:
                 if e.response.status_code == 404:
                     gaia_logger.warning(f"File not found for task {task_id}: {file_url}")
     def _parse_llm_output(self, llm_text: str) -> Dict[str, str]:
         reasoning_trace = ""
         model_answer = ""
         final_answer_sentinel = "FINAL ANSWER:"
         parts = llm_text.split(final_answer_sentinel, 1)
         if len(parts) == 2:
             reasoning_trace = parts[0].strip()
             model_answer = parts[1].strip()
         else:
+            reasoning_trace = llm_text
             lines = llm_text.strip().split('\n')
+            model_answer = lines[-1].strip() if lines else "Could not parse answer"
+            gaia_logger.warning(f"LLM output did not contain '{final_answer_sentinel}'. Using fallback parsing. Full LLM text: '{llm_text[:200]}...'")
         return {"model_answer": model_answer, "reasoning_trace": reasoning_trace}
     def _formulate_answer_with_llm(self, question: str, file_context: Optional[str], web_context: Optional[str]) -> Dict[str, str]:
+        default_model_answer = "Information not available in provided context"
         default_reasoning = "LLM processing failed or context insufficient."
         if not self.llm_model:
             gaia_logger.warning("LLM model (Gemini) not available for answer formulation.")
             reasoning = "LLM model (Gemini) not available for answer formulation."
+            answer_val = default_model_answer
             if web_context and file_context:
                 reasoning += " Context from file and web was found but not processed by LLM."
             elif web_context:
                 reasoning += f" File context found: {file_context[:100]}..."
             else:
                  reasoning += " No context found."
+            return {"model_answer": answer_val, "reasoning_trace": reasoning}
         prompt_parts = [
             "You are a general AI assistant. Your primary goal is to answer the user's question accurately and concisely based *only* on the provided context (from a document and/or web search results).",
         ]
         current_prompt_text_len = sum(len(p) for p in prompt_parts)
         context_added = False
         if file_context:
             file_header = "\n\nContext from Provided Document:\n---"
             file_footer = "\n---"
             max_len_for_file = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - (len(web_context) if web_context else 0) - len(file_header) - len(file_footer) - 500
             if max_len_for_file > 100 :
                 truncated_file_context = file_context[:max_len_for_file]
                 if len(file_context) > len(truncated_file_context):
                 prompt_parts.extend([file_header, truncated_file_context, file_footer])
                 current_prompt_text_len += len(file_header) + len(truncated_file_context) + len(file_footer)
                 context_added = True
+            else: gaia_logger.warning("Not enough space for file context in LLM prompt.")
         if web_context:
             web_header = "\n\nContext from Web Search Results:\n---"
             web_footer = "\n---"
             available_len_for_web = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len(web_header) - len(web_footer) - 300
             if available_len_for_web > 100:
                 truncated_web_context = web_context
                 if len(web_context) > available_len_for_web:
                     truncated_web_context = web_context[:available_len_for_web] + "\n... (web context truncated)"
                     gaia_logger.info(f"Truncated web context from {len(web_context)} to {len(truncated_web_context)} chars for LLM.")
                 prompt_parts.extend([web_header, truncated_web_context, web_footer])
                 context_added = True
+            else: gaia_logger.warning("Not enough space for web context in LLM prompt, or web context itself is empty.")
+        if not context_added: prompt_parts.append("\n\nNo document or web context could be provided due to length constraints or availability.")
         prompt_parts.append("\n\nReasoning and Final Answer:")
         final_prompt = "\n".join(prompt_parts)
         gaia_logger.info(f"LLM Prompt (first 300): {final_prompt[:300]}...")
         gaia_logger.info(f"LLM Total prompt length: {len(final_prompt)} chars.")
         if not GenerationConfig:
             return {"model_answer": "LLM configuration error", "reasoning_trace": "GenerationConfig not available."}
         try:
+            gen_config = GenerationConfig(temperature=0.1, top_p=0.95, max_output_tokens=2048)
             safety_set = [{"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"} for c in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH", "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]]
+            response = self.llm_model.generate_content(final_prompt, generation_config=gen_config, safety_settings=safety_set)
             if not response.candidates or (hasattr(response, 'prompt_feedback') and response.prompt_feedback.block_reason):
                 reason_text = "Unknown"
+                if hasattr(response, 'prompt_feedback') and response.prompt_feedback.block_reason: reason_text = response.prompt_feedback.block_reason.name
                 gaia_logger.warning(f"Gemini response blocked. Reason: {reason_text}.")
                 return {"model_answer": "Error processing request", "reasoning_trace": f"My response was blocked (Reason: {reason_text})."}
             llm_answer_text = response.text
             gaia_logger.info(f"LLM Raw Full Answer (first 200): {llm_answer_text[:200]}...")
             return self._parse_llm_output(llm_answer_text)
         except Exception as e:
             gaia_logger.error(f"Error calling Gemini API: {e}", exc_info=True)
             error_type_name = type(e).__name__
             reasoning = f"Error calling Gemini API: {error_type_name} - {str(e)}"
+            answer_val = "LLM API error"
             if "429" in str(e) or "ResourceExhausted" in error_type_name:
+                answer_val = "LLM rate limit"
                 reasoning = "Error: LLM temporarily unavailable (rate limit)."
+            return {"model_answer": answer_val, "reasoning_trace": reasoning}
     def __call__(self, question: str, task_id: Optional[str] = None) -> Dict[str, str]:
         gaia_logger.info(f"Agent processing: '{question[:70]}...', TaskID: {task_id}")
         if "what is your name" in q_lower or "who are you" in q_lower:
             return {"model_answer": "general AI assistant", "reasoning_trace": "User asked for my identity."}
         file_ctx_str: Optional[str] = None
         file_kws = ["document", "file", "text", "provide", "attach", "read", "content", "table", "data", "excel", "pdf", "audio", "code", "script", "log"]
         if task_id and (any(kw in q_lower for kw in file_kws) or "this task involves a file" in q_lower):
             file_ctx_str = self._fetch_and_process_file_content(task_id)
+            if file_ctx_str: gaia_logger.info(f"Processed file context ({len(file_ctx_str)} chars) for task {task_id}")
+            else: gaia_logger.warning(f"No file content or failed to process for task {task_id}")
         web_ctx_str: Optional[str] = None
         needs_web = True
         if file_ctx_str and len(file_ctx_str) > 300:
+            web_still_needed_kws = ["what is", "who is", "current", "latest", "news", "public opinion", "recent events", "search for", "find information on", "browse", "look up"]
             doc_can_answer_kws = ["summarize", "according to the document", "in the provided text"]
             if any(kw in q_lower for kw in doc_can_answer_kws) and not any(kw in q_lower for kw in web_still_needed_kws):
                 needs_web = False
             elif not any(kw in q_lower for kw in web_still_needed_kws):
                 needs_web = False
         if "don't search" in q_lower or "do not search" in q_lower or "without searching" in q_lower:
             needs_web = False
         if needs_web:
             search_q = question.replace("?", "").strip()
             rag_res = self.rag_pipeline.analyze(query=search_q, force_refresh=False)
             if rag_res:
                 snippets = []
                 for i, res_item in enumerate(rag_res):
+                    title, body, href = res_item.get('title','N/A'), res_item.get('body',''), res_item.get('href','#')
+                    provider, prefix = res_item.get('query_tag','WebSearch'), "EnrichedContent" if res_item.get('enriched') else "Snippet"
                     body_preview = (body[:1500] + "...") if len(body) > 1500 else body
                     snippets.append(f"Source [{i+1} - {provider}]: {title}\nURL: {href}\n{prefix}: {body_preview}\n---")
                 web_ctx_str = "\n\n".join(snippets)
+            else: gaia_logger.warning("RAG pipeline yielded no web results.")
         agent_response_dict = self._formulate_answer_with_llm(question, file_ctx_str, web_ctx_str)
         gaia_logger.info(f"LLM-based model_answer (first 70): {agent_response_dict.get('model_answer', '')[:70]}...")
         return agent_response_dict
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
         if not questions_data or not isinstance(questions_data, list): return "Questions list empty/invalid.", None
     except Exception as e: return f"Error fetching questions: {e}", None
+    results_log_for_gradio, answers_for_api_submission = [], []
     GEMINI_RPM_LIMIT = int(os.getenv("GEMINI_RPM_LIMIT", "60"))
     sleep_llm = (60.0 / GEMINI_RPM_LIMIT) + 0.5 if GEMINI_RPM_LIMIT > 0 else 0.2
     for i, item in enumerate(questions_data):
         task_id, q_text = item.get("task_id"), item.get("question")
+        model_answer_val = "AGENT ERROR"
+        reasoning_trace_val = "Agent error occurred prior to LLM call."
         if not task_id or q_text is None:
+            model_answer_val = "SKIPPED"
+            reasoning_trace_val = "Task ID or question missing."
+            results_log_for_gradio.append({"Task ID": task_id, "Question": q_text, "Submitted Answer": model_answer_val, "Reasoning Trace": reasoning_trace_val})
+            answers_for_api_submission.append({"task_id": task_id, "submitted_answer": model_answer_val})
             continue
         gaia_logger.info(f"Q {i+1}/{len(questions_data)} - Task: {task_id}")
         try:
             agent_response_dict = agent(question=q_text, task_id=task_id)
+            model_answer_val = agent_response_dict.get("model_answer", "Error: No model_answer key in agent response")
+            reasoning_trace_val = agent_response_dict.get("reasoning_trace", "Error: No reasoning_trace key in agent response")
         except Exception as e:
+            gaia_logger.error(f"Error during agent call for task {task_id}: {e}", exc_info=True)
+            model_answer_val = "AGENT EXECUTION ERROR"
+            reasoning_trace_val = f"Agent call failed: {type(e).__name__} - {str(e)}"
+        answers_for_api_submission.append({"task_id": task_id, "submitted_answer": model_answer_val})
+        results_log_for_gradio.append({"Task ID": task_id, "Question": q_text, "Submitted Answer": model_answer_val, "Reasoning Trace (first 500 chars)": reasoning_trace_val[:500] + ("..." if len(reasoning_trace_val) > 500 else "")})
         if i < len(questions_data) - 1: time.sleep(sleep_llm)
+    if not answers_for_api_submission: return "Agent produced no answers for API submission.", pd.DataFrame(results_log_for_gradio or [{"Info": "No questions processed"}])
     submission_payload_for_api = {
         "username": username.strip(),
         "agent_code": agent_code,
+        "answers": answers_for_api_submission
     }
+    gaia_logger.info(f"Submitting {len(answers_for_api_submission)} answers for '{username}' to API...")
+    gaia_logger.debug(f"API Submission Payload Sample: {json.dumps(submission_payload_for_api)[:500]}")
     try:
         response = requests.post(submit_url, json=submission_payload_for_api, timeout=60);
         status = (f"Submission Successful!\nUser: {result_data.get('username')}\nScore: {result_data.get('score','N/A')}% "
                   f"({result_data.get('correct_count','?')}/{result_data.get('total_attempted','?')} correct)\n"
                   f"Msg: {result_data.get('message','No message.')}")
+        return status, pd.DataFrame(results_log_for_gradio)
     except requests.exceptions.HTTPError as e:
         err_detail = f"Server: {e.response.status_code}. Detail: {e.response.text[:200]}"
+        return f"Submission Failed: {err_detail}", pd.DataFrame(results_log_for_gradio)
+    except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(results_log_for_gradio)
 with gr.Blocks(title="GAIA RAG Agent - Advanced") as demo:
     gr.Markdown("# Gaia Level 1 Agent (RAG & FileProcessor) Evaluation Runner")
     for k, v in required_env.items(): print(f"✅ {k} found." if v else f"⚠️ WARNING: {k} not set.")
     for lib_name, lib_var in [("transformers", hf_transformers_pipeline), ("torch", torch), ("librosa", librosa), ("openpyxl", openpyxl), ("pdfplumber", pdfplumber)]:
         print(f"✅ {lib_name} lib found." if lib_var else f"⚠️ WARNING: {lib_name} lib missing (some file types may not be processed).")
     if missing_keys: print(f"\n--- PLEASE SET MISSING ENV VARS: {', '.join(missing_keys)} ---\n")
     print("-"*(60 + len(" RAG & FileProcessor Agent App Starting ")) + "\n")
     demo.launch(server_name="0.0.0.0", server_port=7860, debug=False, share=False)