Commit
·
86c6b45
1
Parent(s):
64b2383
........
Browse files
app.py
CHANGED
|
@@ -723,7 +723,6 @@ class GaiaLevel1Agent:
|
|
| 723 |
content_type = response.headers.get("Content-Type", "")
|
| 724 |
processed_content = FileProcessor.process(response.content, filename, content_type)
|
| 725 |
return processed_content
|
| 726 |
-
|
| 727 |
except requests.exceptions.HTTPError as e:
|
| 728 |
if e.response.status_code == 404:
|
| 729 |
gaia_logger.warning(f"File not found for task {task_id}: {file_url}")
|
|
@@ -740,30 +739,29 @@ class GaiaLevel1Agent:
|
|
| 740 |
def _parse_llm_output(self, llm_text: str) -> Dict[str, str]:
|
| 741 |
reasoning_trace = ""
|
| 742 |
model_answer = ""
|
| 743 |
-
|
| 744 |
final_answer_sentinel = "FINAL ANSWER:"
|
|
|
|
| 745 |
parts = llm_text.split(final_answer_sentinel, 1)
|
| 746 |
|
| 747 |
if len(parts) == 2:
|
| 748 |
reasoning_trace = parts[0].strip()
|
| 749 |
model_answer = parts[1].strip()
|
| 750 |
else:
|
| 751 |
-
reasoning_trace = llm_text
|
| 752 |
lines = llm_text.strip().split('\n')
|
| 753 |
-
model_answer = lines[-1].strip() if lines else "Could not parse answer"
|
| 754 |
-
gaia_logger.warning(f"LLM output did not contain '{final_answer_sentinel}'. Using fallback parsing.")
|
| 755 |
|
| 756 |
return {"model_answer": model_answer, "reasoning_trace": reasoning_trace}
|
| 757 |
|
| 758 |
-
|
| 759 |
def _formulate_answer_with_llm(self, question: str, file_context: Optional[str], web_context: Optional[str]) -> Dict[str, str]:
|
| 760 |
-
|
| 761 |
default_reasoning = "LLM processing failed or context insufficient."
|
| 762 |
|
| 763 |
if not self.llm_model:
|
| 764 |
gaia_logger.warning("LLM model (Gemini) not available for answer formulation.")
|
| 765 |
reasoning = "LLM model (Gemini) not available for answer formulation."
|
| 766 |
-
|
| 767 |
if web_context and file_context:
|
| 768 |
reasoning += " Context from file and web was found but not processed by LLM."
|
| 769 |
elif web_context:
|
|
@@ -772,7 +770,7 @@ class GaiaLevel1Agent:
|
|
| 772 |
reasoning += f" File context found: {file_context[:100]}..."
|
| 773 |
else:
|
| 774 |
reasoning += " No context found."
|
| 775 |
-
return {"model_answer":
|
| 776 |
|
| 777 |
prompt_parts = [
|
| 778 |
"You are a general AI assistant. Your primary goal is to answer the user's question accurately and concisely based *only* on the provided context (from a document and/or web search results).",
|
|
@@ -788,13 +786,11 @@ class GaiaLevel1Agent:
|
|
| 788 |
]
|
| 789 |
|
| 790 |
current_prompt_text_len = sum(len(p) for p in prompt_parts)
|
| 791 |
-
|
| 792 |
context_added = False
|
| 793 |
if file_context:
|
| 794 |
file_header = "\n\nContext from Provided Document:\n---"
|
| 795 |
file_footer = "\n---"
|
| 796 |
max_len_for_file = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - (len(web_context) if web_context else 0) - len(file_header) - len(file_footer) - 500
|
| 797 |
-
|
| 798 |
if max_len_for_file > 100 :
|
| 799 |
truncated_file_context = file_context[:max_len_for_file]
|
| 800 |
if len(file_context) > len(truncated_file_context):
|
|
@@ -802,35 +798,26 @@ class GaiaLevel1Agent:
|
|
| 802 |
prompt_parts.extend([file_header, truncated_file_context, file_footer])
|
| 803 |
current_prompt_text_len += len(file_header) + len(truncated_file_context) + len(file_footer)
|
| 804 |
context_added = True
|
| 805 |
-
else:
|
| 806 |
-
gaia_logger.warning("Not enough space for file context in LLM prompt.")
|
| 807 |
-
|
| 808 |
|
| 809 |
if web_context:
|
| 810 |
web_header = "\n\nContext from Web Search Results:\n---"
|
| 811 |
web_footer = "\n---"
|
| 812 |
available_len_for_web = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len(web_header) - len(web_footer) - 300
|
| 813 |
-
|
| 814 |
if available_len_for_web > 100:
|
| 815 |
truncated_web_context = web_context
|
| 816 |
if len(web_context) > available_len_for_web:
|
| 817 |
truncated_web_context = web_context[:available_len_for_web] + "\n... (web context truncated)"
|
| 818 |
gaia_logger.info(f"Truncated web context from {len(web_context)} to {len(truncated_web_context)} chars for LLM.")
|
| 819 |
-
|
| 820 |
prompt_parts.extend([web_header, truncated_web_context, web_footer])
|
| 821 |
context_added = True
|
| 822 |
-
else:
|
| 823 |
-
gaia_logger.warning("Not enough space for web context in LLM prompt, or web context itself is empty.")
|
| 824 |
-
|
| 825 |
|
| 826 |
-
if not context_added:
|
| 827 |
-
prompt_parts.append("\n\nNo document or web context could be provided due to length constraints or availability.")
|
| 828 |
-
|
| 829 |
prompt_parts.append("\n\nReasoning and Final Answer:")
|
| 830 |
final_prompt = "\n".join(prompt_parts)
|
| 831 |
|
| 832 |
gaia_logger.info(f"LLM Prompt (first 300): {final_prompt[:300]}...")
|
| 833 |
-
gaia_logger.info(f"LLM Prompt (last 300): ...{final_prompt[-300:]}")
|
| 834 |
gaia_logger.info(f"LLM Total prompt length: {len(final_prompt)} chars.")
|
| 835 |
|
| 836 |
if not GenerationConfig:
|
|
@@ -838,40 +825,29 @@ class GaiaLevel1Agent:
|
|
| 838 |
return {"model_answer": "LLM configuration error", "reasoning_trace": "GenerationConfig not available."}
|
| 839 |
|
| 840 |
try:
|
| 841 |
-
gen_config = GenerationConfig(
|
| 842 |
-
temperature=0.1,
|
| 843 |
-
top_p=0.95,
|
| 844 |
-
max_output_tokens=2048
|
| 845 |
-
)
|
| 846 |
safety_set = [{"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"} for c in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH", "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]]
|
| 847 |
-
|
| 848 |
-
response = self.llm_model.generate_content(
|
| 849 |
-
final_prompt,
|
| 850 |
-
generation_config=gen_config,
|
| 851 |
-
safety_settings=safety_set
|
| 852 |
-
)
|
| 853 |
|
| 854 |
if not response.candidates or (hasattr(response, 'prompt_feedback') and response.prompt_feedback.block_reason):
|
| 855 |
reason_text = "Unknown"
|
| 856 |
-
if hasattr(response, 'prompt_feedback') and response.prompt_feedback.block_reason:
|
| 857 |
-
reason_text = response.prompt_feedback.block_reason.name
|
| 858 |
gaia_logger.warning(f"Gemini response blocked. Reason: {reason_text}.")
|
| 859 |
return {"model_answer": "Error processing request", "reasoning_trace": f"My response was blocked (Reason: {reason_text})."}
|
| 860 |
|
| 861 |
llm_answer_text = response.text
|
| 862 |
gaia_logger.info(f"LLM Raw Full Answer (first 200): {llm_answer_text[:200]}...")
|
| 863 |
-
|
| 864 |
return self._parse_llm_output(llm_answer_text)
|
| 865 |
|
| 866 |
except Exception as e:
|
| 867 |
gaia_logger.error(f"Error calling Gemini API: {e}", exc_info=True)
|
| 868 |
error_type_name = type(e).__name__
|
| 869 |
reasoning = f"Error calling Gemini API: {error_type_name} - {str(e)}"
|
| 870 |
-
|
| 871 |
if "429" in str(e) or "ResourceExhausted" in error_type_name:
|
| 872 |
-
|
| 873 |
reasoning = "Error: LLM temporarily unavailable (rate limit)."
|
| 874 |
-
return {"model_answer":
|
| 875 |
|
| 876 |
def __call__(self, question: str, task_id: Optional[str] = None) -> Dict[str, str]:
|
| 877 |
gaia_logger.info(f"Agent processing: '{question[:70]}...', TaskID: {task_id}")
|
|
@@ -880,58 +856,42 @@ class GaiaLevel1Agent:
|
|
| 880 |
if "what is your name" in q_lower or "who are you" in q_lower:
|
| 881 |
return {"model_answer": "general AI assistant", "reasoning_trace": "User asked for my identity."}
|
| 882 |
|
| 883 |
-
|
| 884 |
file_ctx_str: Optional[str] = None
|
| 885 |
file_kws = ["document", "file", "text", "provide", "attach", "read", "content", "table", "data", "excel", "pdf", "audio", "code", "script", "log"]
|
| 886 |
if task_id and (any(kw in q_lower for kw in file_kws) or "this task involves a file" in q_lower):
|
| 887 |
file_ctx_str = self._fetch_and_process_file_content(task_id)
|
| 888 |
-
if file_ctx_str:
|
| 889 |
-
|
| 890 |
-
else:
|
| 891 |
-
gaia_logger.warning(f"No file content or failed to process for task {task_id}")
|
| 892 |
|
| 893 |
web_ctx_str: Optional[str] = None
|
| 894 |
needs_web = True
|
| 895 |
if file_ctx_str and len(file_ctx_str) > 300:
|
| 896 |
-
web_still_needed_kws = [
|
| 897 |
-
"what is", "who is", "current", "latest", "news", "public opinion",
|
| 898 |
-
"recent events", "search for", "find information on", "browse", "look up"
|
| 899 |
-
]
|
| 900 |
doc_can_answer_kws = ["summarize", "according to the document", "in the provided text"]
|
| 901 |
-
|
| 902 |
if any(kw in q_lower for kw in doc_can_answer_kws) and not any(kw in q_lower for kw in web_still_needed_kws):
|
| 903 |
needs_web = False
|
| 904 |
-
gaia_logger.info("Question seems focused on document context, and substantial file context exists. Tentatively skipping web search.")
|
| 905 |
elif not any(kw in q_lower for kw in web_still_needed_kws):
|
| 906 |
needs_web = False
|
| 907 |
-
gaia_logger.info("Substantial file context present and question doesn't strongly imply web search. Skipping web search.")
|
| 908 |
-
|
| 909 |
if "don't search" in q_lower or "do not search" in q_lower or "without searching" in q_lower:
|
| 910 |
needs_web = False
|
| 911 |
-
gaia_logger.info("Web search explicitly disabled by prompt.")
|
| 912 |
|
| 913 |
if needs_web:
|
| 914 |
search_q = question.replace("?", "").strip()
|
| 915 |
-
gaia_logger.info(f"RAG Pipeline initiated for query: {search_q[:70]}")
|
| 916 |
rag_res = self.rag_pipeline.analyze(query=search_q, force_refresh=False)
|
| 917 |
if rag_res:
|
| 918 |
snippets = []
|
| 919 |
for i, res_item in enumerate(rag_res):
|
| 920 |
-
title = res_item.get('title','N/A')
|
| 921 |
-
|
| 922 |
-
href = res_item.get('href','#')
|
| 923 |
-
provider = res_item.get('query_tag','WebSearch')
|
| 924 |
-
prefix = "EnrichedContent" if res_item.get('enriched') else "Snippet"
|
| 925 |
body_preview = (body[:1500] + "...") if len(body) > 1500 else body
|
| 926 |
snippets.append(f"Source [{i+1} - {provider}]: {title}\nURL: {href}\n{prefix}: {body_preview}\n---")
|
| 927 |
web_ctx_str = "\n\n".join(snippets)
|
| 928 |
-
|
| 929 |
-
else:
|
| 930 |
-
gaia_logger.warning("RAG pipeline yielded no web results for the query.")
|
| 931 |
|
| 932 |
agent_response_dict = self._formulate_answer_with_llm(question, file_ctx_str, web_ctx_str)
|
| 933 |
gaia_logger.info(f"LLM-based model_answer (first 70): {agent_response_dict.get('model_answer', '')[:70]}...")
|
| 934 |
return agent_response_dict
|
|
|
|
| 935 |
|
| 936 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 937 |
space_id = os.getenv("SPACE_ID")
|
|
@@ -947,57 +907,46 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 947 |
if not questions_data or not isinstance(questions_data, list): return "Questions list empty/invalid.", None
|
| 948 |
except Exception as e: return f"Error fetching questions: {e}", None
|
| 949 |
|
| 950 |
-
|
| 951 |
GEMINI_RPM_LIMIT = int(os.getenv("GEMINI_RPM_LIMIT", "60"))
|
| 952 |
sleep_llm = (60.0 / GEMINI_RPM_LIMIT) + 0.5 if GEMINI_RPM_LIMIT > 0 else 0.2
|
| 953 |
|
| 954 |
for i, item in enumerate(questions_data):
|
| 955 |
task_id, q_text = item.get("task_id"), item.get("question")
|
|
|
|
|
|
|
|
|
|
| 956 |
if not task_id or q_text is None:
|
| 957 |
-
|
|
|
|
|
|
|
|
|
|
| 958 |
continue
|
|
|
|
| 959 |
gaia_logger.info(f"Q {i+1}/{len(questions_data)} - Task: {task_id}")
|
| 960 |
-
model_answer_val = "AGENT ERROR"
|
| 961 |
-
reasoning_trace_val = "Agent error occurred."
|
| 962 |
try:
|
| 963 |
agent_response_dict = agent(question=q_text, task_id=task_id)
|
| 964 |
-
model_answer_val = agent_response_dict.get("model_answer", "Error: No model_answer key")
|
| 965 |
-
reasoning_trace_val = agent_response_dict.get("reasoning_trace", "")
|
| 966 |
-
|
| 967 |
-
answers_payload_for_submission.append({
|
| 968 |
-
"task_id": task_id,
|
| 969 |
-
"model_answer": model_answer_val,
|
| 970 |
-
"reasoning_trace": reasoning_trace_val
|
| 971 |
-
})
|
| 972 |
-
results_log.append({"Task ID": task_id, "Question": q_text, "Model Answer": model_answer_val, "Reasoning Trace": reasoning_trace_val[:500] + "..." if len(reasoning_trace_val)>500 else reasoning_trace_val})
|
| 973 |
except Exception as e:
|
| 974 |
-
|
| 975 |
-
|
| 976 |
-
|
| 977 |
-
|
| 978 |
-
|
| 979 |
-
|
| 980 |
-
|
| 981 |
if i < len(questions_data) - 1: time.sleep(sleep_llm)
|
| 982 |
|
| 983 |
-
if not
|
| 984 |
-
|
| 985 |
-
submission_content_lines = []
|
| 986 |
-
for ans_item in answers_payload_for_submission:
|
| 987 |
-
submission_entry = {"task_id": ans_item["task_id"], "model_answer": ans_item["model_answer"]}
|
| 988 |
-
if ans_item.get("reasoning_trace"): # Add reasoning_trace only if it exists and is not empty
|
| 989 |
-
submission_entry["reasoning_trace"] = ans_item["reasoning_trace"]
|
| 990 |
-
submission_content_lines.append(json.dumps(submission_entry))
|
| 991 |
-
|
| 992 |
-
submission_json_lines = "\n".join(submission_content_lines)
|
| 993 |
|
| 994 |
submission_payload_for_api = {
|
| 995 |
"username": username.strip(),
|
| 996 |
"agent_code": agent_code,
|
| 997 |
-
"
|
| 998 |
}
|
| 999 |
-
gaia_logger.info(f"Submitting {len(
|
| 1000 |
-
gaia_logger.debug(f"Submission
|
| 1001 |
|
| 1002 |
try:
|
| 1003 |
response = requests.post(submit_url, json=submission_payload_for_api, timeout=60);
|
|
@@ -1006,11 +955,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 1006 |
status = (f"Submission Successful!\nUser: {result_data.get('username')}\nScore: {result_data.get('score','N/A')}% "
|
| 1007 |
f"({result_data.get('correct_count','?')}/{result_data.get('total_attempted','?')} correct)\n"
|
| 1008 |
f"Msg: {result_data.get('message','No message.')}")
|
| 1009 |
-
return status, pd.DataFrame(
|
| 1010 |
except requests.exceptions.HTTPError as e:
|
| 1011 |
err_detail = f"Server: {e.response.status_code}. Detail: {e.response.text[:200]}"
|
| 1012 |
-
return f"Submission Failed: {err_detail}", pd.DataFrame(
|
| 1013 |
-
except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(
|
| 1014 |
|
| 1015 |
with gr.Blocks(title="GAIA RAG Agent - Advanced") as demo:
|
| 1016 |
gr.Markdown("# Gaia Level 1 Agent (RAG & FileProcessor) Evaluation Runner")
|
|
@@ -1037,7 +986,6 @@ if __name__ == "__main__":
|
|
| 1037 |
for k, v in required_env.items(): print(f"✅ {k} found." if v else f"⚠️ WARNING: {k} not set.")
|
| 1038 |
for lib_name, lib_var in [("transformers", hf_transformers_pipeline), ("torch", torch), ("librosa", librosa), ("openpyxl", openpyxl), ("pdfplumber", pdfplumber)]:
|
| 1039 |
print(f"✅ {lib_name} lib found." if lib_var else f"⚠️ WARNING: {lib_name} lib missing (some file types may not be processed).")
|
| 1040 |
-
print("👉 REMEMBER TO INSTALL 'tabulate' if you haven't: pip install tabulate")
|
| 1041 |
if missing_keys: print(f"\n--- PLEASE SET MISSING ENV VARS: {', '.join(missing_keys)} ---\n")
|
| 1042 |
print("-"*(60 + len(" RAG & FileProcessor Agent App Starting ")) + "\n")
|
| 1043 |
demo.launch(server_name="0.0.0.0", server_port=7860, debug=False, share=False)
|
|
|
|
| 723 |
content_type = response.headers.get("Content-Type", "")
|
| 724 |
processed_content = FileProcessor.process(response.content, filename, content_type)
|
| 725 |
return processed_content
|
|
|
|
| 726 |
except requests.exceptions.HTTPError as e:
|
| 727 |
if e.response.status_code == 404:
|
| 728 |
gaia_logger.warning(f"File not found for task {task_id}: {file_url}")
|
|
|
|
| 739 |
def _parse_llm_output(self, llm_text: str) -> Dict[str, str]:
|
| 740 |
reasoning_trace = ""
|
| 741 |
model_answer = ""
|
|
|
|
| 742 |
final_answer_sentinel = "FINAL ANSWER:"
|
| 743 |
+
|
| 744 |
parts = llm_text.split(final_answer_sentinel, 1)
|
| 745 |
|
| 746 |
if len(parts) == 2:
|
| 747 |
reasoning_trace = parts[0].strip()
|
| 748 |
model_answer = parts[1].strip()
|
| 749 |
else:
|
| 750 |
+
reasoning_trace = llm_text
|
| 751 |
lines = llm_text.strip().split('\n')
|
| 752 |
+
model_answer = lines[-1].strip() if lines else "Could not parse answer"
|
| 753 |
+
gaia_logger.warning(f"LLM output did not contain '{final_answer_sentinel}'. Using fallback parsing. Full LLM text: '{llm_text[:200]}...'")
|
| 754 |
|
| 755 |
return {"model_answer": model_answer, "reasoning_trace": reasoning_trace}
|
| 756 |
|
|
|
|
| 757 |
def _formulate_answer_with_llm(self, question: str, file_context: Optional[str], web_context: Optional[str]) -> Dict[str, str]:
|
| 758 |
+
default_model_answer = "Information not available in provided context"
|
| 759 |
default_reasoning = "LLM processing failed or context insufficient."
|
| 760 |
|
| 761 |
if not self.llm_model:
|
| 762 |
gaia_logger.warning("LLM model (Gemini) not available for answer formulation.")
|
| 763 |
reasoning = "LLM model (Gemini) not available for answer formulation."
|
| 764 |
+
answer_val = default_model_answer
|
| 765 |
if web_context and file_context:
|
| 766 |
reasoning += " Context from file and web was found but not processed by LLM."
|
| 767 |
elif web_context:
|
|
|
|
| 770 |
reasoning += f" File context found: {file_context[:100]}..."
|
| 771 |
else:
|
| 772 |
reasoning += " No context found."
|
| 773 |
+
return {"model_answer": answer_val, "reasoning_trace": reasoning}
|
| 774 |
|
| 775 |
prompt_parts = [
|
| 776 |
"You are a general AI assistant. Your primary goal is to answer the user's question accurately and concisely based *only* on the provided context (from a document and/or web search results).",
|
|
|
|
| 786 |
]
|
| 787 |
|
| 788 |
current_prompt_text_len = sum(len(p) for p in prompt_parts)
|
|
|
|
| 789 |
context_added = False
|
| 790 |
if file_context:
|
| 791 |
file_header = "\n\nContext from Provided Document:\n---"
|
| 792 |
file_footer = "\n---"
|
| 793 |
max_len_for_file = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - (len(web_context) if web_context else 0) - len(file_header) - len(file_footer) - 500
|
|
|
|
| 794 |
if max_len_for_file > 100 :
|
| 795 |
truncated_file_context = file_context[:max_len_for_file]
|
| 796 |
if len(file_context) > len(truncated_file_context):
|
|
|
|
| 798 |
prompt_parts.extend([file_header, truncated_file_context, file_footer])
|
| 799 |
current_prompt_text_len += len(file_header) + len(truncated_file_context) + len(file_footer)
|
| 800 |
context_added = True
|
| 801 |
+
else: gaia_logger.warning("Not enough space for file context in LLM prompt.")
|
|
|
|
|
|
|
| 802 |
|
| 803 |
if web_context:
|
| 804 |
web_header = "\n\nContext from Web Search Results:\n---"
|
| 805 |
web_footer = "\n---"
|
| 806 |
available_len_for_web = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len(web_header) - len(web_footer) - 300
|
|
|
|
| 807 |
if available_len_for_web > 100:
|
| 808 |
truncated_web_context = web_context
|
| 809 |
if len(web_context) > available_len_for_web:
|
| 810 |
truncated_web_context = web_context[:available_len_for_web] + "\n... (web context truncated)"
|
| 811 |
gaia_logger.info(f"Truncated web context from {len(web_context)} to {len(truncated_web_context)} chars for LLM.")
|
|
|
|
| 812 |
prompt_parts.extend([web_header, truncated_web_context, web_footer])
|
| 813 |
context_added = True
|
| 814 |
+
else: gaia_logger.warning("Not enough space for web context in LLM prompt, or web context itself is empty.")
|
|
|
|
|
|
|
| 815 |
|
| 816 |
+
if not context_added: prompt_parts.append("\n\nNo document or web context could be provided due to length constraints or availability.")
|
|
|
|
|
|
|
| 817 |
prompt_parts.append("\n\nReasoning and Final Answer:")
|
| 818 |
final_prompt = "\n".join(prompt_parts)
|
| 819 |
|
| 820 |
gaia_logger.info(f"LLM Prompt (first 300): {final_prompt[:300]}...")
|
|
|
|
| 821 |
gaia_logger.info(f"LLM Total prompt length: {len(final_prompt)} chars.")
|
| 822 |
|
| 823 |
if not GenerationConfig:
|
|
|
|
| 825 |
return {"model_answer": "LLM configuration error", "reasoning_trace": "GenerationConfig not available."}
|
| 826 |
|
| 827 |
try:
|
| 828 |
+
gen_config = GenerationConfig(temperature=0.1, top_p=0.95, max_output_tokens=2048)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 829 |
safety_set = [{"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"} for c in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH", "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]]
|
| 830 |
+
response = self.llm_model.generate_content(final_prompt, generation_config=gen_config, safety_settings=safety_set)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 831 |
|
| 832 |
if not response.candidates or (hasattr(response, 'prompt_feedback') and response.prompt_feedback.block_reason):
|
| 833 |
reason_text = "Unknown"
|
| 834 |
+
if hasattr(response, 'prompt_feedback') and response.prompt_feedback.block_reason: reason_text = response.prompt_feedback.block_reason.name
|
|
|
|
| 835 |
gaia_logger.warning(f"Gemini response blocked. Reason: {reason_text}.")
|
| 836 |
return {"model_answer": "Error processing request", "reasoning_trace": f"My response was blocked (Reason: {reason_text})."}
|
| 837 |
|
| 838 |
llm_answer_text = response.text
|
| 839 |
gaia_logger.info(f"LLM Raw Full Answer (first 200): {llm_answer_text[:200]}...")
|
|
|
|
| 840 |
return self._parse_llm_output(llm_answer_text)
|
| 841 |
|
| 842 |
except Exception as e:
|
| 843 |
gaia_logger.error(f"Error calling Gemini API: {e}", exc_info=True)
|
| 844 |
error_type_name = type(e).__name__
|
| 845 |
reasoning = f"Error calling Gemini API: {error_type_name} - {str(e)}"
|
| 846 |
+
answer_val = "LLM API error"
|
| 847 |
if "429" in str(e) or "ResourceExhausted" in error_type_name:
|
| 848 |
+
answer_val = "LLM rate limit"
|
| 849 |
reasoning = "Error: LLM temporarily unavailable (rate limit)."
|
| 850 |
+
return {"model_answer": answer_val, "reasoning_trace": reasoning}
|
| 851 |
|
| 852 |
def __call__(self, question: str, task_id: Optional[str] = None) -> Dict[str, str]:
|
| 853 |
gaia_logger.info(f"Agent processing: '{question[:70]}...', TaskID: {task_id}")
|
|
|
|
| 856 |
if "what is your name" in q_lower or "who are you" in q_lower:
|
| 857 |
return {"model_answer": "general AI assistant", "reasoning_trace": "User asked for my identity."}
|
| 858 |
|
|
|
|
| 859 |
file_ctx_str: Optional[str] = None
|
| 860 |
file_kws = ["document", "file", "text", "provide", "attach", "read", "content", "table", "data", "excel", "pdf", "audio", "code", "script", "log"]
|
| 861 |
if task_id and (any(kw in q_lower for kw in file_kws) or "this task involves a file" in q_lower):
|
| 862 |
file_ctx_str = self._fetch_and_process_file_content(task_id)
|
| 863 |
+
if file_ctx_str: gaia_logger.info(f"Processed file context ({len(file_ctx_str)} chars) for task {task_id}")
|
| 864 |
+
else: gaia_logger.warning(f"No file content or failed to process for task {task_id}")
|
|
|
|
|
|
|
| 865 |
|
| 866 |
web_ctx_str: Optional[str] = None
|
| 867 |
needs_web = True
|
| 868 |
if file_ctx_str and len(file_ctx_str) > 300:
|
| 869 |
+
web_still_needed_kws = ["what is", "who is", "current", "latest", "news", "public opinion", "recent events", "search for", "find information on", "browse", "look up"]
|
|
|
|
|
|
|
|
|
|
| 870 |
doc_can_answer_kws = ["summarize", "according to the document", "in the provided text"]
|
|
|
|
| 871 |
if any(kw in q_lower for kw in doc_can_answer_kws) and not any(kw in q_lower for kw in web_still_needed_kws):
|
| 872 |
needs_web = False
|
|
|
|
| 873 |
elif not any(kw in q_lower for kw in web_still_needed_kws):
|
| 874 |
needs_web = False
|
|
|
|
|
|
|
| 875 |
if "don't search" in q_lower or "do not search" in q_lower or "without searching" in q_lower:
|
| 876 |
needs_web = False
|
|
|
|
| 877 |
|
| 878 |
if needs_web:
|
| 879 |
search_q = question.replace("?", "").strip()
|
|
|
|
| 880 |
rag_res = self.rag_pipeline.analyze(query=search_q, force_refresh=False)
|
| 881 |
if rag_res:
|
| 882 |
snippets = []
|
| 883 |
for i, res_item in enumerate(rag_res):
|
| 884 |
+
title, body, href = res_item.get('title','N/A'), res_item.get('body',''), res_item.get('href','#')
|
| 885 |
+
provider, prefix = res_item.get('query_tag','WebSearch'), "EnrichedContent" if res_item.get('enriched') else "Snippet"
|
|
|
|
|
|
|
|
|
|
| 886 |
body_preview = (body[:1500] + "...") if len(body) > 1500 else body
|
| 887 |
snippets.append(f"Source [{i+1} - {provider}]: {title}\nURL: {href}\n{prefix}: {body_preview}\n---")
|
| 888 |
web_ctx_str = "\n\n".join(snippets)
|
| 889 |
+
else: gaia_logger.warning("RAG pipeline yielded no web results.")
|
|
|
|
|
|
|
| 890 |
|
| 891 |
agent_response_dict = self._formulate_answer_with_llm(question, file_ctx_str, web_ctx_str)
|
| 892 |
gaia_logger.info(f"LLM-based model_answer (first 70): {agent_response_dict.get('model_answer', '')[:70]}...")
|
| 893 |
return agent_response_dict
|
| 894 |
+
|
| 895 |
|
| 896 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 897 |
space_id = os.getenv("SPACE_ID")
|
|
|
|
| 907 |
if not questions_data or not isinstance(questions_data, list): return "Questions list empty/invalid.", None
|
| 908 |
except Exception as e: return f"Error fetching questions: {e}", None
|
| 909 |
|
| 910 |
+
results_log_for_gradio, answers_for_api_submission = [], []
|
| 911 |
GEMINI_RPM_LIMIT = int(os.getenv("GEMINI_RPM_LIMIT", "60"))
|
| 912 |
sleep_llm = (60.0 / GEMINI_RPM_LIMIT) + 0.5 if GEMINI_RPM_LIMIT > 0 else 0.2
|
| 913 |
|
| 914 |
for i, item in enumerate(questions_data):
|
| 915 |
task_id, q_text = item.get("task_id"), item.get("question")
|
| 916 |
+
model_answer_val = "AGENT ERROR"
|
| 917 |
+
reasoning_trace_val = "Agent error occurred prior to LLM call."
|
| 918 |
+
|
| 919 |
if not task_id or q_text is None:
|
| 920 |
+
model_answer_val = "SKIPPED"
|
| 921 |
+
reasoning_trace_val = "Task ID or question missing."
|
| 922 |
+
results_log_for_gradio.append({"Task ID": task_id, "Question": q_text, "Submitted Answer": model_answer_val, "Reasoning Trace": reasoning_trace_val})
|
| 923 |
+
answers_for_api_submission.append({"task_id": task_id, "submitted_answer": model_answer_val})
|
| 924 |
continue
|
| 925 |
+
|
| 926 |
gaia_logger.info(f"Q {i+1}/{len(questions_data)} - Task: {task_id}")
|
|
|
|
|
|
|
| 927 |
try:
|
| 928 |
agent_response_dict = agent(question=q_text, task_id=task_id)
|
| 929 |
+
model_answer_val = agent_response_dict.get("model_answer", "Error: No model_answer key in agent response")
|
| 930 |
+
reasoning_trace_val = agent_response_dict.get("reasoning_trace", "Error: No reasoning_trace key in agent response")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 931 |
except Exception as e:
|
| 932 |
+
gaia_logger.error(f"Error during agent call for task {task_id}: {e}", exc_info=True)
|
| 933 |
+
model_answer_val = "AGENT EXECUTION ERROR"
|
| 934 |
+
reasoning_trace_val = f"Agent call failed: {type(e).__name__} - {str(e)}"
|
| 935 |
+
|
| 936 |
+
answers_for_api_submission.append({"task_id": task_id, "submitted_answer": model_answer_val})
|
| 937 |
+
results_log_for_gradio.append({"Task ID": task_id, "Question": q_text, "Submitted Answer": model_answer_val, "Reasoning Trace (first 500 chars)": reasoning_trace_val[:500] + ("..." if len(reasoning_trace_val) > 500 else "")})
|
| 938 |
+
|
| 939 |
if i < len(questions_data) - 1: time.sleep(sleep_llm)
|
| 940 |
|
| 941 |
+
if not answers_for_api_submission: return "Agent produced no answers for API submission.", pd.DataFrame(results_log_for_gradio or [{"Info": "No questions processed"}])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 942 |
|
| 943 |
submission_payload_for_api = {
|
| 944 |
"username": username.strip(),
|
| 945 |
"agent_code": agent_code,
|
| 946 |
+
"answers": answers_for_api_submission
|
| 947 |
}
|
| 948 |
+
gaia_logger.info(f"Submitting {len(answers_for_api_submission)} answers for '{username}' to API...")
|
| 949 |
+
gaia_logger.debug(f"API Submission Payload Sample: {json.dumps(submission_payload_for_api)[:500]}")
|
| 950 |
|
| 951 |
try:
|
| 952 |
response = requests.post(submit_url, json=submission_payload_for_api, timeout=60);
|
|
|
|
| 955 |
status = (f"Submission Successful!\nUser: {result_data.get('username')}\nScore: {result_data.get('score','N/A')}% "
|
| 956 |
f"({result_data.get('correct_count','?')}/{result_data.get('total_attempted','?')} correct)\n"
|
| 957 |
f"Msg: {result_data.get('message','No message.')}")
|
| 958 |
+
return status, pd.DataFrame(results_log_for_gradio)
|
| 959 |
except requests.exceptions.HTTPError as e:
|
| 960 |
err_detail = f"Server: {e.response.status_code}. Detail: {e.response.text[:200]}"
|
| 961 |
+
return f"Submission Failed: {err_detail}", pd.DataFrame(results_log_for_gradio)
|
| 962 |
+
except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(results_log_for_gradio)
|
| 963 |
|
| 964 |
with gr.Blocks(title="GAIA RAG Agent - Advanced") as demo:
|
| 965 |
gr.Markdown("# Gaia Level 1 Agent (RAG & FileProcessor) Evaluation Runner")
|
|
|
|
| 986 |
for k, v in required_env.items(): print(f"✅ {k} found." if v else f"⚠️ WARNING: {k} not set.")
|
| 987 |
for lib_name, lib_var in [("transformers", hf_transformers_pipeline), ("torch", torch), ("librosa", librosa), ("openpyxl", openpyxl), ("pdfplumber", pdfplumber)]:
|
| 988 |
print(f"✅ {lib_name} lib found." if lib_var else f"⚠️ WARNING: {lib_name} lib missing (some file types may not be processed).")
|
|
|
|
| 989 |
if missing_keys: print(f"\n--- PLEASE SET MISSING ENV VARS: {', '.join(missing_keys)} ---\n")
|
| 990 |
print("-"*(60 + len(" RAG & FileProcessor Agent App Starting ")) + "\n")
|
| 991 |
demo.launch(server_name="0.0.0.0", server_port=7860, debug=False, share=False)
|