Spaces:
Sleeping
Sleeping
| # Utility functions for GAIA Agent Evaluator | |
| from langchain.docstore.document import Document | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from config import GAIA_KNOWLEDGE, ANSWER_PREFIXES_TO_REMOVE, LLM_RESPONSE_MARKERS, LLM_END_MARKERS | |
def create_knowledge_documents():
    """Split GAIA_KNOWLEDGE into chunks and wrap each chunk in a Document.

    Returns:
        A list with one ``Document`` per chunk produced by the splitter,
        each carrying the chunk text as its ``page_content``.
    """
    # Splitter settings: 500-character chunks with a 50-character overlap.
    # The separator candidates are listed from paragraph breaks down to the
    # empty string.
    splitter_options = {
        "chunk_size": 500,
        "chunk_overlap": 50,
        "separators": ["\n\n", "\n", ".", "!", "?", ",", " ", ""],
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)

    pieces = splitter.split_text(GAIA_KNOWLEDGE)
    return [Document(page_content=piece) for piece in pieces]
def clean_llm_response(response, prompt, response_markers=None, end_markers=None):
    """Clean up the LLM response to extract the answer.

    The prompt echo (if any) is dropped, then everything up to and including
    each answer marker is discarded, then everything from each end marker
    onwards is discarded. Markers are matched case-insensitively, but the
    returned text keeps the model's original capitalization.

    Args:
        response: Raw text returned by the model.
        prompt: The prompt that was sent; removed when the response starts
            with it.
        response_markers: Optional iterable of strings that introduce the
            answer. Defaults to ``LLM_RESPONSE_MARKERS``.
        end_markers: Optional iterable of strings that terminate the answer.
            Defaults to ``LLM_END_MARKERS``.

    Returns:
        The extracted answer with surrounding whitespace stripped.
    """
    import re  # local import: keeps this block self-contained

    if response_markers is None:
        response_markers = LLM_RESPONSE_MARKERS
    if end_markers is None:
        end_markers = LLM_END_MARKERS

    # Remove the prompt from the beginning if it's included
    if prompt and response.startswith(prompt):
        response = response[len(prompt):]

    # Try to find where the model's actual answer begins. Markers are applied
    # one after another, each against the already-trimmed text.
    for marker in response_markers:
        if not marker:
            continue  # an empty marker would match everywhere; ignore it
        found = re.search(re.escape(marker), response, re.IGNORECASE)
        if found:
            # Slice the ORIGINAL text so the answer's case is preserved.
            response = response[found.end():].strip()

    # Remove any closing tags if they exist
    for marker in end_markers:
        if not marker:
            continue
        found = re.search(re.escape(marker), response, re.IGNORECASE)
        if found:
            response = response[:found.start()].strip()

    return response.strip()
def extract_final_answer(answer, prefixes=None):
    """Extract and clean the final answer for exact matching.

    Steps, in order: strip surrounding whitespace, remove known lead-in
    prefixes (case-insensitive), remove one pair of enclosing quotes, remove
    a single trailing period, and collapse runs of whitespace.

    Args:
        answer: Raw answer text.
        prefixes: Optional iterable of lead-in phrases to strip from the
            start of the answer. Defaults to ``ANSWER_PREFIXES_TO_REMOVE``.

    Returns:
        The normalized answer string.
    """
    if prefixes is None:
        prefixes = ANSWER_PREFIXES_TO_REMOVE

    clean_answer = answer.strip()

    # Remove prefixes (case insensitive)
    for prefix in prefixes:
        if clean_answer.lower().startswith(prefix.lower()):
            clean_answer = clean_answer[len(prefix):].strip()

    # Remove quotes if the entire answer is quoted
    if clean_answer.startswith('"') and clean_answer.endswith('"'):
        clean_answer = clean_answer[1:-1]
    elif clean_answer.startswith("'") and clean_answer.endswith("'"):
        clean_answer = clean_answer[1:-1]

    # Remove one trailing period. A period in the last position is never a
    # decimal point with digits after it, so "42." and "3.14." are trimmed
    # too, while "3.14" is left untouched.
    if clean_answer.endswith('.'):
        clean_answer = clean_answer[:-1]

    # Clean up extra whitespace
    return ' '.join(clean_answer.split())
def format_prompt(question, context=""):
    """Build the LLM prompt for a question, with or without context.

    Args:
        question: The question text inserted after ``Question:``.
        context: Optional supporting text. When it is empty (falsy), the
            prompt variant without a context section is returned.

    Returns:
        The full prompt string, ending with ``Direct Answer:``.
    """
    # Guard clause: nothing to ground on, so use the knowledge-only prompt.
    if not context:
        return f"""You are a precise AI assistant that answers questions accurately. Your answer will be evaluated with exact string matching, so provide only the specific answer requested without additional text.
Question: {question}
Critical Instructions:
- Provide ONLY the exact answer requested, nothing else
- Do not include phrases like "The answer is", "Final answer", or explanations
- For numerical answers, use the exact format that would be expected
- For lists, use appropriate formatting (commas, spaces, etc.)
- For names, use proper capitalization
- Be concise and precise - extra words will cause evaluation failure
- Answer based on your knowledge and reasoning
Direct Answer:"""

    # Context was supplied: include it ahead of the question.
    return f"""You are a precise AI assistant that answers questions using available information. Your answer will be evaluated with exact string matching, so provide only the specific answer requested without additional text.
Context Information:
{context}
Question: {question}
Critical Instructions:
- Provide ONLY the exact answer requested, nothing else
- Do not include phrases like "The answer is", "Final answer", or "Based on the context"
- For numerical answers, use the exact format requested (integers, decimals, etc.)
- For lists, use the exact formatting specified in the question (commas, spaces, etc.)
- For names, use proper capitalization as would appear in official sources
- Be concise and precise - extra words will cause evaluation failure
- If the question asks for multiple items, provide them in the exact format requested
Direct Answer:"""