Spaces:
Sleeping
Sleeping
| """ | |
| Test script to debug metadata loading and file finding. | |
| """ | |
| import os | |
| import json | |
| import sys | |
| # Add the parent directory to sys.path | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from agent.utils.question_analyzer import QuestionAnalyzer | |
def _load_test_questions(metadata_path, limit=5):
    """Collect up to *limit* metadata entries that reference an attached file.

    Args:
        metadata_path: Path to a JSON Lines file (one JSON object per line).
        limit: Maximum number of entries to return.

    Returns:
        A list of dicts with the keys ``task_id``, ``question`` and
        ``file_name``, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    questions = []
    with open(metadata_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line is not valid JSON; skip it instead of letting
                # json.loads abort the whole run.
                continue
            entry = json.loads(line)
            # Only entries with a question and a non-empty file name are
            # useful for exercising the file lookup.
            if 'Question' in entry and entry.get('file_name'):
                questions.append({
                    'task_id': entry.get('task_id'),
                    'question': entry['Question'],
                    'file_name': entry['file_name'],
                })
                if len(questions) >= limit:
                    break
    return questions


def main(resource_dir=None):
    """Print diagnostics about metadata loading and file finding.

    The function lists the resource directory, loads the metadata through
    ``QuestionAnalyzer``, prints the first five metadata entries (checking
    whether each referenced file is present on disk), and finally asks the
    analyzer to locate the file for up to five questions read directly from
    ``metadata.jsonl``.

    Args:
        resource_dir: Directory holding ``metadata.jsonl`` and the attached
            files. Defaults to the ``resource`` directory that sits next to
            this script's parent directory.

    Returns:
        None. All results are written to standard output. The function
        returns early when the resource directory or the metadata file is
        missing.
    """
    max_entries = 5

    if resource_dir is None:
        project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        resource_dir = os.path.join(project_root, 'resource')
    print(f"Resource directory: {resource_dir}")

    # isdir (not exists): a regular file at this path would otherwise pass the
    # guard and make os.listdir raise NotADirectoryError.
    if not os.path.isdir(resource_dir):
        print(f"Resource directory does not exist: {resource_dir}")
        return

    print("Files in resource directory:")
    for file in os.listdir(resource_dir):
        print(f" {file}")

    metadata_path = os.path.join(resource_dir, 'metadata.jsonl')
    if not os.path.isfile(metadata_path):
        print(f"Metadata file does not exist: {metadata_path}")
        return

    print("\nLoading metadata...")
    # NOTE(review): QuestionAnalyzer is defined elsewhere; this code only
    # relies on it exposing a `metadata` mapping and `find_relevant_file`.
    question_analyzer = QuestionAnalyzer(resource_dir)
    print(f"Metadata entries: {len(question_analyzer.metadata)}")

    # Show only the first few entries to keep the output readable.
    for shown, (task_id, entry) in enumerate(question_analyzer.metadata.items(), start=1):
        print(f"\nTask ID: {task_id}")
        print(f"Question: {entry.get('Question', 'N/A')[:100]}...")
        print(f"File Name: {entry.get('file_name', 'N/A')}")
        print(f"Expected Answer: {entry.get('Final answer', 'N/A')}")

        if entry.get('file_name'):
            file_path = os.path.join(resource_dir, entry['file_name'])
            # The status markers were mis-encoded into the same character for
            # both outcomes; presumably they were check/cross marks.
            if os.path.exists(file_path):
                print(f"✓ File exists: {file_path}")
            else:
                print(f"✗ File does not exist: {file_path}")

        if shown >= max_entries:
            break

    print("\nTesting file finding...")
    for q in _load_test_questions(metadata_path, limit=max_entries):
        print(f"\nQuestion: {q['question'][:100]}...")
        print(f"Expected file: {q['file_name']}")
        file_path = question_analyzer.find_relevant_file(q['question'], q['task_id'])
        if file_path:
            print(f"✓ Found file: {os.path.basename(file_path)}")
        else:
            print("✗ No file found")
# Run the diagnostics only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()