import os
import time
from typing import Dict, List, Optional, Set

# Start locations tried when the caller does not supply any.
_DEFAULT_START_PATHS = ('.', '..', '/home', '/app', '/mnt', '/tmp')

# Directories to skip for efficiency and to avoid permission errors.
_SKIP_DIRS = frozenset({'/proc', '/sys', '/dev', '/run', '/snap'})


def search_for_directory(
    target_dir: str,
    max_depth: int = 5,
    start_paths: Optional[List[str]] = None,
) -> List[str]:
    """
    Search for a directory by name across the file system, with constraints
    to avoid system directories and excessive depth.

    Progress and results are printed to stdout as the search runs.

    Args:
        target_dir: The directory name to find (compared against the entry
            name, not the full path).
        max_depth: Maximum directory depth to search. A start path is depth
            0; directories deeper than ``max_depth`` are not listed.
        start_paths: List of paths to start search from. If None, a default
            list of common locations (current directory, its parent, /home,
            /app, /mnt, /tmp) is used. Entries that do not exist or are not
            directories are ignored.

    Returns:
        The matching directory paths, in discovery order. Each physical
        directory is reported once even when several start paths overlap.
    """
    if start_paths is None:
        start_paths = list(_DEFAULT_START_PATHS)

    print(f"Searching for directory: {target_dir}")
    print(f"Starting search from: {start_paths}")

    found_paths: List[str] = []
    # Real paths of matches already reported, so overlapping start paths
    # (e.g. '.' and '..') do not produce duplicate results.
    reported: Set[str] = set()
    # Real path -> shallowest depth at which the directory has been listed.
    # A directory is only walked again if it is reached at a shallower depth,
    # because only then can the depth budget uncover new entries below it.
    shallowest_visit: Dict[str, int] = {}

    def report_match(full_path: str) -> None:
        """Record a match (once per physical directory) and print its contents."""
        real_match = os.path.realpath(full_path)
        if real_match in reported:
            return
        reported.add(real_match)

        print(f"FOUND: {full_path}")
        found_paths.append(full_path)

        # Print contents to verify it's what we're looking for
        try:
            contents = os.listdir(full_path)
        except OSError as e:
            print(f"Could not list contents: {e}")
        else:
            print(f"Contents: {contents}")

    def search_dir(path: str, depth: int = 0) -> None:
        """List ``path`` and recurse into its non-symlink subdirectories."""
        if depth > max_depth:
            return

        # Compare the resolved path so the skip list also applies when the
        # walk started from a relative path such as '..'.
        real_path = os.path.realpath(path)
        if real_path in _SKIP_DIRS:
            return

        previous_depth = shallowest_visit.get(real_path)
        if previous_depth is not None and previous_depth <= depth:
            return
        shallowest_visit[real_path] = depth

        try:
            # Sorted so the discovery order is deterministic.
            entries = sorted(os.listdir(path))
        except (PermissionError, FileNotFoundError, NotADirectoryError):
            # Skip directories we can't access (or that vanished meanwhile)
            return
        except OSError as e:
            # Print other errors but continue searching
            print(f"Error accessing {path}: {e}")
            return

        for item in entries:
            full_path = os.path.join(path, item)
            if not os.path.isdir(full_path):
                continue

            # Check if this is our target
            if item == target_dir:
                report_match(full_path)

            # Recursively search subdirectories; symlinks are not followed
            # to avoid cycles and escaping the searched tree.
            if not os.path.islink(full_path):
                search_dir(full_path, depth + 1)

    # perf_counter is monotonic, so the duration is immune to clock changes.
    start_time = time.perf_counter()

    # Start the search from each starting path
    for start_path in start_paths:
        if os.path.isdir(start_path):
            search_dir(start_path)

    elapsed_time = time.perf_counter() - start_time
    print(f"Search completed in {elapsed_time:.2f} seconds")
    print(f"Found {len(found_paths)} matching directories:")
    for path in found_paths:
        print(f"  - {path}")

    return found_paths


def main() -> None:
    """Run the diagnostic searches and print path-related environment variables."""
    # Look for both processed_data and .streamlit to understand how
    # directories are organized
    search_for_directory('processed_data')
    search_for_directory('.streamlit')

    # Also search for document_chunks.pkl file directly
    print("\nSearching for document_chunks.pkl file...")
    for root, _dirs, files in os.walk('/home/user'):
        if 'document_chunks.pkl' in files:
            full_path = os.path.join(root, 'document_chunks.pkl')
            print(f"FOUND FILE: {full_path}")

    # Print out environment variables - they might contain useful information
    print("\nEnvironment variables:")
    for key, value in os.environ.items():
        if any(tag in key for tag in ('PATH', 'DIR', 'HOME')):
            print(f"{key}: {value}")


if __name__ == "__main__":
    main()