Spaces:
Sleeping
Sleeping
| import os | |
| import time | |
def search_for_directory(target_dir, max_depth=5, start_paths=None):
    """
    Search for directories named ``target_dir`` below a set of start paths.

    The walk is depth-limited, does not descend into symlinked
    subdirectories, and skips a fixed set of system directories. Progress
    and results are printed to stdout as the search runs.

    Args:
        target_dir: The directory name to find (exact match on the name).
        max_depth: Maximum recursion level. Each start path is level 0;
            directories deeper than ``max_depth`` are not listed.
        start_paths: List of paths to start the search from. If None,
            defaults to ['.', '..', '/home', '/app', '/mnt', '/tmp'].

    Returns:
        List of matching paths in discovery order, spelled the way they
        were first reached. Each physical directory is reported once even
        when several start paths overlap.
    """
    if start_paths is None:
        start_paths = ['.', '..', '/home', '/app', '/mnt', '/tmp']

    print(f"Searching for directory: {target_dir}")
    print(f"Starting search from: {start_paths}")

    found_paths = []
    # Resolved locations of matches already reported, so overlapping start
    # paths (e.g. '.' and '..') do not produce duplicate results.
    reported = set()
    # Resolved directory -> shallowest level it has been listed at. A
    # directory is listed again only when reached with more depth budget
    # left, so skipping repeats never hides anything.
    listed_at = {}
    # Directories to skip for efficiency and to avoid permission errors
    skip_dirs = {'/proc', '/sys', '/dev', '/run', '/snap'}

    def search_dir(path, depth=0):
        if depth > max_depth:
            return

        # Compare resolved paths: a relative spelling such as '../proc'
        # would otherwise never match the absolute entries in skip_dirs.
        resolved = os.path.realpath(path)
        if resolved in skip_dirs:
            return

        previous = listed_at.get(resolved)
        if previous is not None and previous <= depth:
            return
        listed_at[resolved] = depth

        try:
            for item in os.listdir(path):
                full_path = os.path.join(path, item)
                if not os.path.isdir(full_path):
                    continue

                # Check if this is our target
                if item == target_dir:
                    match_key = os.path.realpath(full_path)
                    if match_key not in reported:
                        reported.add(match_key)
                        print(f"FOUND: {full_path}")
                        found_paths.append(full_path)
                        # Print contents to verify it's what we're looking for
                        try:
                            contents = os.listdir(full_path)
                            print(f"Contents: {contents}")
                        except Exception as e:
                            print(f"Could not list contents: {e}")

                # Recurse into real subdirectories only; following symlinks
                # could loop or escape the intended search area.
                if not os.path.islink(full_path):
                    search_dir(full_path, depth + 1)
        except (PermissionError, FileNotFoundError):
            # Skip directories we can't access
            pass
        except Exception as e:
            # Print other errors but continue searching
            print(f"Error accessing {path}: {e}")

    start_time = time.time()

    # Start the search from each starting path (isdir implies existence)
    for start_path in start_paths:
        if os.path.isdir(start_path):
            search_dir(start_path)

    elapsed_time = time.time() - start_time
    print(f"Search completed in {elapsed_time:.2f} seconds")
    print(f"Found {len(found_paths)} matching directories:")
    for path in found_paths:
        print(f" - {path}")

    return found_paths
if __name__ == "__main__":
    # Probe for both directory names to understand how the deployment
    # lays out its data and Streamlit configuration.
    for wanted in ('processed_data', '.streamlit'):
        search_for_directory(wanted)

    # Separately look for the pickled chunk file anywhere under /home/user.
    pickle_name = 'document_chunks.pkl'
    print("\nSearching for document_chunks.pkl file...")
    for folder, _subdirs, filenames in os.walk('/home/user'):
        if pickle_name not in filenames:
            continue
        print(f"FOUND FILE: {os.path.join(folder, pickle_name)}")

    # Dump only the environment variables whose names mention a path,
    # directory or home location - they might contain useful information.
    print("\nEnvironment variables:")
    name_markers = ('PATH', 'DIR', 'HOME')
    for name, setting in os.environ.items():
        if any(marker in name for marker in name_markers):
            print(f"{name}: {setting}")