| | import gradio as gr |
| | import os |
| | import pandas as pd |
| | import plotly.graph_objects as go |
| | import asyncio |
| | from datetime import datetime |
| | import re |
| | import pathlib |
| |
|
| | |
| | |
# Number of accepted files between two intermediate UI refreshes while a scan
# is running (scan_directory yields a new figure/table each time the count of
# collected files is a multiple of this value).
UPDATE_INTERVAL = 250
| |
|
def parse_filename_words(filename):
    """
    Return the alphabetic runs of a filename as one comma-separated string.

    Digits, punctuation and every other non-letter character only act as
    separators between words. If the name holds no ASCII letters at all,
    the placeholder "N/A" is returned instead.
    Example: "Aarons123File-482.md" -> "Aarons, File, md"
    """
    letter_runs = [match.group() for match in re.finditer('[A-Za-z]+', filename)]
    if not letter_runs:
        return "N/A"
    return ", ".join(letter_runs)
| |
|
def get_file_info(path, root_path):
    """
    Gather the treemap record for a single file.

    Args:
        path: Path of the candidate file.
        root_path: Directory the scan started from; the file's position in
            the hierarchy is expressed relative to it.

    Returns:
        A dict with the keys 'path', 'label', 'parent', 'size',
        'color_group', 'created', 'modified' and 'keywords', or None when
        the path is not a regular file, is empty (0 bytes) or cannot be
        stat'ed.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'
    try:
        if not os.path.isfile(path):
            return None
        stat = os.stat(path)
        # NOTE: st_ctime is the creation time on Windows but the time of the
        # last metadata change on most Unix systems.
        created = datetime.fromtimestamp(stat.st_ctime).strftime(timestamp_format)
        modified = datetime.fromtimestamp(stat.st_mtime).strftime(timestamp_format)
    except OSError:
        # FileNotFoundError / PermissionError are OSError subclasses: the file
        # vanished or is unreadable, so it is skipped rather than aborting.
        return None

    size = stat.st_size
    if size == 0:
        # Zero-byte files are left out of the result.
        return None

    # normpath drops a trailing separator that would make basename() return "".
    root_name = os.path.basename(os.path.normpath(root_path)) or str(root_path)

    try:
        relative_path = os.path.relpath(path, root_path)
    except ValueError:
        # relpath() cannot relate the two paths (e.g. different drives on
        # Windows); treat the file as if it sat directly in the root.
        relative_path = os.path.basename(path)

    directory_parts = pathlib.Path(relative_path).parts[:-1]
    if directory_parts:
        top_level_dir = directory_parts[0]
        parent_path = str(pathlib.Path(*directory_parts))
    else:
        # Files directly inside the scanned directory belong to the root
        # itself, not to a "directory" named after the file.
        top_level_dir = root_name
        parent_path = root_name

    file_name = os.path.basename(path)
    return {
        'path': path,
        'label': file_name,
        'parent': parent_path,
        'size': size,
        'color_group': top_level_dir,
        'created': created,
        'modified': modified,
        'keywords': parse_filename_words(file_name)
    }
| |
|
def _directory_totals(df):
    """Map each directory named in df['parent'] (and every ancestor) to the total size of the files below it."""
    totals = {}
    for parent, size in zip(df['parent'], df['size']):
        node = str(parent)
        while node:
            totals[node] = totals.get(node, 0) + int(size)
            head = os.path.dirname(node)
            # dirname() of a filesystem root returns the root itself; stop
            # there instead of looping forever.
            node = "" if head == node else head
    return totals


def create_treemap_figure(df):
    """
    Generate the Plotly treemap figure from a DataFrame of file info.

    Args:
        df: DataFrame whose rows carry the columns 'path', 'label',
            'parent', 'size', 'color_group', 'modified', 'created' and
            'keywords'. An empty DataFrame produces a placeholder figure.

    Returns:
        A plotly.graph_objects.Figure holding a single Treemap trace.
    """
    if df.empty:
        return go.Figure(go.Treemap(
            labels=["Your scan will appear here."],
            parents=[""],
            values=[1]
        ))

    file_ids = df['path'].astype(str).tolist()
    taken_ids = set(file_ids)

    # Every value in `parents` must also exist in `ids`, otherwise the
    # hierarchy cannot be assembled. The rows only describe files, so the
    # directories they refer to are added here as extra nodes.
    dir_totals = {
        directory: total
        for directory, total in _directory_totals(df).items()
        if directory not in taken_ids
    }
    dir_ids = list(dir_totals)

    def parent_of(directory):
        head = os.path.dirname(directory)
        return "" if head == directory else head

    ids = file_ids + dir_ids
    labels = df['label'].tolist() + [os.path.basename(d) or d for d in dir_ids]
    parents = df['parent'].astype(str).tolist() + [parent_of(d) for d in dir_ids]
    # With branchvalues='remainder' a branch's own value is added on top of
    # its children, so directories contribute 0 themselves.
    values = df['size'].tolist() + [0] * len(dir_ids)
    colors = df['color_group'].tolist() + [d.split(os.sep)[0] or d for d in dir_ids]
    customdata = (
        df[['size', 'modified', 'created', 'keywords']].values.tolist()
        + [[dir_totals[d], "-", "-", "-"] for d in dir_ids]
    )

    fig = go.Figure(go.Treemap(
        ids=ids,
        labels=labels,
        parents=parents,
        values=values,
        branchvalues='remainder',
        marker_colors=colors,
        # The squarified layout is selected through `tiling.packing`;
        # `tiling.method` and `root.label` are not Treemap properties.
        tiling={'packing': 'squarify'},
        customdata=customdata,
        hovertemplate=(
            "<b>%{label}</b><br>"
            "Size: %{customdata[0]:.2s}B<br>"
            "Modified: %{customdata[1]}<br>"
            "Created: %{customdata[2]}<br>"
            "Keywords: %{customdata[3]}<br>"
            "Path: %{id}<extra></extra>"
        ),
        pathbar={'visible': True}
    ))

    fig.update_layout(
        margin=dict(t=50, l=25, r=25, b=25),
        title="File System Treemap"
    )
    return fig
| |
|
async def scan_directory(directory, stop_flag_state, progress=gr.Progress(track_tqdm=True)):
    """
    Asynchronously scan a directory tree, yielding updates to the UI.

    Args:
        directory: Path of the directory to walk.
        stop_flag_state: Mutable mapping with a 'stop' key. It is reset to
            False when the scan starts and polled while scanning; setting it
            to True ends the scan early.
        progress: Gradio progress tracker used for the start/stop messages.

    Yields:
        Tuples of (treemap figure, status text, DataFrame of file records):
        one every UPDATE_INTERVAL accepted files and a final one when the
        walk finishes or is stopped. An invalid directory yields a single
        placeholder tuple.
    """
    if not directory or not os.path.isdir(directory):
        yield create_treemap_figure(pd.DataFrame()), "Invalid directory path.", pd.DataFrame()
        return

    file_list = []

    # A new scan always starts with any earlier stop request cleared.
    stop_flag_state['stop'] = False

    progress(0, desc="Starting scan...")

    for root, _, files in os.walk(directory, topdown=True):
        if stop_flag_state['stop']:
            break

        for name in files:
            info = get_file_info(os.path.join(root, name), directory)
            if not info:
                continue
            file_list.append(info)

            processed_count = len(file_list)
            if processed_count % UPDATE_INTERVAL == 0:
                df = pd.DataFrame(file_list)
                yield create_treemap_figure(df), f"Scanned {processed_count} files...", df
                # Hand control back to the event loop so other handlers
                # (such as the stop button) get a chance to run.
                await asyncio.sleep(0.01)
                if stop_flag_state['stop']:
                    # Leave this directory right away; the check at the top
                    # of the outer loop then ends the walk, so a stop also
                    # interrupts a very large flat directory.
                    break

    stopped = stop_flag_state['stop']
    if stopped:
        progress(1.0, "Scan stopped by user.")

    df = pd.DataFrame(file_list)
    if stopped:
        final_status = f"Scan stopped. Displaying {len(df)} found files."
    else:
        final_status = f"Scan complete. Found {len(df)} files."

    yield create_treemap_figure(df), final_status, df
| |
|
def stop_scan(stop_flag_state):
    """
    Request that the running scan stops.

    Marks the given state mapping with 'stop' = True and hands the same
    mapping back together with the status text to display.
    """
    stop_flag_state.update(stop=True)
    status_message = "Stopping scan..."
    return stop_flag_state, status_message
| |
|
| | |
# UI layout and event wiring for the treemap visualizer.
with gr.Blocks(theme=gr.themes.Soft(), title="File System Treemap") as app:
    # Mutable flag handed to both handlers: scan_directory polls it and
    # stop_scan sets it.
    stop_flag = gr.State({'stop': False})

    gr.Markdown("# 📁 File System Treemap Visualizer")
    gr.Markdown("Enter a directory path to generate a squarified treemap. The visualization will build in real-time.")

    with gr.Row():
        path_input = gr.Textbox(
            label="Directory Path",
            placeholder="e.g., C:\\Users\\YourUser\\Documents",
            scale=3
        )
        start_button = gr.Button("Start Scan", variant="primary", scale=1)
        stop_button = gr.Button("Stop Scan", variant="stop", scale=1)

    status_label = gr.Label("Status: Ready")

    with gr.Tabs():
        with gr.TabItem("Treemap Visualization"):
            plot_output = gr.Plot(interactive=True)
        with gr.TabItem("Data Table"):
            data_output = gr.DataFrame(wrap=True)

    # Keep the event returned by registering the listener: `cancels` needs
    # that event, not the bound `start_button.click` method.
    scan_event = start_button.click(
        fn=scan_directory,
        inputs=[path_input, stop_flag],
        outputs=[plot_output, status_label, data_output]
    )

    # NOTE(review): cancelling the scan event may end the generator before it
    # can emit its final "Scan stopped" update - confirm whether the
    # cooperative stop flag alone is the preferred behaviour.
    stop_button.click(
        fn=stop_scan,
        inputs=[stop_flag],
        outputs=[stop_flag, status_label],
        cancels=[scan_event]
    )


if __name__ == "__main__":
    app.launch()