Spaces:
Sleeping
Sleeping
import base64
import json
import os
from typing import Dict, List, Optional, Tuple
from urllib.parse import quote

import requests
import streamlit as st
# Streamlit requires the page configuration to be issued before any other
# Streamlit command, so this stays at the very top of the script.
_page_settings = {
    "page_title": "GitHub Repository Tree Generator",
    "page_icon": "π³",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)
class GitHubRepoAnalyzer:
    """Read-only helper around the GitHub REST API.

    Parses repository URLs, lists branches, fetches the recursive git tree of
    a branch, renders it as an indented text tree and downloads file contents.
    Failures while talking to the API are reported as values (an
    ``{"error": ...}`` dict or an error string) instead of being raised, so
    the UI code can display them directly.

    NOTE(review): the leading glyphs in several user-facing messages look like
    mis-decoded emoji. They are left untouched because the intended characters
    cannot be recovered from this file -- confirm against the original.
    """

    API_ROOT = "https://api.github.com"

    # Box-drawing connectors used when rendering the tree.
    _TEE = "├── "
    _ELBOW = "└── "
    _PIPE = "│   "
    _BLANK = "    "

    def __init__(self):
        """Build the request headers, adding a token from ``GITHUB_TOKEN`` if set."""
        self.github_token = os.environ.get('GITHUB_TOKEN')
        self.headers = {
            'Accept': 'application/vnd.github.v3+json',
            'User-Agent': 'GitHubRepoAnalyzer'
        }
        if self.github_token:
            self.headers['Authorization'] = f'token {self.github_token}'

    def parse_github_url(self, url: str) -> Tuple[str, str]:
        """Extract ``(owner, repo)`` from a GitHub repository URL.

        Accepts ``https://github.com/owner/repo`` style URLs (with optional
        extra path segments, ``.git`` suffix, query string or fragment) and
        the SSH form ``git@github.com:owner/repo.git``.

        Raises:
            ValueError: if no non-empty owner and repository can be found.
        """
        cleaned = url.strip()
        # Query strings and fragments never belong to the owner/repo path.
        cleaned = cleaned.split('#', 1)[0].split('?', 1)[0].rstrip('/')

        for marker in ('github.com/', 'git@github.com:'):
            if marker not in cleaned:
                continue
            parts = cleaned.split(marker)[-1].split('/')
            if len(parts) < 2:
                continue
            owner, repo = parts[0], parts[1]
            if repo.endswith('.git'):
                repo = repo[:-4]
            # Reject inputs such as "github.com//repo" or "github.com/owner/.git".
            if owner and repo:
                return owner, repo

        raise ValueError("Invalid GitHub URL format")

    def get_branches(self, owner: str, repo: str) -> List[str]:
        """Return the branch names of a repository.

        Falls back to ``['main', 'master']`` when the API call fails or
        returns a non-200 status, so the caller always has something to offer.
        """
        fallback = ['main', 'master']
        try:
            response = requests.get(
                f"{self.API_ROOT}/repos/{owner}/{repo}/branches",
                headers=self.headers,
                # Ask for the largest page; the default page is much smaller.
                params={'per_page': 100},
                timeout=10,
            )
            if response.status_code != 200:
                return fallback
            return [branch['name'] for branch in response.json()]
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Best effort: network problems or an unexpected payload shape.
            return fallback

    @staticmethod
    def _in_subfolder(path: str, subfolder: str) -> bool:
        """Return True if *path* is *subfolder* itself or lies beneath it."""
        if not subfolder:
            return True
        # Compare on a path-component boundary so "src" does not match "src2/x".
        return path == subfolder or path.startswith(subfolder + '/')

    def _request_tree(self, owner: str, repo: str, ref: str):
        """Issue the recursive git-tree request for *ref* and return the response."""
        url = f"{self.API_ROOT}/repos/{owner}/{repo}/git/trees/{ref}?recursive=1"
        return requests.get(url, headers=self.headers, timeout=30)

    def get_repo_tree(self, owner: str, repo: str, branch: str = 'main', subfolder: str = '') -> Dict:
        """Fetch the recursive tree of a branch, optionally limited to a subfolder.

        If the requested branch is not found (HTTP 404), ``master`` is tried
        once as a fallback.

        Returns:
            On success a dict with ``owner``, ``repo``, ``branch`` (the branch
            that was actually read), ``subfolder`` (without surrounding
            slashes), ``tree`` (list of ``path``/``type``/``size``/``url``
            dicts) and ``truncated`` (the API's truncation flag).  On failure
            a dict with a single ``error`` message.
        """
        subfolder = (subfolder or '').strip().strip('/')
        try:
            response = self._request_tree(owner, repo, branch)
            if response.status_code == 404 and branch != 'master':
                response = self._request_tree(owner, repo, 'master')
                if response.status_code == 200:
                    # Report the branch that was really read so later content
                    # requests use a ref that exists.
                    branch = 'master'

            if response.status_code != 200:
                return {"error": f"Failed to fetch repository tree. Status: {response.status_code}"}

            tree_data = response.json()
            filtered_tree = [
                {
                    'path': item['path'],
                    'type': item['type'],
                    'size': item.get('size', 0),
                    'url': item.get('url', '')
                }
                for item in tree_data.get('tree', [])
                if self._in_subfolder(item['path'], subfolder)
            ]
            return {
                "owner": owner,
                "repo": repo,
                "branch": branch,
                "subfolder": subfolder,
                "tree": filtered_tree,
                "truncated": bool(tree_data.get('truncated', False))
            }
        except Exception as e:
            # Boundary with the UI: surface any failure as a displayable message.
            return {"error": f"Error fetching repository tree: {str(e)}"}

    def format_tree_display(self, tree_data: Dict) -> str:
        """Render the result of :meth:`get_repo_tree` as header lines plus a text tree."""
        if "error" in tree_data:
            return f"β {tree_data['error']}"

        tree = tree_data.get('tree', [])
        if not tree:
            return "π No files found in the specified path."

        subfolder = tree_data.get('subfolder', '')

        # Header information
        output = [
            f"π¦ Repository: {tree_data['owner']}/{tree_data['repo']}",
            f"πΏ Branch: {tree_data['branch']}",
        ]
        if subfolder:
            output.append(f"π Subfolder: {subfolder}")
        output.append(f"π Total items: {len(tree)}")
        if tree_data.get('truncated'):
            output.append("Warning: GitHub truncated this listing; some entries are missing.")
        output.append("\n" + "="*60 + "\n")

        # Tree body
        output.append(self._build_tree_structure(tree, subfolder))
        return "\n".join(output)

    def _build_tree_structure(self, tree_items: List[Dict], subfolder: str = '') -> str:
        """Turn a flat list of tree entries into an indented text tree.

        Paths are shown relative to *subfolder*; the subfolder's own entry is
        skipped.  Paths outside the subfolder are shown unchanged.
        """
        if not tree_items:
            return ""

        prefix = (subfolder or '').strip('/')
        dir_structure: Dict = {}

        for item in tree_items:
            path = item['path']
            if prefix:
                if path == prefix:
                    # The subfolder itself has no relative path to display.
                    continue
                if path.startswith(prefix + '/'):
                    path = path[len(prefix) + 1:]
            if not path:
                continue

            parts = path.split('/')
            last = len(parts) - 1
            current = dir_structure
            for i, part in enumerate(parts):
                if part not in current:
                    # Every non-final segment is a directory, whatever the entry type.
                    is_dir = i < last or item['type'] == 'tree'
                    current[part] = {
                        '_type': 'tree' if is_dir else 'blob',
                        '_size': item.get('size', 0) if i == last else 0,
                        '_children': {}
                    }
                current = current[part]['_children']

        return self._format_tree_recursive(dir_structure, "", True)

    def _format_tree_recursive(self, structure: Dict, prefix: str = "", is_root: bool = False) -> str:
        """Format one level of the nested structure and recurse into directories.

        Root-level entries are printed without connectors; deeper entries get
        box-drawing connectors.  Directories come first, then files, each
        group sorted case-insensitively by name.
        """
        entries = sorted(
            structure.items(),
            key=lambda entry: (entry[1]['_type'] != 'tree', entry[0].lower())
        )
        last_index = len(entries) - 1
        lines = []

        for index, (name, node) in enumerate(entries):
            if is_root:
                connector = ""
                child_prefix = ""
            else:
                is_last = index == last_index
                connector = prefix + (self._ELBOW if is_last else self._TEE)
                # A finished branch leaves blank space; an open one keeps its rail.
                child_prefix = prefix + (self._BLANK if is_last else self._PIPE)

            if node['_type'] == 'tree':
                lines.append(f"{connector}{name}/")
                subtree = self._format_tree_recursive(node['_children'], child_prefix, False)
                if subtree:
                    lines.append(subtree)
            else:
                size_info = f" ({node['_size']} bytes)" if node['_size'] > 0 else ""
                lines.append(f"{connector}{name}{size_info}")

        return "\n".join(lines)

    def get_file_content(self, owner: str, repo: str, branch: str, file_path: str) -> str:
        """Return the text of one file, or an error/binary notice string."""
        try:
            # Percent-encode the path (keeping "/") so characters such as "#"
            # or "?" in file names are not read as URL fragment or query.
            url = f"{self.API_ROOT}/repos/{owner}/{repo}/contents/{quote(file_path)}"
            response = requests.get(
                url,
                headers=self.headers,
                params={'ref': branch},
                timeout=15,
            )
            if response.status_code != 200:
                return f"β Error fetching file: HTTP {response.status_code}"

            file_data = response.json()
            if file_data.get('encoding') != 'base64':
                return file_data.get('content', 'No content available')
            try:
                return base64.b64decode(file_data['content']).decode('utf-8')
            except UnicodeDecodeError:
                return "β Binary file - cannot display content"
        except Exception as e:
            # Boundary with the UI: surface any failure as a displayable message.
            return f"β Error reading file: {str(e)}"

    def print_all_files(self, tree_data: Dict) -> str:
        """Concatenate the contents of every blob in *tree_data* into one report.

        Files are fetched one at a time, in path order, via
        :meth:`get_file_content`.
        """
        if "error" in tree_data:
            return f"β {tree_data['error']}"

        files = [item for item in tree_data.get('tree', []) if item['type'] == 'blob']
        if not files:
            return "π No files found to print."

        output = [
            f"π¦ Repository: {tree_data['owner']}/{tree_data['repo']}",
            f"πΏ Branch: {tree_data['branch']}",
        ]
        if tree_data.get('subfolder'):
            output.append(f"π Subfolder: {tree_data['subfolder']}")
        output.append(f"π Total files: {len(files)}")
        output.append("\n" + "="*80 + "\n")

        total = len(files)
        for i, file_item in enumerate(sorted(files, key=lambda x: x['path']), 1):
            file_path = file_item['path']
            output.append(f"\n{'='*20} FILE {i}/{total} {'='*20}")
            output.append(f"π Path: {file_path}")
            output.append(f"π Size: {file_item['size']} bytes")
            output.append("-" * 60)
            output.append(self.get_file_content(
                tree_data['owner'],
                tree_data['repo'],
                tree_data['branch'],
                file_path
            ))
            output.append("\n" + "-" * 60)

        return "\n".join(output)
# Seed every session-state slot the app reads, without overwriting values
# that survived from a previous rerun.
_session_defaults = {
    'tree_data': {},
    'branches': [],
    'tree_display': "",
    'files_content': "",
}
for _state_key, _initial_value in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# API helper shared by the sidebar and the result columns
analyzer = GitHubRepoAnalyzer()

# Page heading and short description
st.title("π³ GitHub Repository Tree Generator")
st.markdown("""
Generate directory trees and print file contents from any public GitHub repository.
Perfect for analyzing code structure and understanding project organization.
""")
# Sidebar for inputs
with st.sidebar:
    st.header("π Repository Configuration")

    # GitHub URL input
    github_url = st.text_input(
        "GitHub Repository URL",
        placeholder="https://github.com/owner/repository",
        help="Enter the full GitHub repository URL"
    )
    entered_url = github_url.strip() if github_url else ""

    # Streamlit reruns this script on every interaction, so the branch list is
    # only fetched again when the URL differs from the one it was fetched for.
    if not entered_url:
        # No URL: drop any branch list left over from a previous repository.
        st.session_state.branches = []
        st.session_state.branches_url = ""
    elif st.session_state.get("branches_url") != entered_url:
        try:
            owner, repo = analyzer.parse_github_url(entered_url)
        except ValueError as e:
            st.error(f"Invalid URL: {str(e)}")
            st.session_state.branches = []
            # Forget the cached URL so the error keeps showing on reruns and a
            # later return to a valid URL fetches its branches again.
            st.session_state.branches_url = ""
        else:
            with st.spinner("Fetching branches..."):
                st.session_state.branches = analyzer.get_branches(owner, repo)
            st.session_state.branches_url = entered_url

    # Branch selection: a dropdown when branches are known, free text otherwise
    branch_options = st.session_state.branches
    if branch_options:
        default_index = branch_options.index('main') if 'main' in branch_options else 0
        selected_branch = st.selectbox(
            "Branch",
            options=branch_options,
            index=default_index,
            help="Select the branch to analyze"
        )
    else:
        selected_branch = st.text_input(
            "Branch",
            value="main",
            help="Enter branch name manually"
        )

    # Subfolder path
    subfolder_path = st.text_input(
        "Subfolder Path (optional)",
        placeholder="e.g., src/components",
        help="Leave empty to analyze the entire repository"
    )

    st.markdown("---")

    # Generate tree button
    if st.button("π³ Generate Tree", type="primary", use_container_width=True):
        if not entered_url:
            st.error("Please enter a GitHub repository URL.")
        elif not selected_branch.strip():
            st.error("Please enter a branch name.")
        else:
            try:
                with st.spinner("Generating repository tree..."):
                    owner, repo = analyzer.parse_github_url(entered_url)
                    subfolder = subfolder_path.strip()
                    st.session_state.tree_data = analyzer.get_repo_tree(
                        owner, repo, selected_branch, subfolder
                    )
                    st.session_state.tree_display = analyzer.format_tree_display(
                        st.session_state.tree_data
                    )
                    # File contents printed earlier belong to the previous
                    # tree; clear them so they are not shown or downloaded
                    # under the new repository's name.
                    st.session_state.files_content = ""

                if "error" in st.session_state.tree_data:
                    st.error(st.session_state.tree_data["error"])
                else:
                    st.success("β Tree generated successfully!")
            except Exception as e:
                # UI boundary: show any unexpected failure instead of crashing.
                st.error(f"Error: {str(e)}")
# Main content area: tree on the left, file contents on the right
col1, col2 = st.columns([1, 1])

with col1:
    st.header("π Repository Tree")

    if not st.session_state.tree_display:
        st.info("π Generate a tree using the sidebar to see the repository structure here.")
    else:
        # A text area keeps the tree easy to select and copy
        st.text_area(
            "Tree Structure",
            value=st.session_state.tree_display,
            height=400,
            help="You can select and copy the text from this area"
        )

        # Download / copy actions
        tree_owner = st.session_state.tree_data.get('owner', 'repo')
        tree_repo = st.session_state.tree_data.get('repo', 'tree')
        col1a, col1b = st.columns(2)
        with col1a:
            st.download_button(
                label="πΎ Download Tree",
                data=st.session_state.tree_display,
                file_name=f"{tree_owner}-{tree_repo}-tree.txt",
                mime="text/plain",
                use_container_width=True
            )
        with col1b:
            copy_tree_clicked = st.button("π Copy Tree", use_container_width=True)
            if copy_tree_clicked:
                st.code(st.session_state.tree_display, language="text")
                st.info("Tree structure displayed above - select and copy the text!")

with col2:
    st.header("π File Contents")

    if not st.session_state.tree_data:
        st.info("Generate a tree first to print file contents.")
    else:
        print_clicked = st.button("π Print All Files", type="secondary", use_container_width=True)
        if print_clicked:
            with st.spinner("Reading all files..."):
                st.session_state.files_content = analyzer.print_all_files(st.session_state.tree_data)

        if st.session_state.files_content:
            # A text area keeps the combined contents easy to select and copy
            st.text_area(
                "Files Content",
                value=st.session_state.files_content,
                height=400,
                help="You can select and copy the text from this area"
            )

            # Download / copy actions
            files_owner = st.session_state.tree_data.get('owner', 'repo')
            files_repo = st.session_state.tree_data.get('repo', 'files')
            col2a, col2b = st.columns(2)
            with col2a:
                st.download_button(
                    label="πΎ Download Files",
                    data=st.session_state.files_content,
                    file_name=f"{files_owner}-{files_repo}-content.txt",
                    mime="text/plain",
                    use_container_width=True
                )
            with col2b:
                copy_files_clicked = st.button("π Copy Files", use_container_width=True)
                if copy_files_clicked:
                    st.code(st.session_state.files_content, language="text")
                    st.info("Files content displayed above - select and copy the text!")
# Footer: a divider followed by static usage notes
_footer_markdown = """
## π Instructions:
1. **Enter GitHub URL**: Paste any public GitHub repository URL in the sidebar
2. **Select Branch**: Choose from auto-fetched branches (defaults to 'main')
3. **Subfolder (Optional)**: Specify a subfolder path to analyze only part of the repo
4. **Generate Tree**: Click to create the directory structure
5. **Print Files**: Click to display contents of all files in the tree
6. **Copy/Download**: Use the copy buttons or download buttons to save the results
## π§ Features:
- β Auto-fetch available branches
- β Support for subfolder analysis
- β File size information
- β Organized tree structure
- β Complete file content printing
- β Copy and download functionality
- β Error handling and validation
- β Responsive layout
**Note**: For private repositories, you can add a `GITHUB_TOKEN` environment variable for authentication.
"""
st.markdown("---")
st.markdown(_footer_markdown)