import streamlit as st
import requests
import base64
import os
from typing import List, Dict, Tuple, Optional
from urllib.parse import quote
import json

# Must be the first Streamlit command
st.set_page_config(
    page_title="GitHub Repository Tree Generator",
    page_icon="🌳",
    layout="wide",
    initial_sidebar_state="expanded"
)


class GitHubRepoAnalyzer:
    """Thin client around the GitHub REST API for listing and reading repo files.

    If a ``GITHUB_TOKEN`` environment variable is set, it is sent as an
    ``Authorization`` header with every request.
    """

    API_ROOT = "https://api.github.com"

    def __init__(self):
        self.github_token = os.environ.get('GITHUB_TOKEN')
        self.headers = {
            'Accept': 'application/vnd.github.v3+json',
            'User-Agent': 'GitHubRepoAnalyzer'
        }
        if self.github_token:
            self.headers['Authorization'] = f'token {self.github_token}'

    def parse_github_url(self, url: str) -> Tuple[str, str]:
        """Parse GitHub URL to extract owner and repo name.

        Args:
            url: A URL containing ``github.com/<owner>/<repo>``; extra path
                segments, a trailing ``.git``, a query string or a fragment
                are tolerated.

        Returns:
            ``(owner, repo)``.

        Raises:
            ValueError: If the URL does not contain a non-empty owner and repo.
        """
        url = url.strip()
        # Drop "#fragment" and "?query" so they do not end up in the repo name.
        url = url.split('#', 1)[0].split('?', 1)[0].rstrip('/')

        if 'github.com/' in url:
            parts = url.split('github.com/')[-1].split('/')
            if len(parts) >= 2:
                owner, repo = parts[0], parts[1]
                if repo.endswith('.git'):
                    repo = repo[:-4]
                if owner and repo:
                    return owner, repo

        raise ValueError("Invalid GitHub URL format")

    def get_branches(self, owner: str, repo: str) -> List[str]:
        """Fetch available branches for the repository.

        This is best-effort: on any HTTP or parsing failure the conventional
        default names ``['main', 'master']`` are returned instead of raising.
        Only the first page (up to 100 branches) is requested.
        """
        url = f"{self.API_ROOT}/repos/{owner}/{repo}/branches"
        try:
            response = requests.get(
                url,
                headers=self.headers,
                params={'per_page': 100},
                timeout=10,
            )
            if response.status_code == 200:
                return [branch['name'] for branch in response.json()]
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Deliberate fallback: the UI still works with guessed branch names.
            pass
        return ['main', 'master']

    def _fetch_tree(self, owner: str, repo: str, ref: str):
        """Request the recursive git tree of *ref* and return the raw response."""
        url = f"{self.API_ROOT}/repos/{owner}/{repo}/git/trees/{ref}?recursive=1"
        return requests.get(url, headers=self.headers, timeout=30)

    def get_repo_tree(self, owner: str, repo: str, branch: str = 'main', subfolder: str = '') -> Dict:
        """Get the complete tree structure of a repository.

        Args:
            owner: Repository owner.
            repo: Repository name.
            branch: Branch to read. If it does not exist (HTTP 404), ``master``
                is tried as a fallback.
            subfolder: Optional path; only this entry and entries below it are
                returned. Leading/trailing slashes are ignored.

        Returns:
            On success a dict with keys ``owner``, ``repo``, ``branch`` (the
            branch that was actually read), ``subfolder`` (normalised),
            ``truncated`` (GitHub's flag for an incomplete listing) and
            ``tree`` (list of ``{'path', 'type', 'size', 'url'}`` dicts).
            On failure a dict with a single ``error`` key.
        """
        subfolder = subfolder.strip().strip('/')
        try:
            resolved_branch = branch
            response = self._fetch_tree(owner, repo, branch)

            if response.status_code == 404 and branch != 'master':
                response = self._fetch_tree(owner, repo, 'master')
                if response.status_code == 200:
                    # Report the branch we really read so that later file
                    # requests use a ref that exists.
                    resolved_branch = 'master'

            if response.status_code != 200:
                return {"error": f"Failed to fetch repository tree. Status: {response.status_code}"}

            tree_data = response.json()

            # Match on a path-segment boundary: "src" must not match "src2/...".
            prefix = subfolder + '/'
            filtered_tree = []
            for item in tree_data.get('tree', []):
                path = item['path']
                if subfolder and path != subfolder and not path.startswith(prefix):
                    continue
                filtered_tree.append({
                    'path': path,
                    'type': item['type'],
                    'size': item.get('size', 0),
                    'url': item.get('url', '')
                })

            return {
                "owner": owner,
                "repo": repo,
                "branch": resolved_branch,
                "subfolder": subfolder,
                "truncated": bool(tree_data.get('truncated', False)),
                "tree": filtered_tree
            }
        except (requests.RequestException, ValueError, KeyError, TypeError, AttributeError) as e:
            return {"error": f"Error fetching repository tree: {str(e)}"}

    def format_tree_display(self, tree_data: Dict) -> str:
        """Format tree data for display.

        Returns a header (repository, branch, optional subfolder, item count)
        followed by the rendered tree, or a one-line message when *tree_data*
        carries an error or has no entries.
        """
        if "error" in tree_data:
            return f"❌ {tree_data['error']}"

        tree = tree_data.get('tree', [])
        if not tree:
            return "📁 No files found in the specified path."

        subfolder = tree_data.get('subfolder', '')

        # Header information
        output = [
            f"📦 Repository: {tree_data['owner']}/{tree_data['repo']}",
            f"🌿 Branch: {tree_data['branch']}",
        ]
        if subfolder:
            output.append(f"📁 Subfolder: {subfolder}")
        output.append(f"📊 Total items: {len(tree)}")
        if tree_data.get('truncated'):
            output.append("⚠️ Listing truncated by GitHub - the tree below is incomplete")
        output.append("\n" + "="*60 + "\n")

        # Build tree structure
        output.append(self._build_tree_structure(tree, subfolder))

        return "\n".join(output)

    def _build_tree_structure(self, tree_items: List[Dict], subfolder: str = '') -> str:
        """Build a hierarchical tree structure with proper indentation and lines.

        Args:
            tree_items: Flat entries with ``path``, ``type`` and optional ``size``.
            subfolder: Prefix removed from every path before nesting, so the
                rendered tree is rooted at the subfolder.

        Returns:
            The rendered tree, or ``""`` for no items.
        """
        if not tree_items:
            return ""

        subfolder = subfolder.strip('/')
        prefix = subfolder + '/'

        # name -> {'_type', '_size', '_children'} nested mapping
        dir_structure: Dict = {}

        for item in tree_items:
            path = item['path']

            # Remove the subfolder prefix, but only on a segment boundary.
            if subfolder:
                if path == subfolder:
                    continue  # the subfolder entry itself is the root
                if path.startswith(prefix):
                    path = path[len(prefix):]

            if not path:  # Skip empty paths
                continue

            parts = path.split('/')
            last_index = len(parts) - 1
            current = dir_structure

            for i, part in enumerate(parts):
                if part not in current:
                    is_leaf = i == last_index
                    is_dir = not is_leaf or item['type'] == 'tree'
                    current[part] = {
                        '_type': 'tree' if is_dir else 'blob',
                        '_size': item.get('size', 0) if is_leaf else 0,
                        '_children': {}
                    }
                current = current[part]['_children']

        return self._format_tree_recursive(dir_structure, "", True)

    def _format_tree_recursive(self, structure: Dict, prefix: str = "", is_root: bool = False) -> str:
        """Recursively format the tree structure with proper tree lines.

        Top-level entries (``is_root=True``) are printed without connectors;
        every deeper level gets ``├── `` / ``└── `` connectors. Directories
        sort before files, each group case-insensitively by name.
        """
        lines = []

        # Sort: directories first, then files, both alphabetically
        items = sorted(
            structure.items(),
            key=lambda x: (x[1]['_type'] != 'tree', x[0].lower()),
        )
        last_index = len(items) - 1

        for i, (name, data) in enumerate(items):
            is_last = i == last_index

            if is_root:
                current_prefix = ""
                next_prefix = ""
            else:
                current_prefix = prefix + ("└── " if is_last else "├── ")
                # Continuation must be as wide as the 4-column connectors above
                # so that nested levels line up.
                next_prefix = prefix + ("    " if is_last else "│   ")

            if data['_type'] == 'tree':
                lines.append(f"{current_prefix}{name}/")
                children_output = self._format_tree_recursive(
                    data['_children'], next_prefix, False
                )
                if children_output:
                    lines.append(children_output)
            else:
                size_info = f" ({data['_size']} bytes)" if data['_size'] > 0 else ""
                lines.append(f"{current_prefix}{name}{size_info}")

        return "\n".join(lines)

    def get_file_content(self, owner: str, repo: str, branch: str, file_path: str) -> str:
        """Get content of a specific file.

        Returns the decoded text of the file, or a string starting with ``❌``
        describing why it could not be shown (HTTP error, binary content,
        unexpected response). Never raises for request/decoding failures.
        """
        try:
            # Percent-encode the path (keeping "/") and send the ref as a query
            # parameter so names with spaces, "#", "?" or "%" are transmitted intact.
            url = f"{self.API_ROOT}/repos/{owner}/{repo}/contents/{quote(file_path)}"
            response = requests.get(
                url,
                headers=self.headers,
                params={'ref': branch},
                timeout=15,
            )

            if response.status_code != 200:
                return f"❌ Error fetching file: HTTP {response.status_code}"

            file_data = response.json()
            if not isinstance(file_data, dict):
                # The contents endpoint returns a list when the path is a directory.
                return "❌ Error reading file: path is not a regular file"

            if file_data.get('encoding') == 'base64':
                try:
                    return base64.b64decode(file_data['content']).decode('utf-8')
                except UnicodeDecodeError:
                    return "❌ Binary file - cannot display content"

            return file_data.get('content', 'No content available')
        except (requests.RequestException, ValueError, KeyError, TypeError) as e:
            return f"❌ Error reading file: {str(e)}"

    def print_all_files(self, tree_data: Dict) -> str:
        """Print content of all files in the repository/subfolder.

        Issues one content request per blob in ``tree_data['tree']`` (sorted by
        path) and returns everything as a single report string.
        """
        if "error" in tree_data:
            return f"❌ {tree_data['error']}"

        tree = tree_data.get('tree', [])
        files = [item for item in tree if item['type'] == 'blob']

        if not files:
            return "📁 No files found to print."

        output = [
            f"📦 Repository: {tree_data['owner']}/{tree_data['repo']}",
            f"🌿 Branch: {tree_data['branch']}",
        ]
        if tree_data.get('subfolder'):
            output.append(f"📁 Subfolder: {tree_data['subfolder']}")
        output.append(f"📄 Total files: {len(files)}")
        output.append("\n" + "="*80 + "\n")

        sorted_files = sorted(files, key=lambda x: x['path'])
        total = len(sorted_files)

        for i, file_item in enumerate(sorted_files, 1):
            file_path = file_item['path']
            output.append(f"\n{'='*20} FILE {i}/{total} {'='*20}")
            output.append(f"📄 Path: {file_path}")
            output.append(f"📊 Size: {file_item.get('size', 0)} bytes")
            output.append("-" * 60)

            content = self.get_file_content(
                tree_data['owner'],
                tree_data['repo'],
                tree_data['branch'],
                file_path
            )
            output.append(content)
            output.append("\n" + "-" * 60)

        return "\n".join(output)


# Initialize session state
if 'tree_data' not in st.session_state:
    st.session_state.tree_data = {}
if 'branches' not in st.session_state:
    st.session_state.branches = []
if 'branches_repo' not in st.session_state:
    # (owner, repo) the cached branch list belongs to; avoids refetching on
    # every Streamlit rerun.
    st.session_state.branches_repo = None
if 'tree_display' not in st.session_state:
    st.session_state.tree_display = ""
if 'files_content' not in st.session_state:
    st.session_state.files_content = ""

# Initialize analyzer
analyzer = GitHubRepoAnalyzer()

# Main title and description
st.title("🌳 GitHub Repository Tree Generator")
st.markdown("""
Generate directory trees and print file contents from any public GitHub repository.
Perfect for analyzing code structure and understanding project organization.
""")

# Sidebar for inputs
with st.sidebar:
    st.header("📝 Repository Configuration")

    # GitHub URL input
    github_url = st.text_input(
        "GitHub Repository URL",
        placeholder="https://github.com/owner/repository",
        help="Enter the full GitHub repository URL"
    )

    # Auto-fetch branches when URL changes
    if github_url and github_url.strip():
        try:
            owner, repo = analyzer.parse_github_url(github_url)
            # Streamlit reruns this script on every interaction; only hit the
            # API when the repository actually changed.
            if st.session_state.branches_repo != (owner, repo):
                with st.spinner("Fetching branches..."):
                    st.session_state.branches = analyzer.get_branches(owner, repo)
                st.session_state.branches_repo = (owner, repo)
        except Exception as e:
            st.error(f"Invalid URL: {str(e)}")
            st.session_state.branches = []
            st.session_state.branches_repo = None

    # Branch selection
    if st.session_state.branches:
        branch_options = st.session_state.branches
        default_index = branch_options.index('main') if 'main' in branch_options else 0

        selected_branch = st.selectbox(
            "Branch",
            options=branch_options,
            index=default_index,
            help="Select the branch to analyze"
        )
    else:
        selected_branch = st.text_input(
            "Branch",
            value="main",
            help="Enter branch name manually"
        )

    # Subfolder path
    subfolder_path = st.text_input(
        "Subfolder Path (optional)",
        placeholder="e.g., src/components",
        help="Leave empty to analyze the entire repository"
    )

    st.markdown("---")

    # Generate tree button
    if st.button("🌳 Generate Tree", type="primary", use_container_width=True):
        if not github_url.strip():
            st.error("Please enter a GitHub repository URL.")
        elif not selected_branch.strip():
            st.error("Please enter a branch name.")
        else:
            try:
                with st.spinner("Generating repository tree..."):
                    owner, repo = analyzer.parse_github_url(github_url)
                    subfolder = subfolder_path.strip()

                    st.session_state.tree_data = analyzer.get_repo_tree(owner, repo, selected_branch, subfolder)
                    st.session_state.tree_display = analyzer.format_tree_display(st.session_state.tree_data)
                    # File contents from a previously generated tree would be
                    # misleading next to the new tree.
                    st.session_state.files_content = ""

                if "error" in st.session_state.tree_data:
                    st.error(st.session_state.tree_data["error"])
                else:
                    st.success("✅ Tree generated successfully!")
            except Exception as e:
                st.error(f"Error: {str(e)}")

# Main content area
col1, col2 = st.columns([1, 1])

with col1:
    st.header("📁 Repository Tree")

    if st.session_state.tree_display:
        # Display tree in a text area for easy copying
        st.text_area(
            "Tree Structure",
            value=st.session_state.tree_display,
            height=400,
            help="You can select and copy the text from this area"
        )

        # Action buttons
        col1a, col1b = st.columns(2)
        with col1a:
            st.download_button(
                label="💾 Download Tree",
                data=st.session_state.tree_display,
                file_name=f"{st.session_state.tree_data.get('owner', 'repo')}-{st.session_state.tree_data.get('repo', 'tree')}-tree.txt",
                mime="text/plain",
                use_container_width=True
            )
        with col1b:
            if st.button("📋 Copy Tree", use_container_width=True):
                st.code(st.session_state.tree_display, language="text")
                st.info("Tree structure displayed above - select and copy the text!")
    else:
        st.info("👈 Generate a tree using the sidebar to see the repository structure here.")

with col2:
    st.header("📄 File Contents")

    if st.session_state.tree_data:
        if st.button("📄 Print All Files", type="secondary", use_container_width=True):
            with st.spinner("Reading all files..."):
                st.session_state.files_content = analyzer.print_all_files(st.session_state.tree_data)

        if st.session_state.files_content:
            # Display files content in a text area for easy copying
            st.text_area(
                "Files Content",
                value=st.session_state.files_content,
                height=400,
                help="You can select and copy the text from this area"
            )

            # Action buttons
            col2a, col2b = st.columns(2)
            with col2a:
                st.download_button(
                    label="💾 Download Files",
                    data=st.session_state.files_content,
                    file_name=f"{st.session_state.tree_data.get('owner', 'repo')}-{st.session_state.tree_data.get('repo', 'files')}-content.txt",
                    mime="text/plain",
                    use_container_width=True
                )
            with col2b:
                if st.button("📋 Copy Files", use_container_width=True):
                    st.code(st.session_state.files_content, language="text")
                    st.info("Files content displayed above - select and copy the text!")
    else:
        st.info("Generate a tree first to print file contents.")

# Footer with instructions
st.markdown("---")
st.markdown("""
## 📋 Instructions:
1. **Enter GitHub URL**: Paste any public GitHub repository URL in the sidebar
2. **Select Branch**: Choose from auto-fetched branches (defaults to 'main')
3. **Subfolder (Optional)**: Specify a subfolder path to analyze only part of the repo
4. **Generate Tree**: Click to create the directory structure
5. **Print Files**: Click to display contents of all files in the tree
6. **Copy/Download**: Use the copy buttons or download buttons to save the results

## 🔧 Features:
- ✅ Auto-fetch available branches
- ✅ Support for subfolder analysis
- ✅ File size information
- ✅ Organized tree structure
- ✅ Complete file content printing
- ✅ Copy and download functionality
- ✅ Error handling and validation
- ✅ Responsive layout

**Note**: For private repositories, you can add a `GITHUB_TOKEN` environment variable for authentication.
""")