# github-repo-util / src/streamlit_app.py
# ishans24's picture
# Update src/streamlit_app.py
# 884ba03 verified
import base64
import json
import os
from typing import List, Dict, Tuple, Optional
from urllib.parse import quote

import requests
import streamlit as st
# Streamlit requires the page configuration to be issued before any other
# Streamlit command, so this call stays at the very top of the script.
_PAGE_SETTINGS = {
    "page_title": "GitHub Repository Tree Generator",
    "page_icon": "🌳",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
class GitHubRepoAnalyzer:
    """Fetch and render the file tree and file contents of a GitHub repository.

    All network access goes through the GitHub REST API. If the
    ``GITHUB_TOKEN`` environment variable is set when the instance is created,
    it is sent as an ``Authorization`` header on every request.
    """

    # Connectors used when drawing the tree. Each one is four columns wide so
    # nested levels line up underneath each other.
    _TEE = "├── "
    _ELBOW = "└── "
    _PIPE = "│   "
    _BLANK = "    "

    def __init__(self):
        self.github_token = os.environ.get('GITHUB_TOKEN')
        self.headers = {
            'Accept': 'application/vnd.github.v3+json',
            'User-Agent': 'GitHubRepoAnalyzer'
        }
        if self.github_token:
            self.headers['Authorization'] = f'token {self.github_token}'

    def parse_github_url(self, url: str) -> Tuple[str, str]:
        """Parse a GitHub URL and return ``(owner, repo)``.

        Extra path segments (``/tree/main/...``), a trailing ``.git``, trailing
        slashes, and any ``?query`` or ``#fragment`` are ignored.

        Raises:
            ValueError: if the URL does not contain ``github.com/`` followed by
                a non-empty owner and repository name.
        """
        url = url.strip()
        if 'github.com/' in url:
            remainder = url.split('github.com/')[-1]
            # A query string or fragment is never part of the owner/repo path.
            for separator in ('?', '#'):
                remainder = remainder.split(separator, 1)[0]
            parts = remainder.rstrip('/').split('/')
            if len(parts) >= 2:
                owner, repo = parts[0], parts[1]
                if repo.endswith('.git'):
                    repo = repo[:-4]
                if owner and repo:
                    return owner, repo
        raise ValueError("Invalid GitHub URL format")

    @staticmethod
    def _normalize_subfolder(subfolder: str) -> str:
        """Return *subfolder* without surrounding whitespace or slashes."""
        return (subfolder or '').strip().strip('/')

    @staticmethod
    def _is_within_subfolder(path: str, subfolder: str) -> bool:
        """Return True if *path* is *subfolder* itself or lies beneath it.

        Matching is done on whole path segments, so ``src`` does not match
        ``src2/file.py``. An empty *subfolder* matches every path.
        """
        if not subfolder:
            return True
        return path == subfolder or path.startswith(subfolder + '/')

    def get_branches(self, owner: str, repo: str) -> List[str]:
        """Fetch the branch names of a repository.

        Returns ``['main', 'master']`` as a best-effort fallback when the
        request fails or does not return HTTP 200.
        """
        try:
            url = f"https://api.github.com/repos/{owner}/{repo}/branches"
            # Ask for the largest page size; the API default is 30 per page.
            response = requests.get(
                url, headers=self.headers, params={'per_page': 100}, timeout=10
            )
            if response.status_code == 200:
                return [branch['name'] for branch in response.json()]
            return ['main', 'master']
        except Exception:
            # Deliberate best-effort: the UI falls back to common branch names.
            return ['main', 'master']

    def get_repo_tree(self, owner: str, repo: str, branch: str = 'main', subfolder: str = '') -> Dict:
        """Get the recursive tree of a repository, optionally limited to a subfolder.

        If *branch* is not found (HTTP 404) the request is retried once against
        ``master``.

        Returns:
            On success, a dict with keys ``owner``, ``repo``, ``branch`` (the
            branch that was actually fetched), ``subfolder`` (normalized),
            ``tree`` (list of ``{'path', 'type', 'size', 'url'}`` dicts) and
            ``truncated`` (True if the API reported an incomplete listing).
            On failure, a dict with a single ``error`` key.
        """
        try:
            subfolder = self._normalize_subfolder(subfolder)
            resolved_branch = branch
            url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{branch}?recursive=1"
            response = requests.get(url, headers=self.headers, timeout=30)
            if response.status_code == 404 and branch != 'master':
                # Remember which branch actually answered so that later file
                # fetches use a ref that exists.
                resolved_branch = 'master'
                url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/master?recursive=1"
                response = requests.get(url, headers=self.headers, timeout=30)
            if response.status_code != 200:
                return {"error": f"Failed to fetch repository tree. Status: {response.status_code}"}
            tree_data = response.json()
            filtered_tree = [
                {
                    'path': item['path'],
                    'type': item['type'],
                    'size': item.get('size', 0),
                    'url': item.get('url', ''),
                }
                for item in tree_data.get('tree', [])
                if self._is_within_subfolder(item['path'], subfolder)
            ]
            return {
                "owner": owner,
                "repo": repo,
                "branch": resolved_branch,
                "subfolder": subfolder,
                "tree": filtered_tree,
                "truncated": bool(tree_data.get('truncated', False)),
            }
        except Exception as e:
            return {"error": f"Error fetching repository tree: {str(e)}"}

    def format_tree_display(self, tree_data: Dict) -> str:
        """Format the result of ``get_repo_tree`` as a header plus a drawn tree."""
        if "error" in tree_data:
            return f"❌ {tree_data['error']}"
        tree = tree_data.get('tree', [])
        if not tree:
            return "📁 No files found in the specified path."
        subfolder = tree_data.get('subfolder', '')
        # Header information
        output = [
            f"📦 Repository: {tree_data['owner']}/{tree_data['repo']}",
            f"🌿 Branch: {tree_data['branch']}",
        ]
        if subfolder:
            output.append(f"📁 Subfolder: {subfolder}")
        output.append(f"📊 Total items: {len(tree)}")
        if tree_data.get('truncated'):
            output.append("⚠️ Listing truncated by the GitHub API - some items are missing")
        output.append("\n" + "=" * 60 + "\n")
        # Build tree structure
        output.append(self._build_tree_structure(tree, subfolder))
        return "\n".join(output)

    def _build_tree_structure(self, tree_items: List[Dict], subfolder: str = '') -> str:
        """Build the drawn tree for *tree_items*, shown relative to *subfolder*.

        Each item needs ``path`` and ``type`` (``'tree'`` or ``'blob'``) keys;
        ``size`` is optional. The entry for the subfolder itself is skipped.
        """
        if not tree_items:
            return ""
        subfolder = self._normalize_subfolder(subfolder)
        strip_prefix = subfolder + '/' if subfolder else ''
        # Nested mapping: name -> {'_type', '_size', '_children'}
        dir_structure: Dict = {}
        for item in tree_items:
            path = item['path']
            if subfolder:
                if path == subfolder:
                    continue  # the subfolder's own entry has no relative path
                # Strip on a segment boundary only, so "src" never eats the
                # start of "src2/...".
                if path.startswith(strip_prefix):
                    path = path[len(strip_prefix):]
            if not path:  # Skip empty paths
                continue
            parts = path.split('/')
            last_index = len(parts) - 1
            current = dir_structure
            for i, part in enumerate(parts):
                if part not in current:
                    is_leaf = i == last_index
                    current[part] = {
                        # Intermediate segments are always directories.
                        '_type': 'blob' if is_leaf and item['type'] != 'tree' else 'tree',
                        '_size': item.get('size', 0) if is_leaf else 0,
                        '_children': {}
                    }
                current = current[part]['_children']
        return self._format_tree_recursive(dir_structure, "", True)

    def _format_tree_recursive(self, structure: Dict, prefix: str = "", is_root: bool = False) -> str:
        """Recursively draw *structure* with tree connector lines.

        Entries at the root level are printed without connectors; everything
        below them gets connector characters. Directories are listed before
        files, each group sorted case-insensitively by name.
        """
        lines = []
        items = sorted(
            structure.items(),
            key=lambda entry: (entry[1]['_type'] != 'tree', entry[0].lower()),
        )
        last_index = len(items) - 1
        for i, (name, data) in enumerate(items):
            is_last = i == last_index
            if is_root:
                current_prefix = next_prefix = ""
            else:
                current_prefix = prefix + (self._ELBOW if is_last else self._TEE)
                next_prefix = prefix + (self._BLANK if is_last else self._PIPE)
            if data['_type'] == 'tree':
                lines.append(f"{current_prefix}{name}/")
                children_output = self._format_tree_recursive(
                    data['_children'], next_prefix, False
                )
                if children_output:
                    lines.append(children_output)
            else:
                size_info = f" ({data['_size']} bytes)" if data['_size'] > 0 else ""
                lines.append(f"{current_prefix}{name}{size_info}")
        return "\n".join(lines)

    def get_file_content(self, owner: str, repo: str, branch: str, file_path: str) -> str:
        """Get the text content of one file, or a ``❌ ...`` message on failure.

        Files whose bytes are not valid UTF-8 are reported as binary.
        """
        try:
            # Percent-encode the path ("/" is kept) and let requests encode the
            # ref, so names containing spaces, "#", "?" or "&" still resolve.
            url = f"https://api.github.com/repos/{owner}/{repo}/contents/{quote(file_path)}"
            response = requests.get(
                url, headers=self.headers, params={'ref': branch}, timeout=15
            )
            if response.status_code != 200:
                return f"❌ Error fetching file: HTTP {response.status_code}"
            file_data = response.json()
            if not isinstance(file_data, dict):
                # The contents endpoint returns a list when the path is a directory.
                return "❌ Error reading file: path is not a file"
            if file_data.get('encoding') == 'base64':
                try:
                    return base64.b64decode(file_data['content']).decode('utf-8')
                except UnicodeDecodeError:
                    return "❌ Binary file - cannot display content"
            return file_data.get('content', 'No content available')
        except Exception as e:
            return f"❌ Error reading file: {str(e)}"

    def print_all_files(self, tree_data: Dict) -> str:
        """Return the content of every file (blob) in *tree_data* as one report.

        Files are fetched one at a time via ``get_file_content``, in path order.
        """
        if "error" in tree_data:
            return f"❌ {tree_data['error']}"
        files = [item for item in tree_data.get('tree', []) if item['type'] == 'blob']
        if not files:
            return "📁 No files found to print."
        output = [
            f"📦 Repository: {tree_data['owner']}/{tree_data['repo']}",
            f"🌿 Branch: {tree_data['branch']}",
        ]
        if tree_data.get('subfolder'):
            output.append(f"📁 Subfolder: {tree_data['subfolder']}")
        output.append(f"📄 Total files: {len(files)}")
        output.append("\n" + "=" * 80 + "\n")
        sorted_files = sorted(files, key=lambda entry: entry['path'])
        for i, file_item in enumerate(sorted_files, 1):
            file_path = file_item['path']
            output.append(f"\n{'=' * 20} FILE {i}/{len(files)} {'=' * 20}")
            output.append(f"📄 Path: {file_path}")
            output.append(f"📊 Size: {file_item.get('size', 0)} bytes")
            output.append("-" * 60)
            output.append(
                self.get_file_content(
                    tree_data['owner'],
                    tree_data['repo'],
                    tree_data['branch'],
                    file_path,
                )
            )
            output.append("\n" + "-" * 60)
        return "\n".join(output)
# Seed every session-state slot the page reads, without overwriting values
# that survived from an earlier rerun of the script.
for _state_key, _initial_value in (
    ('tree_data', {}),
    ('branches', []),
    ('tree_display', ""),
    ('files_content', ""),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# Single analyzer instance shared by the sidebar and the main content area
analyzer = GitHubRepoAnalyzer()
# Page heading followed by a short introduction rendered as markdown
_APP_TITLE = "🌳 GitHub Repository Tree Generator"
_APP_INTRO = """
Generate directory trees and print file contents from any public GitHub repository.
Perfect for analyzing code structure and understanding project organization.
"""
st.title(_APP_TITLE)
st.markdown(_APP_INTRO)
# Sidebar: repository URL, branch, optional subfolder, and the generate button
with st.sidebar:
    st.header("📁 Repository Configuration")

    # GitHub URL input
    github_url = st.text_input(
        "GitHub Repository URL",
        placeholder="https://github.com/owner/repository",
        help="Enter the full GitHub repository URL"
    )

    # Fetch branches only when the target repository changes. Streamlit reruns
    # this script on every widget interaction, so an unconditional fetch would
    # call the GitHub API on each rerun and quickly exhaust the rate limit.
    if github_url and github_url.strip():
        try:
            owner, repo = analyzer.parse_github_url(github_url)
        except ValueError as e:
            st.error(f"Invalid URL: {str(e)}")
            st.session_state.branches = []
            st.session_state.branches_repo = None
        else:
            repo_key = f"{owner}/{repo}"
            if st.session_state.get('branches_repo') != repo_key:
                with st.spinner("Fetching branches..."):
                    st.session_state.branches = analyzer.get_branches(owner, repo)
                st.session_state.branches_repo = repo_key

    # Branch selection: dropdown when branches are known, free text otherwise
    if st.session_state.branches:
        known_branches = st.session_state.branches
        default_index = known_branches.index('main') if 'main' in known_branches else 0
        selected_branch = st.selectbox(
            "Branch",
            options=known_branches,
            index=default_index,
            help="Select the branch to analyze"
        )
    else:
        selected_branch = st.text_input(
            "Branch",
            value="main",
            help="Enter branch name manually"
        )

    # Subfolder path
    subfolder_path = st.text_input(
        "Subfolder Path (optional)",
        placeholder="e.g., src/components",
        help="Leave empty to analyze the entire repository"
    )

    st.markdown("---")

    # Generate tree button
    if st.button("🌳 Generate Tree", type="primary", use_container_width=True):
        if not github_url.strip():
            st.error("Please enter a GitHub repository URL.")
        elif not selected_branch.strip():
            st.error("Please enter a branch name.")
        else:
            try:
                with st.spinner("Generating repository tree..."):
                    owner, repo = analyzer.parse_github_url(github_url)
                    subfolder = subfolder_path.strip()
                    st.session_state.tree_data = analyzer.get_repo_tree(owner, repo, selected_branch, subfolder)
                    st.session_state.tree_display = analyzer.format_tree_display(st.session_state.tree_data)
                    # File contents printed for a previous tree no longer
                    # match the tree on screen, so drop them.
                    st.session_state.files_content = ""
                if "error" in st.session_state.tree_data:
                    st.error(st.session_state.tree_data["error"])
                else:
                    st.success("✅ Tree generated successfully!")
            except Exception as e:
                st.error(f"Error: {str(e)}")
# Main content area: tree on the left, file contents on the right
col1, col2 = st.columns([1, 1])

with col1:
    st.header("📁 Repository Tree")
    if st.session_state.tree_display:
        # Display tree in a text area for easy copying
        st.text_area(
            "Tree Structure",
            value=st.session_state.tree_display,
            height=400,
            help="You can select and copy the text from this area"
        )
        # Action buttons
        col1a, col1b = st.columns(2)
        with col1a:
            st.download_button(
                label="💾 Download Tree",
                data=st.session_state.tree_display,
                file_name=f"{st.session_state.tree_data.get('owner', 'repo')}-{st.session_state.tree_data.get('repo', 'tree')}-tree.txt",
                mime="text/plain",
                use_container_width=True
            )
        with col1b:
            if st.button("📋 Copy Tree", use_container_width=True):
                st.code(st.session_state.tree_display, language="text")
                st.info("Tree structure displayed above - select and copy the text!")
    else:
        st.info("👈 Generate a tree using the sidebar to see the repository structure here.")

with col2:
    st.header("📄 File Contents")
    # A failed fetch stores {"error": ...}, which is truthy but holds no files,
    # so it must not enable the print action.
    has_tree = bool(st.session_state.tree_data) and "error" not in st.session_state.tree_data
    if has_tree:
        if st.button("📄 Print All Files", type="secondary", use_container_width=True):
            with st.spinner("Reading all files..."):
                st.session_state.files_content = analyzer.print_all_files(st.session_state.tree_data)
        if st.session_state.files_content:
            # Display files content in a text area for easy copying
            st.text_area(
                "Files Content",
                value=st.session_state.files_content,
                height=400,
                help="You can select and copy the text from this area"
            )
            # Action buttons
            col2a, col2b = st.columns(2)
            with col2a:
                st.download_button(
                    label="💾 Download Files",
                    data=st.session_state.files_content,
                    file_name=f"{st.session_state.tree_data.get('owner', 'repo')}-{st.session_state.tree_data.get('repo', 'files')}-content.txt",
                    mime="text/plain",
                    use_container_width=True
                )
            with col2b:
                if st.button("📋 Copy Files", use_container_width=True):
                    st.code(st.session_state.files_content, language="text")
                    st.info("Files content displayed above - select and copy the text!")
    else:
        st.info("Generate a tree first to print file contents.")
# Footer with usage instructions. Blank lines separate the headings, lists and
# the closing note so the note renders as its own paragraph rather than as a
# continuation of the last list item.
st.markdown("---")
st.markdown("""
## 📋 Instructions:

1. **Enter GitHub URL**: Paste any public GitHub repository URL in the sidebar
2. **Select Branch**: Choose from auto-fetched branches (defaults to 'main')
3. **Subfolder (Optional)**: Specify a subfolder path to analyze only part of the repo
4. **Generate Tree**: Click to create the directory structure
5. **Print Files**: Click to display contents of all files in the tree
6. **Copy/Download**: Use the copy buttons or download buttons to save the results

## 🔧 Features:

- ✅ Auto-fetch available branches
- ✅ Support for subfolder analysis
- ✅ File size information
- ✅ Organized tree structure
- ✅ Complete file content printing
- ✅ Copy and download functionality
- ✅ Error handling and validation
- ✅ Responsive layout

**Note**: For private repositories, you can add a `GITHUB_TOKEN` environment variable for authentication.
""")