# dir2md-demo / app.py
# Hugging Face Space file uploaded by Flamehaven.
# Commit 35c6442 (verified): "Upload app.py"
import gradio as gr
import git
import tempfile
import shutil
from pathlib import Path
import os
import json
import sys
# Put the directory one level above this file's folder at the front of
# sys.path so that the bundled ``dir2md`` package can be imported below.
# NOTE(review): parents[1] is the *parent of the directory containing this
# file*; confirm that this is where the dir2md package lives in the deployed
# Space layout.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    # Insert at index 0 so this location is searched before any other entry.
    sys.path.insert(0, str(ROOT))

# Must come after the sys.path adjustment above.
from dir2md.core import Config, generate_markdown_report  # noqa: E402
def _validate_repo_url(repo_url: str) -> str:
    """Return *repo_url* without surrounding whitespace, or raise ValueError.

    Only ``http://`` / ``https://`` URLs with a host part are accepted.
    """
    # Function-scope import so this block does not depend on a new
    # file-level import.
    from urllib.parse import urlsplit

    url = (repo_url or "").strip()
    if not url:
        raise ValueError("Repository URL is required.")
    parts = urlsplit(url)
    if parts.scheme not in ("http", "https") or not parts.hostname:
        raise ValueError("Only http(s) repository URLs are supported.")
    return url


def process_github_repo(
    repo_url: str,
    preset: str,
    spicy: bool,
    include_contents: bool,
    max_size_mb: int,
    shallow: bool,
):
    """
    Clone a public GitHub repository and generate dir2md outputs (human MD + AI JSONL).

    Args:
        repo_url: http(s) URL of the repository to clone. Anything else
            (empty string, local path, other transports, option-like
            strings) is rejected before any clone is attempted.
        preset: Forwarded unchanged to ``Config(preset=...)``.
        spicy: Forwarded unchanged to ``Config(spicy=...)``.
        include_contents: Forwarded unchanged to
            ``Config(include_contents=...)``.
        max_size_mb: Upper bound, in MiB, on the total size of the files
            found in the cloned directory; exceeding it aborts the run.
        shallow: When true, clone with ``depth=1`` and ``single_branch=True``.

    Returns:
        A 2-tuple ``(markdown_output, jsonl_output)`` holding the two values
        returned by ``generate_markdown_report``. On any failure the tuple is
        ``("An error occurred: <message>", "")`` instead; exceptions are not
        propagated to the caller.
    """
    temp_dir_path = None
    try:
        # Validate first: the URL comes straight from the web form and is
        # handed to git, so reject non-http(s) input before creating a temp
        # directory or touching the network.
        repo_url = _validate_repo_url(repo_url)

        temp_dir_path = tempfile.mkdtemp()
        temp_dir = Path(temp_dir_path)

        gr.Info(f"Cloning repository: {repo_url} ...")
        clone_args = {"to_path": temp_dir}
        if shallow:
            clone_args["depth"] = 1
            clone_args["single_branch"] = True
        git.Repo.clone_from(repo_url, **clone_args)
        gr.Info("Repository cloned. Generating blueprint...")

        # size guard
        # NOTE: this runs after the clone, so it bounds processing cost, not
        # download size. It walks everything under temp_dir.
        total_bytes = sum(f.stat().st_size for f in temp_dir.rglob("*") if f.is_file())
        total_mb = total_bytes / (1024 * 1024)
        if total_mb > max_size_mb:
            raise ValueError(f"Repository too large: ~{total_mb:.1f} MB (limit {max_size_mb} MB)")

        output_path = temp_dir / "blueprint.md"
        cfg = Config(
            root=temp_dir,
            output=output_path,
            preset=preset,
            include_globs=[],
            exclude_globs=[],
            omit_globs=[],
            respect_gitignore=True,
            follow_symlinks=False,
            max_bytes=None,
            max_lines=None,
            include_contents=include_contents,
            llm_mode="summary",  # more readable by default
            budget_tokens=8000,
            max_file_tokens=2000,
            dedup_bits=16,
            sample_head=120,
            sample_tail=40,
            strip_comments=False,
            emit_manifest=False,  # demo keeps outputs in-memory
            explain_capsule=False,
            no_timestamp=True,
            masking_mode="basic",
            spicy=spicy,
        )
        md_output = generate_markdown_report(cfg)

        # AI-friendly JSONL output: reuse cfg with jsonl
        cfg.output_format = "jsonl"
        cfg.output = temp_dir / "blueprint.jsonl"
        jsonl_output = generate_markdown_report(cfg)

        gr.Info("Blueprint generated successfully!")
        return md_output, jsonl_output
    except Exception as e:
        # UI boundary: report the failure in the first output box rather than
        # raising. Cleanup is handled once, in ``finally``.
        return f"An error occurred: {e}", ""
    finally:
        # Best-effort cleanup; ignore_errors keeps a failed delete from
        # replacing the value being returned with an exception.
        if temp_dir_path:
            shutil.rmtree(temp_dir_path, ignore_errors=True)
# Gradio UI definition: one form wired to process_github_repo.
demo = gr.Interface(
    fn=process_github_repo,
    # The six input components are listed in the same order as the parameters
    # of process_github_repo (repo_url, preset, spicy, include_contents,
    # max_size_mb, shallow).
    inputs=[
        gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/Flamehaven/dir2md"),
        gr.Radio(choices=["pro", "raw"], value="pro", label="Select Preset"),
        gr.Checkbox(label="Spicy risk report", value=True),
        gr.Checkbox(label="Include file contents", value=True),
        gr.Slider(label="Max repo size (MB)", minimum=10, maximum=500, value=200, step=10),
        gr.Checkbox(label="Shallow clone (depth=1)", value=True),
    ],
    # Two text outputs, matching the 2-tuple returned by process_github_repo;
    # on failure the first box carries the error message and the second is empty.
    outputs=[
        gr.Textbox(label="Markdown Blueprint (human)", lines=30, show_copy_button=True),
        gr.Textbox(label="JSONL (AI-ready)", lines=15, show_copy_button=True),
    ],
    title="dir2md: AI-Ready Repository Blueprint Generator",
    description="Enter a public GitHub repository URL to convert its structure/content into a Markdown blueprint (human) and JSONL (AI).",
    allow_flagging="never",
    # Each example row supplies one value per input component, in input order.
    examples=[
        ["https://github.com/psf/requests", "pro", True, True, 200, True],
        ["https://github.com/gradio-app/gradio", "raw", True, True, 200, True],
    ],
)
# Called at module level with no ``__main__`` guard, so this runs whenever
# the module is executed or imported.
demo.launch()