# Spaces: ketannnn / Repo2TXT · Sleeping · File size: 4,262 Bytes · d5fdeca

import os
import shutil
import tempfile
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from starlette.background import BackgroundTask
from pydantic import BaseModel, HttpUrl
from git import Repo
import logging

# Configure root logging at INFO level and create this module's logger,
# which merge_repo uses to report clone progress and skipped files.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The ASGI application object; routes below are registered on it.
app = FastAPI(title="Repo2TXT API")

# Absolute path of the directory containing this file. The "/" route looks
# for index.html here.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Enable CORS so browser frontends served from any origin can call the API.
# Credentials are deliberately disabled: a wildcard origin must not be
# combined with credentialed requests (FastAPI's CORS docs require explicit
# origins when allow_credentials=True), and none of the endpoints in this
# module read cookies or authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Per-file limits applied by merge_repo: files larger than MAX_FILE_SIZE_MB
# (size in bytes divided by 1024 * 1024) or with more than MAX_LINES lines
# are left out of the merged output.
MAX_FILE_SIZE_MB = 2
MAX_LINES = 10000

# File extensions excluded from the merged output. merge_repo lower-cases the
# extension before the lookup, so entries here must be lowercase and include
# the leading dot.
SKIP_EXTENSIONS = {
    ".pkl", ".pyc", ".exe", ".dll", ".so", ".zip", ".tar", ".gz",
    ".jpg", ".jpeg", ".png", ".gif", ".mp4", ".mp3", ".wav", ".pdf",
    ".bin", ".pt", ".ckpt", ".onnx", ".glb", ".json", ".ico", ".svg"
}

# Directory names pruned from the walk. Matching is by exact, case-sensitive
# directory name at any depth, so everything beneath a match is skipped too.
SKIP_DIRS = {
    "__pycache__", "node_modules", "venv", ".venv",
    "env", ".git", "generated", "build", "dist",
    ".idea", ".vscode", ".next", ".github", "vendor"
}

# Request body accepted by POST /download.
class RepoRequest(BaseModel):
    # Repository URL to clone. Declared as a plain string, so pydantic does
    # no URL validation; the value is handed to merge_repo as-is.
    url: str

def merge_repo(repo_url: str):
    """Clone *repo_url* and concatenate its text files into one document.

    The repository is shallow-cloned into a fresh temporary directory and
    walked in sorted order, so the output is deterministic for a given
    repository state. A file is left out when it:

    * lives under a directory named in ``SKIP_DIRS``,
    * has an extension in ``SKIP_EXTENSIONS`` or a name starting with ``.``,
    * is a symbolic link,
    * exceeds ``MAX_FILE_SIZE_MB`` or ``MAX_LINES``, or
    * cannot be read.

    Args:
        repo_url: URL handed to ``Repo.clone_from``.

    Returns:
        A ``(output_file, temp_dir)`` tuple. ``output_file`` is the path of
        the merged ``project.txt`` inside ``temp_dir``; the caller owns
        ``temp_dir`` and must delete it when done.

    Raises:
        HTTPException: With status 400 if cloning or merging fails. The
            temporary directory is removed before raising.
    """
    temp_dir = tempfile.mkdtemp()
    repo_dir = os.path.join(temp_dir, "repo")
    output_file = os.path.join(temp_dir, "project.txt")

    try:
        logger.info("Cloning repo: %s", repo_url)
        # Clone repo with a depth of 1 to save time/bandwidth
        Repo.clone_from(repo_url, repo_dir, depth=1)

        with open(output_file, "w", encoding="utf-8") as out:
            out.write(f"Source Repository: {repo_url}\n")
            out.write("=" * 50 + "\n\n")

            for root, dirs, files in os.walk(repo_dir):
                # Modify dirs in-place to prune unwanted directories; sorting
                # makes the traversal order independent of the filesystem.
                dirs[:] = sorted(d for d in dirs if d not in SKIP_DIRS)

                for file in sorted(files):
                    ext = os.path.splitext(file)[1].lower()
                    if ext in SKIP_EXTENSIONS or file.startswith('.'):
                        continue

                    file_path = os.path.join(root, file)

                    # The repository content is untrusted: a committed symlink
                    # may point at any file on this host, and open() would
                    # follow it and copy that file into the response.
                    if os.path.islink(file_path):
                        logger.info("Skipping symlink %s", file_path)
                        continue

                    # Skip large or unreadable files
                    try:
                        size_mb = os.path.getsize(file_path) / (1024 * 1024)
                        if size_mb > MAX_FILE_SIZE_MB:
                            continue

                        with open(file_path, "r", encoding="utf-8", errors='ignore') as f:
                            lines = f.readlines()
                    except (OSError, ValueError) as e:
                        logger.warning(f"Could not process file {file_path}: {e}")
                        continue

                    if len(lines) > MAX_LINES:
                        logger.info(f"Skipping {file} due to line count limit")
                        continue

                    rel_path = os.path.relpath(file_path, repo_dir)
                    out.write(f"\n\n===== FILE: {rel_path} =====\n\n")
                    out.writelines(lines)

        return output_file, temp_dir

    except Exception as e:
        logger.error(f"Error processing repo: {e}")
        shutil.rmtree(temp_dir, ignore_errors=True)
        # Chain the cause so the original traceback survives in server logs.
        raise HTTPException(
            status_code=400,
            detail=f"Failed to process repository: {str(e)}",
        ) from e

@app.post("/download")
async def download_repo(req: RepoRequest):
    """Clone the requested repository and return it merged into one text file.

    Responds with ``project.txt`` as a ``text/plain`` attachment. Returns
    400 when the URL is empty or the repository cannot be processed, and 500
    when the merged file is unexpectedly missing.
    """
    if not req.url:
        raise HTTPException(status_code=400, detail="Repository URL is required")

    # merge_repo performs a blocking git clone and synchronous file I/O.
    # Calling it directly inside this coroutine would stall the event loop
    # (and every other in-flight request) for the duration of the clone, so
    # run it on a worker thread. Imported locally to keep the fix
    # self-contained; starlette is already a dependency of this module.
    from starlette.concurrency import run_in_threadpool

    file_path, temp_dir = await run_in_threadpool(merge_repo, req.url)

    if not os.path.exists(file_path):
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise HTTPException(status_code=500, detail="Generated file not found")

    # The temporary directory is deleted only after the response body has
    # been sent, since FileResponse streams the file from disk.
    return FileResponse(
        file_path,
        media_type="text/plain",
        filename="project.txt",
        background=BackgroundTask(shutil.rmtree, temp_dir, ignore_errors=True)
    )

@app.get("/")
async def root():
    # Serve the frontend page that sits next to this module; when it is
    # absent, answer with a small JSON status payload instead.
    landing_page = os.path.join(BASE_DIR, "index.html")
    if not os.path.exists(landing_page):
        return {"message": "Repo2TXT API is running. index.html not found."}
    return FileResponse(landing_page)