"""Repo2TXT API.

Clones a git repository, concatenates its text files into a single
``project.txt`` and serves that file as a download.
"""

import logging
import os
import shutil
import tempfile
from typing import TextIO, Tuple
from urllib.parse import urlparse

from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
from git import Repo
from pydantic import BaseModel, HttpUrl
from starlette.background import BackgroundTask
from starlette.concurrency import run_in_threadpool

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Repo2TXT API")

# Path to the directory where index.html is located
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Enable CORS for local development and frontend interaction.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive; confirm this is intended before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Files larger than this (in MiB) or longer than MAX_LINES are left out.
MAX_FILE_SIZE_MB = 2
MAX_LINES = 10000

# File extensions (lower-case, with leading dot) that are never merged.
SKIP_EXTENSIONS = {
    ".pkl", ".pyc", ".exe", ".dll", ".so", ".zip", ".tar", ".gz",
    ".jpg", ".jpeg", ".png", ".gif", ".mp4", ".mp3", ".wav", ".pdf",
    ".bin", ".pt", ".ckpt", ".onnx", ".glb", ".json", ".ico", ".svg",
}

# Directory names that are pruned from the walk entirely.
SKIP_DIRS = {
    "__pycache__", "node_modules", "venv", ".venv", "env", ".git",
    "generated", "build", "dist", ".idea", ".vscode", ".next",
    ".github", "vendor",
}

# Only network transports are accepted for cloning. Local paths, ``file://``
# and git's ``ext::`` helper are rejected because the URL is user-supplied.
ALLOWED_URL_SCHEMES = frozenset({"http", "https", "git", "ssh"})


class RepoRequest(BaseModel):
    """Request body for ``POST /download``."""

    # URL of the git repository to clone and merge.
    url: str


def _is_supported_repo_url(repo_url: str) -> bool:
    """Return True if *repo_url* is a network URL that is safe to hand to git."""
    try:
        parsed = urlparse(repo_url)
    except ValueError:
        # urlparse raises on malformed input such as an unterminated IPv6 host.
        return False
    return (
        parsed.scheme.lower() in ALLOWED_URL_SCHEMES
        and bool(parsed.netloc)
        # A host starting with "-" could be interpreted as a command-line option.
        and not parsed.netloc.startswith("-")
    )


def _append_file(out: TextIO, file_path: str, repo_dir: str) -> None:
    """Append one file to *out* under a header, unless it exceeds the limits."""
    size_mb = os.path.getsize(file_path) / (1024 * 1024)
    if size_mb > MAX_FILE_SIZE_MB:
        return

    # errors="ignore" drops undecodable bytes so stray binary content
    # does not abort the merge.
    with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
        lines = f.readlines()

    if len(lines) > MAX_LINES:
        logger.info(
            "Skipping %s due to line count limit", os.path.basename(file_path)
        )
        return

    rel_path = os.path.relpath(file_path, repo_dir)
    out.write(f"\n\n===== FILE: {rel_path} =====\n\n")
    out.writelines(lines)


def _write_merged_output(repo_dir: str, output_file: str, repo_url: str) -> None:
    """Walk *repo_dir* and write every eligible file into *output_file*."""
    with open(output_file, "w", encoding="utf-8") as out:
        out.write(f"Source Repository: {repo_url}\n")
        out.write("=" * 50 + "\n\n")

        for current_dir, dirs, files in os.walk(repo_dir):
            # Modify dirs in-place so os.walk does not descend into them.
            dirs[:] = [d for d in dirs if d not in SKIP_DIRS]

            for file_name in files:
                ext = os.path.splitext(file_name)[1].lower()
                if ext in SKIP_EXTENSIONS or file_name.startswith("."):
                    continue

                file_path = os.path.join(current_dir, file_name)

                # A repository may contain symlinks that point outside the
                # clone (e.g. to /etc/passwd); following them would leak
                # server files into the download.
                if os.path.islink(file_path):
                    logger.info("Skipping symlink %s", file_path)
                    continue

                # Best effort: a single unreadable file must not fail the merge.
                try:
                    _append_file(out, file_path, repo_dir)
                except Exception as e:
                    logger.warning("Could not process file %s: %s", file_path, e)
                    continue


def merge_repo(repo_url: str) -> Tuple[str, str]:
    """Clone *repo_url* and merge its text files into one output file.

    Args:
        repo_url: Network URL (http, https, git or ssh) of the repository.

    Returns:
        ``(output_file, temp_dir)``: the path of the merged ``project.txt``
        and the temporary directory that contains it. The caller is
        responsible for deleting ``temp_dir``.

    Raises:
        HTTPException: status 400 if the URL is not supported or if cloning
            or merging fails. The temporary directory is removed first.
    """
    if not _is_supported_repo_url(repo_url):
        raise HTTPException(
            status_code=400,
            detail="Failed to process repository: unsupported repository URL",
        )

    temp_dir = tempfile.mkdtemp()
    repo_dir = os.path.join(temp_dir, "repo")
    output_file = os.path.join(temp_dir, "project.txt")

    try:
        logger.info("Cloning repo: %s", repo_url)
        # Clone repo with a depth of 1 to save time/bandwidth
        Repo.clone_from(repo_url, repo_dir, depth=1)
        _write_merged_output(repo_dir, output_file, repo_url)
        return output_file, temp_dir
    except Exception as e:
        logger.error("Error processing repo: %s", e)
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise HTTPException(
            status_code=400,
            detail=f"Failed to process repository: {str(e)}",
        ) from e


@app.post("/download")
async def download_repo(req: RepoRequest):
    """Clone the requested repository and return the merged text file.

    The temporary working directory is deleted by a background task after
    the response has been sent.

    Raises:
        HTTPException: 400 if the URL is empty or the repository cannot be
            processed; 500 if the merged file is missing afterwards.
    """
    repo_url = req.url.strip() if req.url else ""
    if not repo_url:
        raise HTTPException(status_code=400, detail="Repository URL is required")

    # Cloning and walking the tree are blocking; run them off the event loop
    # so other requests keep being served.
    file_path, temp_dir = await run_in_threadpool(merge_repo, repo_url)

    if not os.path.exists(file_path):
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise HTTPException(status_code=500, detail="Generated file not found")

    return FileResponse(
        file_path,
        media_type="text/plain",
        filename="project.txt",
        background=BackgroundTask(shutil.rmtree, temp_dir, ignore_errors=True),
    )


@app.get("/")
async def root():
    """Serve ``index.html`` from BASE_DIR, or a JSON status message if absent."""
    index_path = os.path.join(BASE_DIR, "index.html")
    if os.path.exists(index_path):
        return FileResponse(index_path)
    return {"message": "Repo2TXT API is running. index.html not found."}