File size: 4,262 Bytes
d5fdeca | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | import os
import shutil
import tempfile
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from starlette.background import BackgroundTask
from pydantic import BaseModel, HttpUrl
from git import Repo
import logging
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Repo2TXT API")

# Path to the directory where index.html is located
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Enable CORS for local development and frontend interaction.
# Credentials stay disabled: a wildcard origin must not be combined with
# credentialed requests, and this API relies on neither cookies nor
# Authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Per-file limits applied while merging a repository.
MAX_FILE_SIZE_MB = 2
MAX_LINES = 10_000

# File extensions (lower-case, with the leading dot) that are never merged.
SKIP_EXTENSIONS = {
    # compiled / binary artifacts
    ".pyc", ".exe", ".dll", ".so", ".bin",
    # archives
    ".zip", ".tar", ".gz",
    # images and icons
    ".jpg", ".jpeg", ".png", ".gif", ".ico", ".svg",
    # audio / video
    ".mp4", ".mp3", ".wav",
    # documents
    ".pdf",
    # serialized data and model files
    ".pkl", ".pt", ".ckpt", ".onnx", ".glb", ".json",
}

# Directory names that are pruned from the walk wherever they appear.
SKIP_DIRS = {
    # Python caches and virtual environments
    "__pycache__", "venv", ".venv", "env",
    # third-party dependencies and framework output
    "node_modules", "vendor", ".next",
    # version control and CI metadata
    ".git", ".github",
    # generated / build output
    "generated", "build", "dist",
    # editor settings
    ".idea", ".vscode",
}
class RepoRequest(BaseModel):
    """Request body carrying the repository to be merged."""

    # Location of the Git repository to clone, accepted as a plain string.
    url: str
def merge_repo(repo_url: str):
    """Clone ``repo_url`` and concatenate its text files into ``project.txt``.

    The repository is shallow-cloned into a fresh temporary directory and
    walked. Every file that passes the skip filters is appended to the output
    under a ``===== FILE: <relative path> =====`` header.

    Args:
        repo_url: http(s) URL of the Git repository to clone.

    Returns:
        An ``(output_file, temp_dir)`` tuple. ``output_file`` lives inside
        ``temp_dir``; the caller is responsible for removing ``temp_dir``
        once the file has been consumed.

    Raises:
        HTTPException: status 400 when the URL is not a plain http(s) URL, or
            when cloning/merging fails (the temporary directory is removed
            before raising).
    """
    from urllib.parse import urlsplit

    # The URL comes straight from the request body. Only allow http(s) so
    # local paths, helper transports such as ``ext::`` and option-like
    # strings starting with ``-`` never reach ``git clone``.
    try:
        parsed = urlsplit(repo_url)
    except ValueError:
        parsed = None
    if parsed is None or parsed.scheme not in ("http", "https") or not parsed.netloc:
        raise HTTPException(
            status_code=400,
            detail="Repository URL must be a valid http(s) URL",
        )

    temp_dir = tempfile.mkdtemp()
    repo_dir = os.path.join(temp_dir, "repo")
    output_file = os.path.join(temp_dir, "project.txt")
    max_bytes = MAX_FILE_SIZE_MB * 1024 * 1024

    try:
        logger.info("Cloning repo: %s", repo_url)
        # Clone repo with a depth of 1 to save time/bandwidth
        Repo.clone_from(repo_url, repo_dir, depth=1)

        with open(output_file, "w", encoding="utf-8") as out:
            out.write(f"Source Repository: {repo_url}\n")
            out.write("=" * 50 + "\n\n")

            for root, dirs, files in os.walk(repo_dir):
                # Modify dirs in-place to skip unwanted directories
                dirs[:] = [d for d in dirs if d not in SKIP_DIRS]

                for file in files:
                    ext = os.path.splitext(file)[1].lower()
                    if ext in SKIP_EXTENSIONS or file.startswith("."):
                        continue

                    file_path = os.path.join(root, file)

                    # A repository can ship symlinks that point outside the
                    # clone; following them would leak host files into the
                    # generated output.
                    if os.path.islink(file_path):
                        logger.info("Skipping symlink %s", file_path)
                        continue

                    # Reading is best-effort: an unreadable file is logged
                    # and skipped rather than failing the whole request.
                    try:
                        # Skip large files
                        if os.path.getsize(file_path) > max_bytes:
                            continue
                        with open(
                            file_path, "r", encoding="utf-8", errors="ignore"
                        ) as f:
                            lines = f.readlines()
                    except OSError as e:
                        logger.warning(
                            "Could not process file %s: %s", file_path, e
                        )
                        continue

                    if len(lines) > MAX_LINES:
                        logger.info(
                            "Skipping %s due to line count limit", file
                        )
                        continue

                    rel_path = os.path.relpath(file_path, repo_dir)
                    out.write(f"\n\n===== FILE: {rel_path} =====\n\n")
                    out.writelines(lines)

        return output_file, temp_dir
    except Exception as e:
        logger.error("Error processing repo: %s", e)
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise HTTPException(
            status_code=400,
            detail=f"Failed to process repository: {str(e)}",
        ) from e
@app.post("/download")
async def download_repo(req: RepoRequest):
    """Merge the requested repository and return it as ``project.txt``.

    Args:
        req: Request body holding the repository URL.

    Returns:
        A ``FileResponse`` serving the merged text file as ``project.txt``.
        Removal of the temporary working directory is attached to the
        response as a background task.

    Raises:
        HTTPException: status 400 when the URL is empty or the repository
            cannot be processed; status 500 when the merged file is missing.
    """
    from starlette.concurrency import run_in_threadpool

    repo_url = req.url.strip()
    if not repo_url:
        raise HTTPException(status_code=400, detail="Repository URL is required")

    # Cloning and merging are blocking operations; run them in the thread
    # pool so the event loop keeps serving other requests meanwhile.
    file_path, temp_dir = await run_in_threadpool(merge_repo, repo_url)

    if not os.path.exists(file_path):
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise HTTPException(status_code=500, detail="Generated file not found")

    return FileResponse(
        file_path,
        media_type="text/plain",
        filename="project.txt",
        background=BackgroundTask(shutil.rmtree, temp_dir, ignore_errors=True),
    )
@app.get("/")
async def root():
    """Serve ``index.html`` from ``BASE_DIR``, or a JSON notice if it is absent."""
    landing_page = os.path.join(BASE_DIR, "index.html")

    # Without a frontend file, fall back to a small status payload.
    if not os.path.exists(landing_page):
        return {"message": "Repo2TXT API is running. index.html not found."}

    return FileResponse(landing_page)
|