Update app/services/github_service.py
Browse files
app/services/github_service.py
CHANGED
|
@@ -2,7 +2,8 @@ import git
|
|
| 2 |
import os
|
| 3 |
import tempfile
|
| 4 |
import shutil
|
| 5 |
-
|
|
|
|
| 6 |
from pathlib import Path
|
| 7 |
import logging
|
| 8 |
|
|
@@ -22,6 +23,76 @@ class GitHubService:
|
|
| 22 |
'vendor', 'target', 'bin', 'obj', '.gradle', '.idea', '.vscode'
|
| 23 |
}
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
async def clone_repository(self, github_url: str) -> str:
|
| 26 |
"""Clone repository to temporary directory"""
|
| 27 |
temp_dir = tempfile.mkdtemp(prefix="codequery_")
|
|
|
|
| 2 |
import os
|
| 3 |
import tempfile
|
| 4 |
import shutil
|
| 5 |
+
import asyncio
|
| 6 |
+
from typing import List, Dict, Tuple
|
| 7 |
from pathlib import Path
|
| 8 |
import logging
|
| 9 |
|
|
|
|
| 23 |
'vendor', 'target', 'bin', 'obj', '.gradle', '.idea', '.vscode'
|
| 24 |
}
|
| 25 |
|
| 26 |
+
async def verify_repository(self, github_url: str) -> Tuple[bool, str]:
    """Check that a repository is reachable and holds supported code files.

    Intended to run before the full clone so unusable repositories are
    rejected cheaply. Two checks are made, in order:

    1. ``git ls-remote`` confirms the remote can be read without git
       prompting for credentials.
    2. A shallow, blob-less bare clone followed by ``git ls-tree`` lists the
       paths at the cloned ``HEAD``; at least one path must have a suffix
       contained in ``self.supported_extensions``.

    Args:
        github_url: Remote repository URL supplied by the caller.

    Returns:
        ``(True, "Success")`` when both checks pass, otherwise
        ``(False, reason)`` where ``reason`` is a user-facing explanation.
        Errors raised while running git are caught and reported as a
        ``(False, reason)`` result rather than propagated.
    """
    # Reject empty input and anything git could parse as a command-line
    # option (e.g. "--upload-pack=<cmd>") before it gets near a subprocess.
    if (
        not isinstance(github_url, str)
        or not github_url.strip()
        or github_url.startswith("-")
    ):
        return False, "Invalid repository URL."

    logger.info(f"🔍 Verifying repository: {github_url}")

    # GIT_TERMINAL_PROMPT=0 in the child environment makes git fail fast
    # instead of blocking on a username/password prompt.
    git_env = {**os.environ, "GIT_TERMINAL_PROMPT": "0"}

    # 1. Check accessibility
    try:
        # ls-remote only lists refs, so nothing is downloaded. The "--"
        # separator guarantees the URL is never treated as an option.
        process = await asyncio.create_subprocess_exec(
            "git", "ls-remote", "--", github_url,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.PIPE,
            env=git_env,
        )
        _, stderr = await process.communicate()

        if process.returncode != 0:
            details = stderr.decode("utf-8", errors="replace")
            logger.warning(f"❌ Verification failed - Repository inaccessible: {details}")
            return False, "Repository is private, misspelled, or does not exist."

    except Exception as e:
        logger.error(f"❌ Error during git ls-remote: {e}")
        return False, f"Failed to verify repository accessibility: {str(e)}"

    # 2. Check for supported code extensions
    temp_dir = tempfile.mkdtemp(prefix="codequery_verify_")
    try:
        # A bare clone with --filter=blob:none and --depth 1 fetches the
        # commit and its trees but skips file contents.
        process = await asyncio.create_subprocess_exec(
            "git", "clone", "--bare", "--filter=blob:none", "--depth", "1",
            "--", github_url, temp_dir,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.PIPE,
            env=git_env,
        )
        _, clone_err = await process.communicate()

        if process.returncode != 0:
            details = clone_err.decode("utf-8", errors="replace")
            logger.warning(f"❌ Verification failed during bare clone: {details}")
            return False, "Failed to inspect repository files."

        # List every path reachable from HEAD. -z emits NUL-terminated,
        # unquoted names; without it git C-quotes unusual paths and the
        # trailing quote would corrupt the detected suffix.
        process_ls = await asyncio.create_subprocess_exec(
            "git", "ls-tree", "-r", "-z", "--name-only", "HEAD",
            cwd=temp_dir,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout_ls, ls_err = await process_ls.communicate()

        if process_ls.returncode != 0:
            details = ls_err.decode("utf-8", errors="replace")
            logger.warning(f"❌ Verification failed - could not list files: {details}")
            return False, "Failed to read repository file structure."

        paths = stdout_ls.decode("utf-8", errors="replace").split("\0")
        has_code = any(
            Path(name).suffix.lower() in self.supported_extensions
            for name in paths
            if name
        )

        if not has_code:
            logger.warning(f"❌ Verification failed - No supported code files in {github_url}")
            return False, "Repository does not contain supported code files."

        logger.info(f"✅ Repository verification successful for {github_url}")
        return True, "Success"

    except Exception as e:
        logger.error(f"❌ Error during code extension check: {e}")
        return False, f"Failed to verify repository contents: {str(e)}"
    finally:
        # Best-effort cleanup; ignore_errors already covers a missing dir.
        shutil.rmtree(temp_dir, ignore_errors=True)
|
| 95 |
+
|
| 96 |
async def clone_repository(self, github_url: str) -> str:
|
| 97 |
"""Clone repository to temporary directory"""
|
| 98 |
temp_dir = tempfile.mkdtemp(prefix="codequery_")
|