garvitcpp committed on
Commit
1683459
·
verified ·
1 Parent(s): a3dde2f

Update app/services/github_service.py

Browse files
Files changed (1) hide show
  1. app/services/github_service.py +72 -1
app/services/github_service.py CHANGED
@@ -2,7 +2,8 @@ import git
2
  import os
3
  import tempfile
4
  import shutil
5
- from typing import List, Dict
 
6
  from pathlib import Path
7
  import logging
8
 
@@ -22,6 +23,76 @@ class GitHubService:
22
  'vendor', 'target', 'bin', 'obj', '.gradle', '.idea', '.vscode'
23
  }
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  async def clone_repository(self, github_url: str) -> str:
26
  """Clone repository to temporary directory"""
27
  temp_dir = tempfile.mkdtemp(prefix="codequery_")
 
2
  import os
3
  import tempfile
4
  import shutil
5
+ import asyncio
6
+ from typing import List, Dict, Tuple
7
  from pathlib import Path
8
  import logging
9
 
 
23
  'vendor', 'target', 'bin', 'obj', '.gradle', '.idea', '.vscode'
24
  }
25
 
26
async def verify_repository(self, github_url: str) -> Tuple[bool, str]:
    """Check that a repository is reachable and contains supported code files.

    Verification runs in two steps, neither of which downloads file contents:

    1. ``git ls-remote`` confirms the URL resolves to a readable repository.
    2. A shallow, blob-less bare clone is made into a temporary directory and
       listed with ``git ls-tree``; the file names are matched against
       ``self.supported_extensions``.

    Args:
        github_url: Repository URL to verify.

    Returns:
        A ``(ok, message)`` tuple. ``ok`` is ``True`` with the message
        ``"Success"`` when both checks pass; otherwise ``ok`` is ``False`` and
        ``message`` is a short explanation of which check failed.
    """
    logger.info(f"🔍 Verifying repository: {github_url}")

    # GIT_TERMINAL_PROMPT=0 makes git fail immediately instead of prompting
    # for credentials, which would otherwise block on a private repository.
    git_env = {**os.environ, "GIT_TERMINAL_PROMPT": "0"}

    async def run_git(*args: str, cwd=None) -> Tuple[int, bytes, bytes]:
        """Run ``git <args>`` and return ``(returncode, stdout, stderr)``."""
        process = await asyncio.create_subprocess_exec(
            "git", *args,
            cwd=cwd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=git_env,
        )
        stdout, stderr = await process.communicate()
        return process.returncode, stdout, stderr

    # 1. Check accessibility
    try:
        # ls-remote only talks to the remote; nothing is downloaded.
        # "--" ends option parsing so a URL beginning with "-" can never be
        # interpreted as a git option.
        returncode, _, stderr = await run_git("ls-remote", "--", github_url)
        if returncode != 0:
            detail = stderr.decode(errors="replace")
            logger.warning(f"❌ Verification failed - Repository inaccessible: {detail}")
            return False, "Repository is private, misspelled, or does not exist."
    except Exception as e:
        logger.error(f"❌ Error during git ls-remote: {e}")
        return False, f"Failed to verify repository accessibility: {str(e)}"

    # 2. Check for supported code extensions
    temp_dir = tempfile.mkdtemp(prefix="codequery_verify_")
    try:
        # Bare + --filter=blob:none + --depth 1 fetches the latest commit and
        # its trees only, skipping file contents.
        returncode, _, stderr = await run_git(
            "clone", "--bare", "--filter=blob:none", "--depth", "1",
            "--", github_url, temp_dir,
        )
        if returncode != 0:
            detail = stderr.decode(errors="replace").strip()
            logger.warning(f"❌ Verification failed during bare clone: {detail}")
            return False, "Failed to inspect repository files."

        # List every file path in the tree that HEAD points to. -z emits
        # NUL-terminated, unquoted paths; without it git wraps paths that
        # contain non-ASCII or special characters in quotes, which corrupts
        # the detected suffix.
        returncode, listing, _ = await run_git(
            "ls-tree", "-r", "-z", "--name-only", "HEAD",
            cwd=temp_dir,
        )
        if returncode != 0:
            return False, "Failed to read repository file structure."

        file_names = listing.decode(errors="replace").split("\0")
        has_code = any(
            Path(name).suffix.lower() in self.supported_extensions
            for name in file_names
            if name
        )
        if not has_code:
            logger.warning(f"❌ Verification failed - No supported code files in {github_url}")
            return False, "Repository does not contain supported code files."

        logger.info(f"✅ Repository verification successful for {github_url}")
        return True, "Success"

    except Exception as e:
        logger.error(f"❌ Error during code extension check: {e}")
        return False, f"Failed to verify repository contents: {str(e)}"
    finally:
        # Always remove the scratch clone; ignore_errors also covers the
        # case where the directory is already gone.
        shutil.rmtree(temp_dir, ignore_errors=True)
96
  async def clone_repository(self, github_url: str) -> str:
97
  """Clone repository to temporary directory"""
98
  temp_dir = tempfile.mkdtemp(prefix="codequery_")