Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,7 +23,7 @@ GITHUB_PRIVATE_KEY = os.getenv("GITHUB_PRIVATE_KEY")
|
|
| 23 |
# In-memory storage for our repository data
|
| 24 |
repo_data_store = {}
|
| 25 |
|
| 26 |
-
# ---
|
| 27 |
|
| 28 |
class CodeProcessor:
|
| 29 |
def __init__(self, model_name='all-MiniLM-L6-v2'):
|
|
@@ -32,24 +32,30 @@ class CodeProcessor:
|
|
| 32 |
print("Model initialized.")
|
| 33 |
|
| 34 |
def parse_python_file(self, file_path):
|
| 35 |
-
"""Parses a Python file to extract functions and their
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
functions = []
|
| 43 |
for node in ast.walk(tree):
|
| 44 |
if isinstance(node, ast.FunctionDef):
|
| 45 |
-
#
|
| 46 |
-
|
| 47 |
-
function_source
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
return functions
|
| 54 |
|
| 55 |
def process_repo(self, repo_path):
|
|
@@ -59,44 +65,44 @@ class CodeProcessor:
|
|
| 59 |
for root, _, files in os.walk(repo_path):
|
| 60 |
for file in files:
|
| 61 |
if file.endswith('.py'):
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
if not all_functions:
|
| 67 |
print("No Python functions found in the repository.")
|
| 68 |
return None, None
|
| 69 |
|
| 70 |
print(f"Found {len(all_functions)} functions. Generating embeddings...")
|
| 71 |
-
# We create embeddings for the source code of each function
|
| 72 |
function_sources = [f["source"] for f in all_functions]
|
| 73 |
embeddings = self.model.encode(function_sources, show_progress_bar=False)
|
| 74 |
|
| 75 |
-
# Create a FAISS index
|
| 76 |
embedding_dim = embeddings.shape[1]
|
| 77 |
index = faiss.IndexFlatL2(embedding_dim)
|
| 78 |
index.add(np.array(embeddings, dtype=np.float32))
|
| 79 |
|
| 80 |
print(f"FAISS index created successfully with {index.ntotal} vectors.")
|
| 81 |
|
| 82 |
-
# We return the index and the metadata (the list of function details)
|
| 83 |
return index, all_functions
|
| 84 |
|
| 85 |
# --- Initialize the processor globally ---
|
| 86 |
-
# This ensures the model is loaded only once when the app starts.
|
| 87 |
code_processor = CodeProcessor()
|
| 88 |
|
| 89 |
|
| 90 |
-
# --- GitHub App Authentication & Repo Management (
|
| 91 |
-
|
| 92 |
def create_jwt(app_id, private_key):
|
| 93 |
-
# (This function is unchanged)
|
| 94 |
now = int(time.time())
|
| 95 |
payload = {"iat": now, "exp": now + (10 * 60), "iss": app_id}
|
| 96 |
return jwt.encode(payload, private_key, algorithm="RS256")
|
| 97 |
|
| 98 |
def get_installation_access_token(installation_id, app_id, private_key):
|
| 99 |
-
# (This function is unchanged)
|
| 100 |
app_jwt = create_jwt(app_id, private_key)
|
| 101 |
headers = {"Authorization": f"Bearer {app_jwt}", "Accept": "application/vnd.github.v3+json"}
|
| 102 |
url = f"https://api.github.com/app/installations/{installation_id}/access_tokens"
|
|
@@ -113,11 +119,9 @@ def process_repository(repo_url, token, repo_full_name):
|
|
| 113 |
clone_url = repo_url.replace("https://", f"https://x-access-token:{token}@")
|
| 114 |
Repo.clone_from(clone_url, temp_dir)
|
| 115 |
|
| 116 |
-
# --- NEW: Use the CodeProcessor ---
|
| 117 |
faiss_index, metadata = code_processor.process_repo(temp_dir)
|
| 118 |
|
| 119 |
if faiss_index:
|
| 120 |
-
# Store the live FAISS index and metadata in our in-memory store
|
| 121 |
repo_data_store[repo_full_name] = {
|
| 122 |
"status": "processed",
|
| 123 |
"faiss_index": faiss_index,
|
|
@@ -130,16 +134,13 @@ def process_repository(repo_url, token, repo_full_name):
|
|
| 130 |
except Exception as e:
|
| 131 |
print(f"Failed to process repository {repo_full_name}: {e}")
|
| 132 |
finally:
|
| 133 |
-
# Clean up the cloned repository from disk
|
| 134 |
print(f"Cleaning up temporary directory: {temp_dir}")
|
| 135 |
shutil.rmtree(temp_dir)
|
| 136 |
|
| 137 |
-
|
| 138 |
# --- FastAPI App (Webhook handler is unchanged) ---
|
| 139 |
app = FastAPI()
|
| 140 |
|
| 141 |
async def verify_signature(request: Request):
|
| 142 |
-
# (This function is unchanged)
|
| 143 |
if not GITHUB_WEBHOOK_SECRET: raise HTTPException(status_code=500, detail="Webhook secret not configured.")
|
| 144 |
signature_header = request.headers.get("X-Hub-Signature-256")
|
| 145 |
if not signature_header: raise HTTPException(status_code=400, detail="X-Hub-Signature-256 header is missing.")
|
|
@@ -154,7 +155,6 @@ def read_root():
|
|
| 154 |
|
| 155 |
@app.post("/api/github/webhook")
|
| 156 |
async def github_webhook(request: Request):
|
| 157 |
-
# (This webhook handler is unchanged from the last step)
|
| 158 |
await verify_signature(request)
|
| 159 |
event_type = request.headers.get("X-GitHub-Event")
|
| 160 |
payload = await request.json()
|
|
|
|
| 23 |
# In-memory storage for our repository data
|
| 24 |
repo_data_store = {}
|
| 25 |
|
| 26 |
+
# --- Code Processing and Vectorization Class ---
|
| 27 |
|
| 28 |
class CodeProcessor:
|
| 29 |
def __init__(self, model_name='all-MiniLM-L6-v2'):
|
|
|
|
| 32 |
print("Model initialized.")
|
| 33 |
|
| 34 |
def parse_python_file(self, file_path):
    """Extract every function definition from a Python file using the AST.

    Args:
        file_path: Path of the Python source file to read (decoded as UTF-8).

    Returns:
        A list of dicts, one per ``def`` / ``async def`` found anywhere in
        the file (methods and nested functions included, since the whole
        tree is walked). Each dict has the keys ``"name"``, ``"source"``
        and ``"file_path"``. An empty list is returned when the file cannot
        be read, decoded or parsed.
    """
    # Read the whole file up front: ast.get_source_segment needs the exact
    # text that was parsed in order to slice out each function's source.
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            source_code = file.read()
    except (OSError, UnicodeDecodeError) as exc:
        # One unreadable or non-UTF-8 file must not abort the whole scan.
        print(f"Could not read {file_path} ({exc}), skipping.")
        return []

    try:
        tree = ast.parse(source_code)
    except (SyntaxError, ValueError):
        # ValueError covers interpreters that reject NUL bytes that way.
        print(f"SyntaxError in {file_path}, skipping.")
        return []

    # Path with the system temp-dir prefix stripped; it is the same for
    # every function in the file, so compute it once.
    stored_path = file_path.replace(tempfile.gettempdir(), "")

    functions = []
    for node in ast.walk(tree):
        # Coroutines are functions too; without AsyncFunctionDef every
        # ``async def`` would be silently left out of the result.
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            function_source = ast.get_source_segment(source_code, node)
            if function_source:  # None/empty means no usable segment
                functions.append({
                    "name": node.name,
                    "source": function_source,
                    "file_path": stored_path,
                })
    return functions
|
| 60 |
|
| 61 |
def process_repo(self, repo_path):
|
|
|
|
| 65 |
for root, _, files in os.walk(repo_path):
|
| 66 |
for file in files:
|
| 67 |
if file.endswith('.py'):
|
| 68 |
+
# Create the full path relative to the repo root for parsing
|
| 69 |
+
full_path = os.path.join(root, file)
|
| 70 |
+
# Create a "clean" path for storage in metadata
|
| 71 |
+
clean_path = os.path.relpath(full_path, repo_path)
|
| 72 |
+
|
| 73 |
+
parsed_funcs = self.parse_python_file(full_path)
|
| 74 |
+
for func in parsed_funcs:
|
| 75 |
+
func['file_path'] = clean_path # Overwrite with the clean path
|
| 76 |
+
all_functions.extend(parsed_funcs)
|
| 77 |
|
| 78 |
if not all_functions:
|
| 79 |
print("No Python functions found in the repository.")
|
| 80 |
return None, None
|
| 81 |
|
| 82 |
print(f"Found {len(all_functions)} functions. Generating embeddings...")
|
|
|
|
| 83 |
function_sources = [f["source"] for f in all_functions]
|
| 84 |
embeddings = self.model.encode(function_sources, show_progress_bar=False)
|
| 85 |
|
|
|
|
| 86 |
embedding_dim = embeddings.shape[1]
|
| 87 |
index = faiss.IndexFlatL2(embedding_dim)
|
| 88 |
index.add(np.array(embeddings, dtype=np.float32))
|
| 89 |
|
| 90 |
print(f"FAISS index created successfully with {index.ntotal} vectors.")
|
| 91 |
|
|
|
|
| 92 |
return index, all_functions
|
| 93 |
|
| 94 |
# --- Initialize the processor globally ---
|
|
|
|
| 95 |
code_processor = CodeProcessor()
|
| 96 |
|
| 97 |
|
| 98 |
+
# --- GitHub App Authentication & Repo Management (Unchanged) ---
|
| 99 |
+
# The helpers below mint a GitHub App JWT and exchange it for an installation access token.
|
| 100 |
def create_jwt(app_id, private_key):
    """Build a short-lived RS256 JWT used to authenticate as the GitHub App.

    Args:
        app_id: GitHub App identifier, placed in the ``iss`` claim.
        private_key: PEM private key used to sign the token.

    Returns:
        The encoded JWT as produced by ``jwt.encode``.
    """
    now = int(time.time())
    payload = {
        # Back-date the issue time by a minute so a slightly fast local
        # clock does not produce a token GitHub sees as issued in the future.
        "iat": now - 60,
        # GitHub caps the lifetime at 10 minutes; stay under the cap so the
        # same clock drift cannot push the expiry past it.
        "exp": now + (9 * 60),
        "iss": app_id,
    }
    return jwt.encode(payload, private_key, algorithm="RS256")
|
| 104 |
|
| 105 |
def get_installation_access_token(installation_id, app_id, private_key):
|
|
|
|
| 106 |
app_jwt = create_jwt(app_id, private_key)
|
| 107 |
headers = {"Authorization": f"Bearer {app_jwt}", "Accept": "application/vnd.github.v3+json"}
|
| 108 |
url = f"https://api.github.com/app/installations/{installation_id}/access_tokens"
|
|
|
|
| 119 |
clone_url = repo_url.replace("https://", f"https://x-access-token:{token}@")
|
| 120 |
Repo.clone_from(clone_url, temp_dir)
|
| 121 |
|
|
|
|
| 122 |
faiss_index, metadata = code_processor.process_repo(temp_dir)
|
| 123 |
|
| 124 |
if faiss_index:
|
|
|
|
| 125 |
repo_data_store[repo_full_name] = {
|
| 126 |
"status": "processed",
|
| 127 |
"faiss_index": faiss_index,
|
|
|
|
| 134 |
except Exception as e:
|
| 135 |
print(f"Failed to process repository {repo_full_name}: {e}")
|
| 136 |
finally:
|
|
|
|
| 137 |
print(f"Cleaning up temporary directory: {temp_dir}")
|
| 138 |
shutil.rmtree(temp_dir)
|
| 139 |
|
|
|
|
| 140 |
# --- FastAPI App (Webhook handler is unchanged) ---
|
| 141 |
app = FastAPI()
|
| 142 |
|
| 143 |
async def verify_signature(request: Request):
|
|
|
|
| 144 |
if not GITHUB_WEBHOOK_SECRET: raise HTTPException(status_code=500, detail="Webhook secret not configured.")
|
| 145 |
signature_header = request.headers.get("X-Hub-Signature-256")
|
| 146 |
if not signature_header: raise HTTPException(status_code=400, detail="X-Hub-Signature-256 header is missing.")
|
|
|
|
| 155 |
|
| 156 |
@app.post("/api/github/webhook")
|
| 157 |
async def github_webhook(request: Request):
|
|
|
|
| 158 |
await verify_signature(request)
|
| 159 |
event_type = request.headers.get("X-GitHub-Event")
|
| 160 |
payload = await request.json()
|