Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,9 +6,13 @@ import jwt
|
|
| 6 |
import requests
|
| 7 |
import tempfile
|
| 8 |
import shutil
|
|
|
|
|
|
|
|
|
|
| 9 |
from fastapi import FastAPI, Request, HTTPException, status
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
from git import Repo
|
|
|
|
| 12 |
|
| 13 |
# --- Configuration ---
|
| 14 |
load_dotenv()
|
|
@@ -16,65 +20,133 @@ GITHUB_WEBHOOK_SECRET = os.getenv("GITHUB_WEBHOOK_SECRET")
|
|
| 16 |
GITHUB_APP_ID = os.getenv("GITHUB_APP_ID")
|
| 17 |
GITHUB_PRIVATE_KEY = os.getenv("GITHUB_PRIVATE_KEY")
|
| 18 |
|
| 19 |
-
# In-memory storage for our repository data
|
| 20 |
repo_data_store = {}
|
| 21 |
|
| 22 |
-
# ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
def create_jwt(app_id, private_key):
|
|
|
|
| 25 |
now = int(time.time())
|
| 26 |
-
payload = {
|
| 27 |
-
"iat": now,
|
| 28 |
-
"exp": now + (10 * 60),
|
| 29 |
-
"iss": app_id,
|
| 30 |
-
}
|
| 31 |
return jwt.encode(payload, private_key, algorithm="RS256")
|
| 32 |
|
| 33 |
def get_installation_access_token(installation_id, app_id, private_key):
|
|
|
|
| 34 |
app_jwt = create_jwt(app_id, private_key)
|
| 35 |
-
headers = {
|
| 36 |
-
"Authorization": f"Bearer {app_jwt}",
|
| 37 |
-
"Accept": "application/vnd.github.v3+json",
|
| 38 |
-
}
|
| 39 |
url = f"https://api.github.com/app/installations/{installation_id}/access_tokens"
|
| 40 |
response = requests.post(url, headers=headers)
|
| 41 |
response.raise_for_status()
|
| 42 |
return response.json()["token"]
|
| 43 |
|
| 44 |
-
# --- Repository Management Logic (Unchanged) ---
|
| 45 |
-
|
| 46 |
def process_repository(repo_url, token, repo_full_name):
|
|
|
|
| 47 |
temp_dir = tempfile.mkdtemp()
|
| 48 |
-
print(f"Cloning {repo_full_name} into {temp_dir}")
|
| 49 |
|
| 50 |
try:
|
| 51 |
clone_url = repo_url.replace("https://", f"https://x-access-token:{token}@")
|
| 52 |
Repo.clone_from(clone_url, temp_dir)
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
except Exception as e:
|
| 61 |
print(f"Failed to process repository {repo_full_name}: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
-
# --- FastAPI App ---
|
| 64 |
app = FastAPI()
|
| 65 |
|
| 66 |
async def verify_signature(request: Request):
|
| 67 |
-
# (This function
|
| 68 |
-
if not GITHUB_WEBHOOK_SECRET:
|
| 69 |
-
raise HTTPException(status_code=500, detail="Webhook secret not configured.")
|
| 70 |
signature_header = request.headers.get("X-Hub-Signature-256")
|
| 71 |
-
if not signature_header:
|
| 72 |
-
raise HTTPException(status_code=400, detail="X-Hub-Signature-256 header is missing.")
|
| 73 |
body = await request.body()
|
| 74 |
sha_name, signature = signature_header.split("=")
|
| 75 |
mac = hmac.new(GITHUB_WEBHOOK_SECRET.encode("utf-8"), msg=body, digestmod=hashlib.sha256)
|
| 76 |
-
if not hmac.compare_digest(mac.hexdigest(), signature):
|
| 77 |
-
raise HTTPException(status_code=400, detail="Invalid signature.")
|
| 78 |
|
| 79 |
@app.get("/")
|
| 80 |
def read_root():
|
|
@@ -82,46 +154,28 @@ def read_root():
|
|
| 82 |
|
| 83 |
@app.post("/api/github/webhook")
|
| 84 |
async def github_webhook(request: Request):
|
|
|
|
| 85 |
await verify_signature(request)
|
| 86 |
-
|
| 87 |
event_type = request.headers.get("X-GitHub-Event")
|
| 88 |
payload = await request.json()
|
| 89 |
-
|
| 90 |
print(f"Received event: {event_type} with action: {payload.get('action')}")
|
| 91 |
-
|
| 92 |
installation_id = payload.get("installation", {}).get("id")
|
| 93 |
-
if not installation_id:
|
| 94 |
-
return {"status": "ok", "message": "Event does not pertain to an installation."}
|
| 95 |
-
|
| 96 |
-
# --- REVISED LOGIC ---
|
| 97 |
repos_to_process = []
|
| 98 |
-
|
| 99 |
if event_type == "installation" and payload.get("action") == "created":
|
| 100 |
-
# This handles the very first time the app is installed on an account.
|
| 101 |
repos_to_process = payload.get("repositories", [])
|
| 102 |
-
print(f"Processing 'installation.created' event for repos: {[repo['full_name'] for repo in repos_to_process]}")
|
| 103 |
-
|
| 104 |
elif event_type == "installation_repositories" and payload.get("action") == "added":
|
| 105 |
-
# This handles when a user adds one or more repos to an existing installation.
|
| 106 |
repos_to_process = payload.get("repositories_added", [])
|
| 107 |
-
print(f"Processing 'installation_repositories.added' event for repos: {[repo['full_name'] for repo in repos_to_process]}")
|
| 108 |
-
|
| 109 |
if repos_to_process:
|
| 110 |
try:
|
| 111 |
token = get_installation_access_token(installation_id, GITHUB_APP_ID, GITHUB_PRIVATE_KEY)
|
| 112 |
for repo in repos_to_process:
|
| 113 |
repo_full_name = repo["full_name"]
|
| 114 |
-
# The 'html_url' key is not present in the 'installation_repositories' event,
|
| 115 |
-
# so we construct it manually.
|
| 116 |
repo_url = f"https://github.com/{repo_full_name}"
|
| 117 |
process_repository(repo_url, token, repo_full_name)
|
| 118 |
except Exception as e:
|
| 119 |
print(f"Error during repository processing: {e}")
|
| 120 |
-
# Don't raise HTTPException here to prevent GitHub from disabling the webhook
|
| 121 |
-
|
| 122 |
-
# Handle push events (will be used in Step 4)
|
| 123 |
elif event_type == "push":
|
| 124 |
repo_name = payload.get("repository", {}).get("full_name")
|
| 125 |
print(f"Received a push event on repo {repo_name}")
|
| 126 |
-
|
| 127 |
return {"status": "ok"}
|
|
|
|
| 6 |
import requests
|
| 7 |
import tempfile
|
| 8 |
import shutil
|
| 9 |
+
import ast
|
| 10 |
+
import faiss
|
| 11 |
+
import numpy as np
|
| 12 |
from fastapi import FastAPI, Request, HTTPException, status
|
| 13 |
from dotenv import load_dotenv
|
| 14 |
from git import Repo
|
| 15 |
+
from sentence_transformers import SentenceTransformer
|
| 16 |
|
| 17 |
# --- Configuration ---
|
| 18 |
load_dotenv()
|
|
|
|
| 20 |
GITHUB_APP_ID = os.getenv("GITHUB_APP_ID")
|
| 21 |
GITHUB_PRIVATE_KEY = os.getenv("GITHUB_PRIVATE_KEY")
|
| 22 |
|
| 23 |
+
# In-memory storage for our repository data
|
| 24 |
repo_data_store = {}
|
| 25 |
|
| 26 |
+
# --- NEW: Code Processing and Vectorization Class ---
|
| 27 |
+
|
| 28 |
+
class CodeProcessor:
    """Extract Python functions from a checkout and index them with FAISS.

    The sentence-embedding model is loaded once in ``__init__`` and reused by
    every ``process_repo`` call.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the SentenceTransformer model named *model_name*."""
        print("Initializing SentenceTransformer model...")
        self.model = SentenceTransformer(model_name)
        print("Model initialized.")

    def parse_python_file(self, file_path):
        """Return one record per function definition found in *file_path*.

        Each record is a dict with the keys ``"name"``, ``"source"`` (the
        exact source text of the definition) and ``"file_path"``. Both
        ``def`` and ``async def`` definitions are collected, including
        nested functions and methods.

        Files that are not valid UTF-8 or not valid Python yield ``[]`` so a
        single bad file cannot abort the indexing of a whole repository.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as source:
                source_text = source.read()
            tree = ast.parse(source_text)
        except (SyntaxError, ValueError):
            # ValueError also covers UnicodeDecodeError (its subclass) and
            # the null-byte rejection raised by some Python versions.
            return []

        functions = []
        for node in ast.walk(tree):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            # get_source_segment needs the original source *text*; handing it
            # the parsed tree raises TypeError.
            function_source = ast.get_source_segment(source_text, node)
            if function_source is None:
                # No location info available; nothing meaningful to embed.
                continue
            functions.append({
                "name": node.name,
                "source": function_source,
                "file_path": file_path,
            })
        return functions

    def process_repo(self, repo_path):
        """Walk *repo_path*, embed every Python function and build an index.

        Returns ``(index, functions)`` where ``index`` is a
        ``faiss.IndexFlatL2`` holding one vector per function and
        ``functions`` is the list of records from ``parse_python_file`` in
        the same order as the vectors. Returns ``(None, None)`` when no
        function was found.
        """
        all_functions = []
        print(f"Walking through Python files in {repo_path}...")
        for root, _, files in os.walk(repo_path):
            for file in files:
                if file.endswith('.py'):
                    file_path = os.path.join(root, file)
                    all_functions.extend(self.parse_python_file(file_path))

        if not all_functions:
            print("No Python functions found in the repository.")
            return None, None

        print(f"Found {len(all_functions)} functions. Generating embeddings...")
        # The full function source is embedded (not just the docstring) to
        # give the model more context.
        function_sources = [f["source"] for f in all_functions]
        embeddings = self.model.encode(function_sources, show_progress_bar=False)

        # Vectors are converted to float32 before being added to the index.
        embedding_dim = embeddings.shape[1]
        index = faiss.IndexFlatL2(embedding_dim)
        index.add(np.array(embeddings, dtype=np.float32))

        print(f"FAISS index created successfully with {index.ntotal} vectors.")

        # Row i of the index corresponds to all_functions[i].
        return index, all_functions
|
| 84 |
+
|
| 85 |
+
# --- Initialize the processor globally ---
|
| 86 |
+
# This ensures the model is loaded only once when the app starts.
|
| 87 |
+
code_processor = CodeProcessor()
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
# --- GitHub App Authentication & Repo Management (Largely Unchanged) ---
|
| 91 |
|
| 92 |
def create_jwt(app_id, private_key):
    """Build the RS256-signed JWT used to authenticate as the GitHub App.

    Args:
        app_id: The GitHub App identifier, used as the ``iss`` claim.
        private_key: The app's private key, passed to ``jwt.encode``.

    Returns:
        The encoded token as returned by ``jwt.encode``.
    """
    now = int(time.time())
    payload = {
        # Backdate the issue time by a minute so small clock drift between
        # this host and GitHub does not make the token look "not yet valid".
        "iat": now - 60,
        # Stay below GitHub's 10-minute ceiling for the same reason: a local
        # clock running fast must not push `exp` past the allowed limit.
        "exp": now + (9 * 60),
        "iss": app_id,
    }
    return jwt.encode(payload, private_key, algorithm="RS256")
|
| 97 |
|
| 98 |
def get_installation_access_token(installation_id, app_id, private_key):
    """Exchange an app JWT for an installation access token.

    Args:
        installation_id: The installation whose token is requested.
        app_id: GitHub App identifier, forwarded to ``create_jwt``.
        private_key: App private key, forwarded to ``create_jwt``.

    Returns:
        The ``"token"`` field of GitHub's JSON response.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            response (via ``raise_for_status``).
    """
    app_jwt = create_jwt(app_id, private_key)
    headers = {
        "Authorization": f"Bearer {app_jwt}",
        "Accept": "application/vnd.github.v3+json",
    }
    url = f"https://api.github.com/app/installations/{installation_id}/access_tokens"
    # Without a timeout, requests waits indefinitely on a stalled connection,
    # which would hang the webhook handler that calls this function.
    response = requests.post(url, headers=headers, timeout=10)
    response.raise_for_status()
    return response.json()["token"]
|
| 106 |
|
|
|
|
|
|
|
| 107 |
def process_repository(repo_url, token, repo_full_name):
    """Clone a repository, index it, and store the result in memory.

    The repository is cloned into a fresh temporary directory using the
    installation token, handed to ``code_processor.process_repo``, and the
    resulting index and metadata are stored in ``repo_data_store`` under
    *repo_full_name*. The temporary directory is always removed afterwards.

    Failures are logged rather than raised so one bad repository does not
    stop the caller from processing the rest.

    Args:
        repo_url: ``https://`` URL of the repository.
        token: Installation access token embedded in the clone URL.
        repo_full_name: Key under which the result is stored.
    """
    temp_dir = tempfile.mkdtemp()
    print(f"Cloning {repo_full_name} into {temp_dir}...")

    try:
        clone_url = repo_url.replace("https://", f"https://x-access-token:{token}@", 1)
        Repo.clone_from(clone_url, temp_dir)

        faiss_index, metadata = code_processor.process_repo(temp_dir)

        # Compare against None explicitly: "nothing found" is signalled by
        # None, and the truthiness of an index object is not a defined contract.
        if faiss_index is not None:
            repo_data_store[repo_full_name] = {
                "status": "processed",
                "faiss_index": faiss_index,
                "metadata": metadata,
            }
            print(f"Successfully processed and indexed {repo_full_name}")
        else:
            print(f"No processable data found for {repo_full_name}")

    except Exception as e:
        # The clone URL carries the access token, and a git error message may
        # echo that URL; scrub the token before it reaches the logs.
        error_text = str(e)
        if token:
            error_text = error_text.replace(token, "***")
        print(f"Failed to process repository {repo_full_name}: {error_text}")
    finally:
        # Best-effort cleanup: a removal failure must not escape and mask the
        # outcome of the processing above.
        print(f"Cleaning up temporary directory: {temp_dir}")
        shutil.rmtree(temp_dir, ignore_errors=True)
|
| 136 |
+
|
| 137 |
|
| 138 |
+
# --- FastAPI App (Webhook handler is unchanged) ---
|
| 139 |
app = FastAPI()
|
| 140 |
|
| 141 |
async def verify_signature(request: Request):
    """Validate the ``X-Hub-Signature-256`` header of a webhook request.

    Computes the HMAC-SHA256 of the raw request body with
    ``GITHUB_WEBHOOK_SECRET`` and compares it in constant time with the
    signature supplied by the sender.

    Raises:
        HTTPException: 500 if no webhook secret is configured; 400 if the
            header is missing, malformed, or does not match the body.
    """
    if not GITHUB_WEBHOOK_SECRET:
        raise HTTPException(status_code=500, detail="Webhook secret not configured.")

    signature_header = request.headers.get("X-Hub-Signature-256")
    if not signature_header:
        raise HTTPException(status_code=400, detail="X-Hub-Signature-256 header is missing.")

    body = await request.body()

    # partition() never raises, unlike unpacking split("="), which fails with
    # ValueError (an HTTP 500) when the header has zero or several "=" signs.
    sha_name, _, signature = signature_header.partition("=")
    if sha_name != "sha256":
        raise HTTPException(status_code=400, detail="Invalid signature.")

    mac = hmac.new(GITHUB_WEBHOOK_SECRET.encode("utf-8"), msg=body, digestmod=hashlib.sha256)
    # Compare as bytes: compare_digest raises TypeError for str arguments
    # containing non-ASCII characters, which a hostile header could contain.
    expected = mac.hexdigest().encode("ascii")
    if not hmac.compare_digest(expected, signature.encode("utf-8")):
        raise HTTPException(status_code=400, detail="Invalid signature.")
|
|
|
|
| 150 |
|
| 151 |
@app.get("/")
|
| 152 |
def read_root():
|
|
|
|
| 154 |
|
| 155 |
@app.post("/api/github/webhook")
async def github_webhook(request: Request):
    """Handle GitHub webhook deliveries for the app.

    After verifying the signature, repositories named in
    ``installation.created`` and ``installation_repositories.added`` events
    are cloned and indexed. ``push`` events are only logged. Processing
    errors are logged and never turned into an error response.

    Returns:
        ``{"status": "ok"}``, with an extra ``"message"`` when the event
        carries no installation id.
    """
    # Function-scope import keeps this handler self-contained.
    import asyncio

    await verify_signature(request)

    event_type = request.headers.get("X-GitHub-Event")
    payload = await request.json()
    action = payload.get("action")

    print(f"Received event: {event_type} with action: {action}")

    # `or {}` also covers an explicit `"installation": null` in the payload.
    installation_id = (payload.get("installation") or {}).get("id")
    if not installation_id:
        return {"status": "ok", "message": "Event does not pertain to an installation."}

    repos_to_process = []
    if event_type == "installation" and action == "created":
        repos_to_process = payload.get("repositories") or []
    elif event_type == "installation_repositories" and action == "added":
        repos_to_process = payload.get("repositories_added") or []

    if repos_to_process:
        # The token request, clone and embedding are blocking calls; running
        # them in the default executor keeps the event loop free to serve
        # other requests while they complete.
        # NOTE(review): the response is still sent only after processing
        # finishes; if delivery timeouts become a problem, consider handing
        # this off to a background task instead.
        loop = asyncio.get_running_loop()
        try:
            token = await loop.run_in_executor(
                None,
                get_installation_access_token,
                installation_id,
                GITHUB_APP_ID,
                GITHUB_PRIVATE_KEY,
            )
            for repo in repos_to_process:
                repo_full_name = repo["full_name"]
                # The URL is built from the full name rather than read from
                # the repository entry.
                repo_url = f"https://github.com/{repo_full_name}"
                await loop.run_in_executor(
                    None, process_repository, repo_url, token, repo_full_name
                )
        except Exception as e:
            # Deliberately swallowed: the delivery is still acknowledged.
            print(f"Error during repository processing: {e}")
    elif event_type == "push":
        repo_name = (payload.get("repository") or {}).get("full_name")
        print(f"Received a push event on repo {repo_name}")

    return {"status": "ok"}
|