SharathReddy committed on
Commit
6f67ae7
·
verified ·
1 Parent(s): 6374874

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -46
app.py CHANGED
@@ -6,9 +6,13 @@ import jwt
6
  import requests
7
  import tempfile
8
  import shutil
 
 
 
9
  from fastapi import FastAPI, Request, HTTPException, status
10
  from dotenv import load_dotenv
11
  from git import Repo
 
12
 
13
  # --- Configuration ---
14
  load_dotenv()
@@ -16,65 +20,133 @@ GITHUB_WEBHOOK_SECRET = os.getenv("GITHUB_WEBHOOK_SECRET")
16
  GITHUB_APP_ID = os.getenv("GITHUB_APP_ID")
17
  GITHUB_PRIVATE_KEY = os.getenv("GITHUB_PRIVATE_KEY")
18
 
19
- # In-memory storage for our repository data (for this simple version)
20
  repo_data_store = {}
21
 
22
- # --- GitHub App Authentication Logic (Unchanged) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  def create_jwt(app_id, private_key):
 
25
  now = int(time.time())
26
- payload = {
27
- "iat": now,
28
- "exp": now + (10 * 60),
29
- "iss": app_id,
30
- }
31
  return jwt.encode(payload, private_key, algorithm="RS256")
32
 
33
  def get_installation_access_token(installation_id, app_id, private_key):
 
34
  app_jwt = create_jwt(app_id, private_key)
35
- headers = {
36
- "Authorization": f"Bearer {app_jwt}",
37
- "Accept": "application/vnd.github.v3+json",
38
- }
39
  url = f"https://api.github.com/app/installations/{installation_id}/access_tokens"
40
  response = requests.post(url, headers=headers)
41
  response.raise_for_status()
42
  return response.json()["token"]
43
 
44
- # --- Repository Management Logic (Unchanged) ---
45
-
46
  def process_repository(repo_url, token, repo_full_name):
 
47
  temp_dir = tempfile.mkdtemp()
48
- print(f"Cloning {repo_full_name} into {temp_dir}")
49
 
50
  try:
51
  clone_url = repo_url.replace("https://", f"https://x-access-token:{token}@")
52
  Repo.clone_from(clone_url, temp_dir)
53
 
54
- print(f"Parsing repository {repo_full_name}...")
55
- # In the next step, we will add AST parsing and vectorization here.
56
 
57
- repo_data_store[repo_full_name] = {"status": "processed", "path": temp_dir}
58
- print(f"Successfully processed and 'stored' data for {repo_full_name}")
 
 
 
 
 
 
 
 
59
 
60
  except Exception as e:
61
  print(f"Failed to process repository {repo_full_name}: {e}")
 
 
 
 
 
62
 
63
- # --- FastAPI App ---
64
  app = FastAPI()
65
 
66
  async def verify_signature(request: Request):
67
- # (This function remains unchanged)
68
- if not GITHUB_WEBHOOK_SECRET:
69
- raise HTTPException(status_code=500, detail="Webhook secret not configured.")
70
  signature_header = request.headers.get("X-Hub-Signature-256")
71
- if not signature_header:
72
- raise HTTPException(status_code=400, detail="X-Hub-Signature-256 header is missing.")
73
  body = await request.body()
74
  sha_name, signature = signature_header.split("=")
75
  mac = hmac.new(GITHUB_WEBHOOK_SECRET.encode("utf-8"), msg=body, digestmod=hashlib.sha256)
76
- if not hmac.compare_digest(mac.hexdigest(), signature):
77
- raise HTTPException(status_code=400, detail="Invalid signature.")
78
 
79
  @app.get("/")
80
  def read_root():
@@ -82,46 +154,28 @@ def read_root():
82
 
83
  @app.post("/api/github/webhook")
84
  async def github_webhook(request: Request):
 
85
  await verify_signature(request)
86
-
87
  event_type = request.headers.get("X-GitHub-Event")
88
  payload = await request.json()
89
-
90
  print(f"Received event: {event_type} with action: {payload.get('action')}")
91
-
92
  installation_id = payload.get("installation", {}).get("id")
93
- if not installation_id:
94
- return {"status": "ok", "message": "Event does not pertain to an installation."}
95
-
96
- # --- REVISED LOGIC ---
97
  repos_to_process = []
98
-
99
  if event_type == "installation" and payload.get("action") == "created":
100
- # This handles the very first time the app is installed on an account.
101
  repos_to_process = payload.get("repositories", [])
102
- print(f"Processing 'installation.created' event for repos: {[repo['full_name'] for repo in repos_to_process]}")
103
-
104
  elif event_type == "installation_repositories" and payload.get("action") == "added":
105
- # This handles when a user adds one or more repos to an existing installation.
106
  repos_to_process = payload.get("repositories_added", [])
107
- print(f"Processing 'installation_repositories.added' event for repos: {[repo['full_name'] for repo in repos_to_process]}")
108
-
109
  if repos_to_process:
110
  try:
111
  token = get_installation_access_token(installation_id, GITHUB_APP_ID, GITHUB_PRIVATE_KEY)
112
  for repo in repos_to_process:
113
  repo_full_name = repo["full_name"]
114
- # The 'html_url' key is not present in the 'installation_repositories' event,
115
- # so we construct it manually.
116
  repo_url = f"https://github.com/{repo_full_name}"
117
  process_repository(repo_url, token, repo_full_name)
118
  except Exception as e:
119
  print(f"Error during repository processing: {e}")
120
- # Don't raise HTTPException here to prevent GitHub from disabling the webhook
121
-
122
- # Handle push events (will be used in Step 4)
123
  elif event_type == "push":
124
  repo_name = payload.get("repository", {}).get("full_name")
125
  print(f"Received a push event on repo {repo_name}")
126
-
127
  return {"status": "ok"}
 
6
  import requests
7
  import tempfile
8
  import shutil
9
+ import ast
10
+ import faiss
11
+ import numpy as np
12
  from fastapi import FastAPI, Request, HTTPException, status
13
  from dotenv import load_dotenv
14
  from git import Repo
15
+ from sentence_transformers import SentenceTransformer
16
 
17
  # --- Configuration ---
18
  load_dotenv()
 
20
  GITHUB_APP_ID = os.getenv("GITHUB_APP_ID")
21
  GITHUB_PRIVATE_KEY = os.getenv("GITHUB_PRIVATE_KEY")
22
 
23
+ # In-memory storage for our repository data
24
  repo_data_store = {}
25
 
26
+ # --- NEW: Code Processing and Vectorization Class ---
27
+
28
class CodeProcessor:
    """Extract Python functions from a repository and index their embeddings.

    The processor owns one SentenceTransformer model (loaded in ``__init__``)
    and uses it to embed the source text of every function it finds, storing
    the vectors in a FAISS ``IndexFlatL2`` index.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the sentence-embedding model identified by ``model_name``."""
        print("Initializing SentenceTransformer model...")
        self.model = SentenceTransformer(model_name)
        print("Model initialized.")

    def parse_python_file(self, file_path):
        """Return one record per function definition found in ``file_path``.

        Both ``def`` and ``async def`` definitions are collected, at any
        nesting depth (module level, methods, nested functions).

        Args:
            file_path: Path of the Python file to parse.

        Returns:
            A list of dicts with the keys ``"name"``, ``"source"`` and
            ``"file_path"``. The list is empty when the file cannot be read,
            is not valid UTF-8, or does not parse as Python.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as source_file:
                source_text = source_file.read()
        except (OSError, UnicodeDecodeError):
            # One unreadable or oddly-encoded file must not abort the whole repo.
            return []

        try:
            tree = ast.parse(source_text)
        except (SyntaxError, ValueError):
            # Ignore files that are not valid Python source.
            return []

        functions = []
        for node in ast.walk(tree):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            # get_source_segment needs the original source text, not the tree.
            function_source = ast.get_source_segment(source_text, node)
            if function_source is None:
                # No position information available; nothing useful to embed.
                continue
            functions.append({
                "name": node.name,
                "source": function_source,
                "file_path": file_path,
            })
        return functions

    def process_repo(self, repo_path):
        """Walk ``repo_path``, embed every Python function, and build an index.

        Args:
            repo_path: Root directory of the checked-out repository.

        Returns:
            ``(index, functions)`` where ``index`` is a FAISS ``IndexFlatL2``
            holding one vector per function and ``functions`` is the list of
            records from ``parse_python_file`` in the same order, or
            ``(None, None)`` when no functions were found.
        """
        all_functions = []
        print(f"Walking through Python files in {repo_path}...")
        for root, _, files in os.walk(repo_path):
            for file in files:
                if file.endswith('.py'):
                    file_path = os.path.join(root, file)
                    all_functions.extend(self.parse_python_file(file_path))

        if not all_functions:
            print("No Python functions found in the repository.")
            return None, None

        print(f"Found {len(all_functions)} functions. Generating embeddings...")
        # Embed the full source of each function rather than just its docstring.
        function_sources = [f["source"] for f in all_functions]
        embeddings = self.model.encode(function_sources, show_progress_bar=False)

        # Vector i in the index corresponds to all_functions[i].
        embedding_dim = embeddings.shape[1]
        index = faiss.IndexFlatL2(embedding_dim)
        index.add(np.array(embeddings, dtype=np.float32))

        print(f"FAISS index created successfully with {index.ntotal} vectors.")

        return index, all_functions
84
+
85
+ # --- Initialize the processor globally ---
86
+ # This ensures the model is loaded only once when the app starts.
87
+ code_processor = CodeProcessor()
88
+
89
+
90
+ # --- GitHub App Authentication & Repo Management (Largely Unchanged) ---
91
 
92
def create_jwt(app_id, private_key):
    """Create the short-lived RS256 JWT used to authenticate as the GitHub App.

    Args:
        app_id: GitHub App identifier, placed in the ``iss`` claim.
        private_key: PEM private key used to sign the token.

    Returns:
        The encoded JWT as returned by ``jwt.encode``.
    """
    now = int(time.time())
    payload = {
        # Back-date the issue time by a minute so modest clock drift between
        # this host and GitHub does not make the token look "not yet valid".
        "iat": now - 60,
        # GitHub rejects an expiry more than 10 minutes ahead; stay under the
        # limit so a fast local clock cannot push it over.
        "exp": now + (9 * 60),
        "iss": app_id,
    }
    return jwt.encode(payload, private_key, algorithm="RS256")
97
 
98
def get_installation_access_token(installation_id, app_id, private_key, timeout=10):
    """Exchange an app JWT for an installation access token.

    Args:
        installation_id: ID of the GitHub App installation to act as.
        app_id: GitHub App identifier, forwarded to ``create_jwt``.
        private_key: PEM private key, forwarded to ``create_jwt``.
        timeout: Seconds to wait for GitHub before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection.

    Returns:
        The ``"token"`` value from GitHub's JSON response.

    Raises:
        requests.HTTPError: If GitHub answers with a 4xx/5xx status.
        requests.Timeout: If GitHub does not answer within ``timeout``.
    """
    app_jwt = create_jwt(app_id, private_key)
    headers = {
        "Authorization": f"Bearer {app_jwt}",
        "Accept": "application/vnd.github.v3+json",
    }
    url = f"https://api.github.com/app/installations/{installation_id}/access_tokens"
    response = requests.post(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.json()["token"]
106
 
 
 
107
def process_repository(repo_url, token, repo_full_name):
    """Clone a repository, index its Python functions, and cache the result.

    The repository is cloned into a fresh temporary directory, handed to the
    module-level ``code_processor``, and the resulting FAISS index plus
    metadata are stored in ``repo_data_store`` under ``repo_full_name``.
    Failures are logged and swallowed; the temporary directory is always
    removed.

    Args:
        repo_url: HTTPS URL of the repository.
        token: Installation access token, embedded in the clone URL.
        repo_full_name: ``owner/name`` key used for logging and storage.
    """
    temp_dir = tempfile.mkdtemp()
    print(f"Cloning {repo_full_name} into {temp_dir}...")

    try:
        clone_url = repo_url.replace("https://", f"https://x-access-token:{token}@")
        Repo.clone_from(clone_url, temp_dir)

        faiss_index, metadata = code_processor.process_repo(temp_dir)

        # process_repo signals "nothing to index" with None; test for that
        # explicitly instead of relying on the index object's truthiness.
        if faiss_index is not None:
            # NOTE(review): metadata "file_path" values point inside temp_dir,
            # which is deleted below — confirm consumers only use them as labels.
            repo_data_store[repo_full_name] = {
                "status": "processed",
                "faiss_index": faiss_index,
                "metadata": metadata,
            }
            print(f"Successfully processed and indexed {repo_full_name}")
        else:
            print(f"No processable data found for {repo_full_name}")

    except Exception as e:
        # The clone URL carries the access token; make sure it never reaches
        # the logs if the underlying error message echoes the URL.
        error_text = str(e)
        if token:
            error_text = error_text.replace(token, "***")
        print(f"Failed to process repository {repo_full_name}: {error_text}")
    finally:
        print(f"Cleaning up temporary directory: {temp_dir}")
        # Best-effort cleanup: a failed delete must not raise out of here and
        # interrupt the caller's processing of the remaining repositories.
        shutil.rmtree(temp_dir, ignore_errors=True)
136
+
137
 
138
+ # --- FastAPI App (Webhook handler is unchanged) ---
139
  app = FastAPI()
140
 
141
async def verify_signature(request: Request):
    """Validate the ``X-Hub-Signature-256`` HMAC of an incoming webhook.

    Args:
        request: The incoming request whose raw body is authenticated.

    Raises:
        HTTPException: 500 when no webhook secret is configured; 400 when the
            signature header is missing, malformed, or does not match the
            HMAC-SHA256 of the request body.
    """
    if not GITHUB_WEBHOOK_SECRET:
        raise HTTPException(status_code=500, detail="Webhook secret not configured.")

    signature_header = request.headers.get("X-Hub-Signature-256")
    if not signature_header:
        raise HTTPException(status_code=400, detail="X-Hub-Signature-256 header is missing.")

    # partition() never raises, unlike unpacking split("="), so a header with
    # no "=" (or several) becomes a clean 400 instead of an unhandled error.
    sha_name, separator, signature = signature_header.partition("=")
    if not separator or sha_name != "sha256":
        raise HTTPException(status_code=400, detail="Malformed X-Hub-Signature-256 header.")

    body = await request.body()
    mac = hmac.new(GITHUB_WEBHOOK_SECRET.encode("utf-8"), msg=body, digestmod=hashlib.sha256)

    # Compare as bytes: compare_digest raises TypeError on non-ASCII str input,
    # which an attacker-controlled header could otherwise trigger.
    expected = mac.hexdigest().encode("ascii")
    if not hmac.compare_digest(expected, signature.encode("utf-8")):
        raise HTTPException(status_code=400, detail="Invalid signature.")
 
150
 
151
  @app.get("/")
152
  def read_root():
 
154
 
155
@app.post("/api/github/webhook")
async def github_webhook(request: Request):
    """Handle GitHub webhook deliveries for the app.

    After the signature check, repositories announced by an
    ``installation``/``created`` or ``installation_repositories``/``added``
    event are cloned and indexed; ``push`` events are only logged. Processing
    errors are logged rather than raised, so the endpoint still answers
    ``{"status": "ok"}``.
    """
    await verify_signature(request)

    event_type = request.headers.get("X-GitHub-Event")
    payload = await request.json()
    action = payload.get("action")
    print(f"Received event: {event_type} with action: {action}")

    installation_id = payload.get("installation", {}).get("id")
    if not installation_id:
        return {"status": "ok", "message": "Event does not pertain to an installation."}

    # Pick the repository list that matches the event/action pair, if any.
    if event_type == "installation" and action == "created":
        repos_to_process = payload.get("repositories", [])
    elif event_type == "installation_repositories" and action == "added":
        repos_to_process = payload.get("repositories_added", [])
    else:
        repos_to_process = []

    if not repos_to_process:
        if event_type == "push":
            pushed_repo = payload.get("repository", {}).get("full_name")
            print(f"Received a push event on repo {pushed_repo}")
        return {"status": "ok"}

    # NOTE(review): process_repository is a synchronous call made from this
    # async handler, so the clone/embedding work runs inline — confirm that is
    # acceptable for webhook response times.
    try:
        token = get_installation_access_token(installation_id, GITHUB_APP_ID, GITHUB_PRIVATE_KEY)
        for repo in repos_to_process:
            full_name = repo["full_name"]
            process_repository(f"https://github.com/{full_name}", token, full_name)
    except Exception as e:
        print(f"Error during repository processing: {e}")

    return {"status": "ok"}