SharathReddy committed on
Commit
0a6f593
·
verified ·
1 Parent(s): 89ed536

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -32
app.py CHANGED
@@ -23,7 +23,7 @@ GITHUB_PRIVATE_KEY = os.getenv("GITHUB_PRIVATE_KEY")
23
  # In-memory storage for our repository data
24
  repo_data_store = {}
25
 
26
- # --- NEW: Code Processing and Vectorization Class ---
27
 
28
  class CodeProcessor:
29
  def __init__(self, model_name='all-MiniLM-L6-v2'):
@@ -32,24 +32,30 @@ class CodeProcessor:
32
  print("Model initialized.")
33
 
34
  def parse_python_file(self, file_path):
35
- """Parses a Python file to extract functions and their docstrings using AST."""
36
- with open(file_path, 'r', encoding='utf-8') as source:
37
- try:
38
- tree = ast.parse(source.read())
39
- except SyntaxError:
40
- return [] # Ignore files with syntax errors
 
 
 
 
 
 
41
 
42
  functions = []
43
  for node in ast.walk(tree):
44
  if isinstance(node, ast.FunctionDef):
45
- # We extract the function's name and its full source code
46
- # The source code provides more context than just the docstring
47
- function_source = ast.get_source_segment(tree, node)
48
- functions.append({
49
- "name": node.name,
50
- "source": function_source,
51
- "file_path": file_path
52
- })
53
  return functions
54
 
55
  def process_repo(self, repo_path):
@@ -59,44 +65,44 @@ class CodeProcessor:
59
  for root, _, files in os.walk(repo_path):
60
  for file in files:
61
  if file.endswith('.py'):
62
- file_path = os.path.join(root, file)
63
- functions = self.parse_python_file(file_path)
64
- all_functions.extend(functions)
 
 
 
 
 
 
65
 
66
  if not all_functions:
67
  print("No Python functions found in the repository.")
68
  return None, None
69
 
70
  print(f"Found {len(all_functions)} functions. Generating embeddings...")
71
- # We create embeddings for the source code of each function
72
  function_sources = [f["source"] for f in all_functions]
73
  embeddings = self.model.encode(function_sources, show_progress_bar=False)
74
 
75
- # Create a FAISS index
76
  embedding_dim = embeddings.shape[1]
77
  index = faiss.IndexFlatL2(embedding_dim)
78
  index.add(np.array(embeddings, dtype=np.float32))
79
 
80
  print(f"FAISS index created successfully with {index.ntotal} vectors.")
81
 
82
- # We return the index and the metadata (the list of function details)
83
  return index, all_functions
84
 
85
  # --- Initialize the processor globally ---
86
- # This ensures the model is loaded only once when the app starts.
87
  code_processor = CodeProcessor()
88
 
89
 
90
- # --- GitHub App Authentication & Repo Management (Largely Unchanged) ---
91
-
92
  def create_jwt(app_id, private_key):
93
- # (This function is unchanged)
94
  now = int(time.time())
95
  payload = {"iat": now, "exp": now + (10 * 60), "iss": app_id}
96
  return jwt.encode(payload, private_key, algorithm="RS256")
97
 
98
  def get_installation_access_token(installation_id, app_id, private_key):
99
- # (This function is unchanged)
100
  app_jwt = create_jwt(app_id, private_key)
101
  headers = {"Authorization": f"Bearer {app_jwt}", "Accept": "application/vnd.github.v3+json"}
102
  url = f"https://api.github.com/app/installations/{installation_id}/access_tokens"
@@ -113,11 +119,9 @@ def process_repository(repo_url, token, repo_full_name):
113
  clone_url = repo_url.replace("https://", f"https://x-access-token:{token}@")
114
  Repo.clone_from(clone_url, temp_dir)
115
 
116
- # --- NEW: Use the CodeProcessor ---
117
  faiss_index, metadata = code_processor.process_repo(temp_dir)
118
 
119
  if faiss_index:
120
- # Store the live FAISS index and metadata in our in-memory store
121
  repo_data_store[repo_full_name] = {
122
  "status": "processed",
123
  "faiss_index": faiss_index,
@@ -130,16 +134,13 @@ def process_repository(repo_url, token, repo_full_name):
130
  except Exception as e:
131
  print(f"Failed to process repository {repo_full_name}: {e}")
132
  finally:
133
- # Clean up the cloned repository from disk
134
  print(f"Cleaning up temporary directory: {temp_dir}")
135
  shutil.rmtree(temp_dir)
136
 
137
-
138
  # --- FastAPI App (Webhook handler is unchanged) ---
139
  app = FastAPI()
140
 
141
  async def verify_signature(request: Request):
142
- # (This function is unchanged)
143
  if not GITHUB_WEBHOOK_SECRET: raise HTTPException(status_code=500, detail="Webhook secret not configured.")
144
  signature_header = request.headers.get("X-Hub-Signature-256")
145
  if not signature_header: raise HTTPException(status_code=400, detail="X-Hub-Signature-256 header is missing.")
@@ -154,7 +155,6 @@ def read_root():
154
 
155
  @app.post("/api/github/webhook")
156
  async def github_webhook(request: Request):
157
- # (This webhook handler is unchanged from the last step)
158
  await verify_signature(request)
159
  event_type = request.headers.get("X-GitHub-Event")
160
  payload = await request.json()
 
23
  # In-memory storage for our repository data
24
  repo_data_store = {}
25
 
26
+ # --- Code Processing and Vectorization Class ---
27
 
28
  class CodeProcessor:
29
  def __init__(self, model_name='all-MiniLM-L6-v2'):
 
32
  print("Model initialized.")
33
 
34
  def parse_python_file(self, file_path):
35
+ """Parses a Python file to extract functions and their source code using AST."""
36
+ # --- THE FIX IS HERE ---
37
+ # First, read the entire file content into a string variable.
38
+ with open(file_path, 'r', encoding='utf-8') as file:
39
+ source_code = file.read()
40
+
41
+ try:
42
+ # Parse the source code string.
43
+ tree = ast.parse(source_code)
44
+ except SyntaxError:
45
+ print(f"SyntaxError in {file_path}, skipping.")
46
+ return []
47
 
48
  functions = []
49
  for node in ast.walk(tree):
50
  if isinstance(node, ast.FunctionDef):
51
+ # Now, pass the correct `source_code` string to get the segment.
52
+ function_source = ast.get_source_segment(source_code, node)
53
+ if function_source: # Ensure we got a valid segment
54
+ functions.append({
55
+ "name": node.name,
56
+ "source": function_source,
57
+ "file_path": file_path.replace(tempfile.gettempdir(), "") # Store relative path
58
+ })
59
  return functions
60
 
61
  def process_repo(self, repo_path):
 
65
  for root, _, files in os.walk(repo_path):
66
  for file in files:
67
  if file.endswith('.py'):
68
+ # Create the full path relative to the repo root for parsing
69
+ full_path = os.path.join(root, file)
70
+ # Create a "clean" path for storage in metadata
71
+ clean_path = os.path.relpath(full_path, repo_path)
72
+
73
+ parsed_funcs = self.parse_python_file(full_path)
74
+ for func in parsed_funcs:
75
+ func['file_path'] = clean_path # Overwrite with the clean path
76
+ all_functions.extend(parsed_funcs)
77
 
78
  if not all_functions:
79
  print("No Python functions found in the repository.")
80
  return None, None
81
 
82
  print(f"Found {len(all_functions)} functions. Generating embeddings...")
 
83
  function_sources = [f["source"] for f in all_functions]
84
  embeddings = self.model.encode(function_sources, show_progress_bar=False)
85
 
 
86
  embedding_dim = embeddings.shape[1]
87
  index = faiss.IndexFlatL2(embedding_dim)
88
  index.add(np.array(embeddings, dtype=np.float32))
89
 
90
  print(f"FAISS index created successfully with {index.ntotal} vectors.")
91
 
 
92
  return index, all_functions
93
 
94
  # --- Initialize the processor globally ---
 
95
  code_processor = CodeProcessor()
96
 
97
 
98
+ # --- GitHub App Authentication & Repo Management (Unchanged) ---
99
+ # ... (The rest of the file from the previous correct version remains the same) ...
100
  def create_jwt(app_id, private_key):
 
101
  now = int(time.time())
102
  payload = {"iat": now, "exp": now + (10 * 60), "iss": app_id}
103
  return jwt.encode(payload, private_key, algorithm="RS256")
104
 
105
  def get_installation_access_token(installation_id, app_id, private_key):
 
106
  app_jwt = create_jwt(app_id, private_key)
107
  headers = {"Authorization": f"Bearer {app_jwt}", "Accept": "application/vnd.github.v3+json"}
108
  url = f"https://api.github.com/app/installations/{installation_id}/access_tokens"
 
119
  clone_url = repo_url.replace("https://", f"https://x-access-token:{token}@")
120
  Repo.clone_from(clone_url, temp_dir)
121
 
 
122
  faiss_index, metadata = code_processor.process_repo(temp_dir)
123
 
124
  if faiss_index:
 
125
  repo_data_store[repo_full_name] = {
126
  "status": "processed",
127
  "faiss_index": faiss_index,
 
134
  except Exception as e:
135
  print(f"Failed to process repository {repo_full_name}: {e}")
136
  finally:
 
137
  print(f"Cleaning up temporary directory: {temp_dir}")
138
  shutil.rmtree(temp_dir)
139
 
 
140
  # --- FastAPI App (Webhook handler is unchanged) ---
141
  app = FastAPI()
142
 
143
  async def verify_signature(request: Request):
 
144
  if not GITHUB_WEBHOOK_SECRET: raise HTTPException(status_code=500, detail="Webhook secret not configured.")
145
  signature_header = request.headers.get("X-Hub-Signature-256")
146
  if not signature_header: raise HTTPException(status_code=400, detail="X-Hub-Signature-256 header is missing.")
 
155
 
156
  @app.post("/api/github/webhook")
157
  async def github_webhook(request: Request):
 
158
  await verify_signature(request)
159
  event_type = request.headers.get("X-GitHub-Event")
160
  payload = await request.json()