nikhmr1235 committed on
Commit
bed8b5f
·
verified ·
1 Parent(s): 335e0a3

Create nodes.py

Browse files
Files changed (1) hide show
  1. src/langgraph_logic/nodes.py +256 -0
src/langgraph_logic/nodes.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import sys
4
+ import logging
5
+ from typing import Dict, Any, List, Optional, Tuple
6
+ from github import Github, PullRequest
7
+ from github.GithubException import GithubException, UnknownObjectException
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ from langchain_core.prompts import ChatPromptTemplate
11
+ from langchain_google_genai import ChatGoogleGenerativeAI
12
+
13
+ from .state import PRReviewState, LLMReviewOutput, ParsedReviewSection, ParsedComment, FileReviewComments
14
+
15
# --- Environment Variable Loading ---
# Load the .env file BEFORE reading any variables; otherwise values that
# exist only in .env are not yet in os.environ when os.getenv runs below.
load_dotenv()

google_api_key = os.getenv("GOOGLE_API_KEY")
git_hub_token = os.getenv("GITHUB_token_ID")

if not google_api_key:
    print("Google API key not found in environment variables.")
if not git_hub_token:
    print("GITHUB_token_ID not found in environment variables.")

# --- LLM Initialization ---
# Shared chat model used by generate_code_review_markdown.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-lite", temperature=0.0, api_key=google_api_key)

# --- Logging Configuration ---
logging.basicConfig(
    format='%(asctime)s | %(levelname)s : %(message)s',
    level=logging.INFO,
    stream=sys.stdout
)
35
+
36
+ # --- GitHub API Functions ---
37
+
38
def fetch_pr_code_changes(repo_name: str, pr_id: int) -> Tuple[Optional[str], Optional[Dict[str, str]], Optional[str], Optional[str]]:
    """Fetch the patch text, per-file contents, and head commit SHA for a PR.

    Args:
        repo_name: Repository identifier handed to ``Github.get_repo``.
        pr_id: Pull request number handed to ``repo.get_pull``.

    Returns:
        A 4-tuple ``(raw_diff, file_contents, head_commit_sha, error)``.
        On success ``error`` is ``None``. On any failure the first three
        items are ``None`` and ``error`` holds a human-readable message.
        ``file_contents`` maps each changed filename to its text at the PR
        head, or to a bracketed placeholder when the text is unavailable.
    """
    github_token = git_hub_token
    if not github_token:
        return None, None, None, "GitHub token not found."

    try:
        g = Github(github_token)
        repo = g.get_repo(repo_name)
        pull_request = repo.get_pull(pr_id)
        head_commit_sha = pull_request.head.sha

        headers = {"Authorization": f"token {github_token}"}
        # A timeout keeps the node from hanging forever, and raise_for_status
        # prevents an HTTP error page from being passed on as the "diff".
        response = requests.get(pull_request.patch_url, headers=headers, timeout=30)
        response.raise_for_status()
        raw_diff_content = response.text

        file_contents: Dict[str, str] = {}
        for file in pull_request.get_files():
            if file.status == 'deleted':
                file_contents[file.filename] = "[FILE DELETED]"
                continue
            try:
                file_content_obj = repo.get_contents(file.filename, ref=head_commit_sha)
                if isinstance(file_content_obj, list):
                    file_contents[file.filename] = "[DIRECTORY OR MULTIPLE FILES]"
                    continue
                file_contents[file.filename] = file_content_obj.decoded_content.decode('utf-8')
            except GithubException as e:
                file_contents[file.filename] = f"[ERROR: Could not fetch content. Status: {e.status}]"
            except UnicodeDecodeError:
                # One binary / non-UTF-8 file must not abort the whole fetch.
                file_contents[file.filename] = "[BINARY OR NON-UTF-8 FILE]"

        return raw_diff_content, file_contents, head_commit_sha, None
    except Exception as e:
        # Boundary handler: GitHub, HTTP and unexpected errors are all
        # reported through the error slot of the returned tuple.
        error_msg = f"Error fetching PR data: {e}"
        logging.error(error_msg)
        return None, None, None, error_msg
73
+
74
def _build_main_review_body(parsed_review_data: LLMReviewOutput) -> str:
    """Assemble the top-level Markdown body of the review."""
    parts = ["### 🤖 Automated Code Review\n\n"]
    if parsed_review_data.overall_impression:
        parts.append(f"**Overall Impression:**\n{parsed_review_data.overall_impression}\n\n---\n\n")
    parts.extend(
        f"### {section.title}\n{section.content}\n\n---\n\n"
        for section in parsed_review_data.general_sections
        if section.content.strip()
    )
    if parsed_review_data.summary:
        parts.append(f"### Summary\n{parsed_review_data.summary}\n\n")
    parts.append(f"**LLM Recommended Action:** {parsed_review_data.approval_status.upper()}\n")
    return "".join(parts)


def _build_file_comment_body(file_review: "FileReviewComments") -> Optional[str]:
    """Assemble one consolidated comment for a file, or None if it has no comments."""
    parts = [f"### Review for `{file_review.file_path}`\n\n"]
    for func_name, comments in file_review.sections.items():
        if not comments:
            continue
        if func_name == "General_File_Comments":
            parts.append("#### 📄 General File Comments\n\n")
        else:
            parts.append(f"#### ⚙️ Function: `{func_name}`\n\n")
        for comment in comments:
            parts.append(f"{comment.message}\n")
            if comment.suggestion:
                parts.append(f"\n```suggestion\n{comment.suggestion}\n```\n\n")
        # NOTE(review): separator is emitted once per section; the original
        # indentation was lost in the source, so confirm it was not per comment.
        parts.append("\n---\n\n")

    # Only the header present means no section had any comments.
    if len(parts) == 1:
        return None
    return "".join(parts).strip()


def post_review_comments_on_github(
    repo_name: str, pr_id: int, parsed_review_data: LLMReviewOutput,
    github_token: Optional[str] = None, final_event: Optional[str] = "COMMENT"
) -> Dict[str, Any]:
    """Post a structured code review to a GitHub pull request.

    Args:
        repo_name: Repository identifier handed to ``Github.get_repo``.
        pr_id: Pull request number.
        parsed_review_data: Structured review whose sections and per-file
            comments are rendered to Markdown.
        github_token: Token to authenticate with. When ``None`` the
            ``GITHUB_TOKEN`` environment variable is used, then
            ``GITHUB_token_ID`` (the variable the module itself reads).
        final_event: Review event passed through to ``create_review``.

    Returns:
        Dict with keys ``status``, ``review_url``, ``review_id`` and
        ``main_comment_body``.

    Raises:
        ValueError: If no token is supplied or found in the environment.
        RuntimeError: If anything fails while building or posting the review;
            the underlying exception is chained as ``__cause__``.
    """
    if github_token is None:
        github_token = os.getenv("GITHUB_TOKEN") or os.getenv("GITHUB_token_ID")
    if not github_token:
        raise ValueError("GitHub token not provided.")

    try:
        g = Github(github_token)
        repo = g.get_repo(repo_name)
        pr = repo.get_pull(pr_id)

        main_review_body = _build_main_review_body(parsed_review_data)

        pr_commit_obj = repo.get_commit(pr.head.sha)

        github_comments = []
        for file_review in parsed_review_data.file_reviews:
            file_comment_body = _build_file_comment_body(file_review)
            if file_comment_body is None:
                continue
            # Every file comment is anchored at diff position 1 (hard-coded).
            github_comments.append({
                "path": file_review.file_path, "position": 1,
                "body": file_comment_body,
            })

        review = pr.create_review(commit=pr_commit_obj, body=main_review_body, event=final_event, comments=github_comments)
        return {'status': 'success', 'review_url': review.html_url, 'review_id': review.id, 'main_comment_body': main_review_body}
    except Exception as e:
        logging.error(f"Error posting review: {e}")
        raise RuntimeError(f"Failed to post review: {e}") from e
126
+
127
def update_submitted_review_body(
    repo_name: str, pr_id: int, review_id: int, new_body: str, github_token: Optional[str] = None
) -> Dict[str, Any]:
    """Replace the main body of an already submitted GitHub PR review.

    Args:
        repo_name: Repository identifier handed to ``Github.get_repo``.
        pr_id: Pull request number.
        review_id: Identifier of the review to edit.
        new_body: Text that replaces the review body.
        github_token: Token to authenticate with. When ``None`` the
            ``GITHUB_TOKEN`` environment variable is used, then
            ``GITHUB_token_ID`` (the variable the module itself reads).

    Returns:
        On success, a dict with keys ``status``, ``review_url``,
        ``review_id`` and ``updated_body``. If the review is still pending,
        a dict with ``status`` set to ``'error'`` and a ``message``.

    Raises:
        ValueError: If no token is supplied or found in the environment.
        RuntimeError: If the lookup or edit fails; the underlying exception
            is chained as ``__cause__``.
    """
    if github_token is None:
        github_token = os.getenv("GITHUB_TOKEN") or os.getenv("GITHUB_token_ID")
    if not github_token:
        raise ValueError("GitHub token not provided.")

    try:
        g = Github(github_token)
        repo = g.get_repo(repo_name)
        pr = repo.get_pull(pr_id)
        review = pr.get_review(review_id)

        if review.state == "PENDING":
            return {'status': 'error', 'message': 'Cannot update body of a pending review.'}

        review.edit(body=new_body)
        return {'status': 'success', 'review_url': review.html_url, 'review_id': review.id, 'updated_body': review.body}
    except Exception as e:
        logging.error(f"Error updating review body: {e}")
        raise RuntimeError(f"Failed to update review body: {e}") from e
150
+
151
+ # --- LLM and Parsing Functions ---
152
+
153
def generate_code_review_markdown(code_diff: str, file_contents: Dict[str, str]) -> str:
    """Ask the LLM for a Markdown code review of the given changes.

    Args:
        code_diff: Diff text inserted into the prompt.
        file_contents: Mapping of filename to full file text; each entry is
            rendered as a fenced block in the prompt. May be empty or falsy.

    Returns:
        The ``content`` of the LLM response. If the call raises, the failure
        is logged and a string starting with
        ``"Error generating code review:"`` is returned instead of raising.
    """
    full_contents_str = "".join(
        f"--- Full Content of {filename} ---\n```python\n{content}\n```\n\n"
        for filename, content in (file_contents or {}).items()
    )

    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are an expert Senior Software Engineer..."),  # Truncated for brevity
        ("human", "Here are the code changes (diff):\n```{code_diff}```\n\nHere are the full contents...\n{full_contents_context}\n\nPlease provide your structured code review in Markdown.")
    ])
    review_chain = prompt | llm
    try:
        return review_chain.invoke({"code_diff": code_diff, "full_contents_context": full_contents_str}).content
    except Exception as e:
        # Callers receive the error as text, so log it here or the failure
        # leaves no trace outside the returned string.
        logging.error(f"Error generating code review: {e}")
        return f"Error generating code review: {e}"
169
+
170
+ def _extract_suggestion(text: str) -> Tuple[Optional[str], str]:
171
+ suggestion_match = re.search(r"```suggestion\n([\s\S]*?)\n```", text, re.MULTILINE)
172
+ suggestion_code = suggestion_match.group(1).strip() if suggestion_match else None
173
+ cleaned_message = re.sub(r"```suggestion[\s\S]*?```", "", text).strip()
174
+ return suggestion_code, cleaned_message
175
+
176
def _parse_bullet_comments(text_block: str) -> List[ParsedComment]:
    """Turn a block of Markdown bullets into ``ParsedComment`` objects.

    A bullet starts at a line beginning with optional spaces and ``-`` or
    ``*`` and runs until the next such line or the end of the text. Each
    non-empty bullet is split into message and suggestion by
    ``_extract_suggestion``.
    """
    bullet_pattern = r"(^ *[-*]\s*[\s\S]*?)(?=\n *[-*]\s*|\Z)"
    parsed: List[ParsedComment] = []
    for bullet in re.finditer(bullet_pattern, text_block, re.MULTILINE | re.DOTALL):
        bullet_text = bullet.group(1).strip()
        if not bullet_text:
            continue
        code, message = _extract_suggestion(bullet_text)
        parsed.append(ParsedComment(message=message, suggestion=code))
    return parsed
185
+
186
def parse_llm_review_markdown(markdown_review: str) -> LLMReviewOutput:
    """Wrap the LLM's Markdown review in an ``LLMReviewOutput`` model.

    Placeholder implementation: no section parsing happens here. The whole
    Markdown string is stored as ``overall_impression``.
    """
    # Implementation from the original file, simplified for brevity
    # ... (The full parsing logic would be here) ...
    structured_review = LLMReviewOutput(overall_impression=markdown_review)  # Placeholder for actual parsing
    return structured_review
191
+
192
+ # --- Graph Nodes ---
193
+
194
def code_retriever_node(state: PRReviewState) -> PRReviewState:
    """Graph node: load the PR's diff and file contents into the state.

    Returns a copy of ``state`` with ``review_status`` set to
    ``"code_fetched"`` plus the fetched data, or to ``"error"`` with
    ``last_error`` when the fetch reports a failure.
    """
    logging.info("--- NODE: code_retriever_node ---")
    fetched = fetch_pr_code_changes(state.repo_name, state.pr_id)
    # The head commit SHA (third item) is not used by this node.
    raw_diff, contents_by_file, _head_sha, fetch_error = fetched
    if not fetch_error:
        changes = {"review_status": "code_fetched", "code_diff": raw_diff, "file_contents": contents_by_file}
    else:
        changes = {"review_status": "error", "last_error": fetch_error}
    return state.model_copy(update=changes)
201
+
202
def code_reviewer_node(state: PRReviewState) -> PRReviewState:
    """Graph node: generate the Markdown code review with the LLM.

    Returns a copy of ``state`` with ``review_status`` set to
    ``"code_reviewed"`` and the review stored in ``llm_markdown_review``.
    If the generator reports a failure, the copy instead has
    ``review_status`` ``"error"`` and the message in ``last_error``.
    """
    logging.info("--- NODE: code_reviewer_node ---")
    review_markdown = generate_code_review_markdown(state.code_diff, state.file_contents)
    # generate_code_review_markdown reports failures by returning a string
    # with this prefix instead of raising. Without this check the error text
    # would be treated as a review and posted to the PR.
    if isinstance(review_markdown, str) and review_markdown.startswith("Error generating code review:"):
        logging.error(review_markdown)
        return state.model_copy(update={"review_status": "error", "last_error": review_markdown})
    return state.model_copy(update={"review_status": "code_reviewed", "llm_markdown_review": review_markdown})
207
+
208
def feedback_formatter_node(state: PRReviewState) -> PRReviewState:
    """Graph node: convert the raw LLM Markdown into structured review data.

    Returns a copy of ``state`` with ``review_status`` set to
    ``"review_parsed"`` and the result in ``parsed_llm_review_data``.
    """
    logging.info("--- NODE: feedback_formatter_node ---")
    structured_review = parse_llm_review_markdown(state.llm_markdown_review)
    changes = {"review_status": "review_parsed", "parsed_llm_review_data": structured_review}
    return state.model_copy(update=changes)
213
+
214
def post_code_review_node(state: PRReviewState) -> PRReviewState:
    """Graph node: publish the parsed review on the GitHub pull request.

    Returns a copy of ``state``. On success it records the review id, URL
    and main comment body with ``review_status``
    ``"initial_review_posted"``. When the parsed data is missing or posting
    raises, ``review_status`` is ``"error"`` with ``last_error`` set.
    """
    logging.info("--- NODE: post_code_review_node ---")
    review_data = state.parsed_llm_review_data
    if not review_data:
        missing = {"review_status": "error", "last_error": "Parsed review data is missing."}
        return state.model_copy(update=missing)

    try:
        outcome = post_review_comments_on_github(
            repo_name=state.repo_name,
            pr_id=state.pr_id,
            parsed_review_data=review_data,
            github_token=git_hub_token,
        )
        posted = {
            "review_status": "initial_review_posted",
            "original_review_id": outcome['review_id'],
            "original_review_url": outcome['review_url'],
            "main_comment_body": outcome['main_comment_body'],
        }
        return state.model_copy(update=posted)
    except Exception as exc:
        failed = {"review_status": "error", "last_error": f"Failed to post review: {exc}"}
        return state.model_copy(update=failed)
230
+
231
def update_review_body_based_on_human_input_node(state: PRReviewState) -> PRReviewState:
    """Graph node: prepend the human decision to the posted review body.

    When ``state.require_human_approval`` is falsy the state is returned
    unchanged. Otherwise the review identified by
    ``state.original_review_id`` is edited so its body starts with the human
    decision (and optional feedback), followed by the original body.

    Returns:
        A copy of ``state`` with ``review_status`` ``"review_submitted"`` and
        the final review id, URL and body on success, or ``"error"`` with
        ``last_error`` on failure.
    """
    logging.info("--- NODE: update_review_body_based_on_human_input_node ---")
    if not state.require_human_approval:
        return state

    if state.original_review_id is None:
        # Nothing to edit; fail with a clear message rather than an API error.
        return state.model_copy(update={"review_status": "error", "last_error": "Failed to update review: original review id is missing."})

    decision = "approved" if state.human_approval_status else "rejected"
    feedback = f"Human Feedback: {state.human_feedback_message}\n\n" if state.human_feedback_message else ""
    prefix = f"**Human Decision:** {decision}\n---\n\n{feedback}"

    if state.human_approval_status:
        updated_body = f"{prefix}Please go ahead and incorporate these Automated Bots review comments\n\n{state.main_comment_body}"
    else:
        updated_body = f"{prefix}Please IGNORE these Automated Bots review comments...\n\n{state.main_comment_body}"

    try:
        result = update_submitted_review_body(
            repo_name=state.repo_name, pr_id=state.pr_id, review_id=state.original_review_id,
            new_body=updated_body, github_token=git_hub_token
        )
    except Exception as e:
        return state.model_copy(update={"review_status": "error", "last_error": f"Failed to update review: {e}"})

    # The helper can return an error dict (e.g. for a pending review) that has
    # no 'review_id' key; surface its message instead of hitting a KeyError.
    if result.get('status') != 'success':
        reason = result.get('message', 'unknown error')
        return state.model_copy(update={"review_status": "error", "last_error": f"Failed to update review: {reason}"})

    return state.model_copy(update={
        "review_status": "review_submitted", "final_review_id": result['review_id'],
        "final_review_url": result['review_url'], "main_comment_body": result['updated_body']
    })