File size: 26,439 Bytes
bed8b5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d47a09f
 
 
 
 
 
 
 
 
 
 
 
 
 
bed8b5f
d47a09f
bed8b5f
d47a09f
 
bed8b5f
 
 
 
 
d47a09f
 
bed8b5f
d47a09f
bed8b5f
d47a09f
 
 
bed8b5f
 
 
 
d47a09f
bed8b5f
 
d47a09f
bed8b5f
 
 
d47a09f
bed8b5f
d47a09f
 
 
 
bed8b5f
d47a09f
bed8b5f
d47a09f
bed8b5f
 
d47a09f
bed8b5f
d47a09f
bed8b5f
d47a09f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bed8b5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d47a09f
 
 
 
 
 
 
 
 
 
 
 
 
bed8b5f
 
 
d47a09f
bed8b5f
d47a09f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bed8b5f
d47a09f
 
bed8b5f
d47a09f
 
 
 
 
bed8b5f
d47a09f
 
 
bed8b5f
d47a09f
bed8b5f
d47a09f
bed8b5f
 
d47a09f
 
bed8b5f
 
 
d47a09f
bed8b5f
d47a09f
bed8b5f
 
 
 
 
 
 
 
 
 
269a5f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bed8b5f
 
 
d47a09f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa133b0
 
 
 
 
 
bed8b5f
d47a09f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bed8b5f
 
d47a09f
 
 
 
bed8b5f
d47a09f
 
 
 
 
 
bed8b5f
d47a09f
bed8b5f
d47a09f
 
 
 
bed8b5f
d47a09f
 
bed8b5f
d47a09f
 
 
 
 
bed8b5f
 
d47a09f
 
 
 
 
 
 
bed8b5f
 
d47a09f
 
 
 
 
 
 
bed8b5f
d47a09f
 
bed8b5f
d47a09f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bed8b5f
d47a09f
 
bed8b5f
 
d47a09f
bed8b5f
d47a09f
 
 
 
 
bed8b5f
d47a09f
 
bed8b5f
d47a09f
 
 
 
 
bed8b5f
 
d47a09f
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
import os
import re
import sys
import logging
from typing import Dict, Any, List, Optional, Tuple
from github import Github, PullRequest
from github.GithubException import GithubException, UnknownObjectException
import requests
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

from .state import PRReviewState, LLMReviewOutput, ParsedReviewSection, ParsedComment, FileReviewComments

# --- Environment Variable Loading ---
# Load the local .env file *before* reading any variable: os.getenv() only
# sees values that are already in the process environment, so calling
# load_dotenv() afterwards would leave keys defined only in .env unset.
load_dotenv()

google_api_key = os.getenv("GOOGLE_API_KEY")
git_hub_token = os.getenv("GITHUB_token_ID")

# Missing credentials are only reported here; the functions that need them
# perform their own checks when they are called.
if not google_api_key:
    print("Google API key not found in environment variables.")
if not git_hub_token:
    print("GITHUB_token_ID not found in environment variables.")

# --- LLM Initialization ---
# temperature=0.0 is requested for the review model.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-lite", temperature=0.0, api_key=google_api_key)

# --- Logging Configuration ---
# Log records go to stdout in a "timestamp | LEVEL : message" layout.
logging.basicConfig(
    format='%(asctime)s | %(levelname)s : %(message)s',
    level=logging.INFO,
    stream=sys.stdout
)

# --- GitHub API Functions ---

def _github_error_message(exc: Exception) -> str:
    """Return the API error message attached to a PyGithub exception.

    Falls back to the stringified payload, or to ``'No message'``, when
    ``exc.data`` is missing or is not a dict, so that formatting an error
    never raises a second exception from inside an ``except`` block.
    """
    data = getattr(exc, "data", None)
    if isinstance(data, dict):
        return data.get("message", "No message")
    return str(data) if data else "No message"


def fetch_pr_code_changes(repo_name: str, pr_id: int) -> Tuple[Optional[str], Optional[Dict[str, str]], Optional[str], Optional[str]]:
    """
    Fetches the raw diff content, the full contents of changed files,
    and the head commit SHA for a given PR.
    Args:
        repo_name (str): The full name of the repository (e.g., "octocat/Spoon-Knife").
        pr_id (int): The ID of the Pull Request.
    Returns:
        Tuple[Optional[str], Optional[Dict[str, str]], Optional[str], Optional[str]]:
            - raw_diff_content (str or None): The raw diff content of the PR.
            - file_contents (Dict[str, str] or None): Dictionary mapping filename to its full content (after changes).
              Files that could not be read map to a bracketed placeholder string instead
              (e.g. "[FILE DELETED]" or "[ERROR: ...]").
            - head_commit_sha (str or None): The SHA of the head commit of the PR.
            - error_message (str or None): An error message if something went wrong.
              When it is not None, the other three items are None.
    """
    # The token is read once at import time from the GITHUB_token_ID variable.
    github_token = git_hub_token

    if not github_token:
        print("Error: GITHUB_token_ID environment variable not set.")
        return None, None, None, "GitHub token not found in environment variables."

    try:
        g = Github(github_token)
        repo = g.get_repo(repo_name)
        pull_request = repo.get_pull(pr_id)

        head_commit_sha = pull_request.head.sha
        print(f"Fetched PR {pr_id} head commit SHA: {head_commit_sha}")

        # 1. Fetch raw diff content (patch) directly from the PR's patch URL.
        patch_url = pull_request.patch_url
        headers = {"Authorization": f"token {github_token}"}
        # A timeout keeps a stalled connection from hanging the caller, and
        # raise_for_status() stops an HTTP error page from being returned as
        # if it were the diff. Both failures are reported through the generic
        # handler at the bottom of this function.
        response = requests.get(patch_url, headers=headers, timeout=30)
        response.raise_for_status()
        raw_diff_content = response.text

        # 2. Fetch full content of changed files
        file_contents: Dict[str, str] = {}
        for file in pull_request.get_files():
            # Deleted files have no content at the PR head; record a placeholder.
            if file.status == 'deleted':
                file_contents[file.filename] = "[FILE DELETED]"
                continue

            try:
                # Read the content *after* the change by pinning the lookup to
                # the head commit SHA rather than to a branch name.
                file_content_obj = repo.get_contents(file.filename, ref=head_commit_sha)

                if isinstance(file_content_obj, list):
                    print(f"Warning: '{file.filename}' is a directory or multiple files, skipping content retrieval for now.")
                    file_contents[file.filename] = "[DIRECTORY OR MULTIPLE FILES]"
                    continue

                file_contents[file.filename] = file_content_obj.decoded_content.decode('utf-8')

            except GithubException as e:
                # A failure on one file is recorded as a placeholder and does
                # not abort the rest of the PR.
                message = _github_error_message(e)
                print(f"Warning: GitHub API error fetching content for {file.filename} (PR {pr_id}, Repo {repo_name}): {e.status} - {message}")
                file_contents[file.filename] = f"[ERROR: Could not fetch content. Status: {e.status}, Message: {message}]"
            except Exception as e:
                print(f"Unexpected error fetching content for {file.filename} (PR {pr_id}, Repo {repo_name}): {e}")
                file_contents[file.filename] = f"[ERROR: Unexpected error fetching content: {e}]"

        return raw_diff_content, file_contents, head_commit_sha, None  # No error message if successful

    except UnknownObjectException as e:
        error_msg = f"GitHub object not found (repo or PR): {_github_error_message(e)}"
        print(f"Error in fetch_pr_code_changes: {error_msg}")
        return None, None, None, error_msg
    except GithubException as e:
        error_msg = f"GitHub API error for PR {pr_id} from {repo_name}: {e.status} - {_github_error_message(e)}"
        print(f"Error in fetch_pr_code_changes: {error_msg}")
        return None, None, None, error_msg
    except Exception as e:
        error_msg = f"An unexpected error occurred while fetching PR {pr_id} from {repo_name}: {e}"
        print(f"Error in fetch_pr_code_changes: {error_msg}")
        return None, None, None, error_msg

def _render_main_review_body(parsed_review_data: "LLMReviewOutput") -> str:
    """Build the top-level Markdown body of the review from the parsed data."""
    parts = ["### 🤖 Automated Code Review\n\n"]
    if parsed_review_data.overall_impression:
        parts.append(f"**Overall Impression:**\n{parsed_review_data.overall_impression}\n\n---\n\n")
    for section in parsed_review_data.general_sections:
        # Sections whose content is only whitespace are left out.
        if section.content.strip():
            parts.append(f"### {section.title}\n{section.content}\n\n---\n\n")
    if parsed_review_data.summary:
        parts.append(f"### Summary\n{parsed_review_data.summary}\n\n")
    parts.append(f"**LLM Recommended Action:** {parsed_review_data.approval_status.upper()}\n")
    return "".join(parts)


def _render_file_comment_body(file_review: "FileReviewComments") -> Optional[str]:
    """Build the consolidated comment for one file, or None if it has no comments."""
    parts = []
    for func_name, comments in file_review.sections.items():
        if not comments:
            continue
        if func_name == "General_File_Comments":
            parts.append("#### 📄 General File Comments\n\n")
        else:
            parts.append(f"#### ⚙️ Function: `{func_name}`\n\n")
        for comment in comments:
            parts.append(f"{comment.message}\n")
            if comment.suggestion:
                parts.append(f"\n```suggestion\n{comment.suggestion}\n```\n\n")
            parts.append("\n---\n\n")

    if not parts:
        return None
    header = f"### Review for `{file_review.file_path}`\n\n"
    return (header + "".join(parts)).strip()


def post_review_comments_on_github(
    repo_name: str, pr_id: int, parsed_review_data: LLMReviewOutput,
    github_token: Optional[str] = None, final_event: Optional[str] = "COMMENT"
) -> Dict[str, Any]:
    """Posts a structured code review to a GitHub Pull Request.

    Args:
        repo_name (str): Full repository name, e.g. "octocat/Spoon-Knife".
        pr_id (int): Number of the Pull Request to review.
        parsed_review_data (LLMReviewOutput): Parsed review; its overall
            impression, general sections, summary, approval status and
            per-file comments are rendered into the review.
        github_token (Optional[str]): Token used to authenticate. When None,
            it is read from the GITHUB_TOKEN environment variable, then from
            GITHUB_token_ID (the variable the rest of this module reads).
        final_event (Optional[str]): Passed through as the review ``event``.
    Returns:
        Dict[str, Any]: ``status`` ('success'), ``review_url``, ``review_id``
        and ``main_comment_body`` (the rendered top-level body).
    Raises:
        ValueError: If no GitHub token is available.
        RuntimeError: If building or posting the review fails; the original
            exception is attached as ``__cause__``.
    """
    if github_token is None:
        github_token = os.getenv("GITHUB_TOKEN") or os.getenv("GITHUB_token_ID")
    if not github_token:
        raise ValueError("GitHub token not provided.")

    try:
        g = Github(github_token)
        repo = g.get_repo(repo_name)
        pr = repo.get_pull(pr_id)

        main_review_body = _render_main_review_body(parsed_review_data)

        # The review is attached to the PR's current head commit.
        pr_commit_obj = repo.get_commit(pr.head.sha)

        github_comments = []
        for file_review in parsed_review_data.file_reviews:
            body = _render_file_comment_body(file_review)
            if body is None:
                continue
            # All comments for a file are merged into one comment that is
            # anchored at diff position 1 of that file.
            github_comments.append({
                "path": file_review.file_path, "position": 1,
                "body": body,
            })

        review = pr.create_review(commit=pr_commit_obj, body=main_review_body, event=final_event, comments=github_comments)
        return {'status': 'success', 'review_url': review.html_url, 'review_id': review.id, 'main_comment_body': main_review_body}
    except Exception as e:
        logging.error(f"Error posting review: {e}")
        # Chain the original exception so its traceback is not lost.
        raise RuntimeError(f"Failed to post review: {e}") from e

def update_submitted_review_body(
    repo_name: str, pr_id: int, review_id: int, new_body: str, github_token: Optional[str] = None
) -> Dict[str, Any]:
    """Updates the main body of an already submitted GitHub PR review.

    Args:
        repo_name (str): Full repository name, e.g. "octocat/Spoon-Knife".
        pr_id (int): Number of the Pull Request the review belongs to.
        review_id (int): ID of the review whose body is replaced.
        new_body (str): The new review body.
        github_token (Optional[str]): Token used to authenticate. When None,
            it is read from the GITHUB_TOKEN environment variable, then from
            GITHUB_token_ID (the variable the rest of this module reads).
    Returns:
        Dict[str, Any]: On success, ``status`` ('success'), ``review_url``,
        ``review_id`` and ``updated_body``. If the review is still PENDING,
        ``{'status': 'error', 'message': ...}`` is returned instead and
        nothing is edited.
    Raises:
        ValueError: If no GitHub token is available.
        RuntimeError: If looking up or editing the review fails; the original
            exception is attached as ``__cause__``.
    """
    if github_token is None:
        github_token = os.getenv("GITHUB_TOKEN") or os.getenv("GITHUB_token_ID")
    if not github_token:
        raise ValueError("GitHub token not provided.")

    try:
        g = Github(github_token)
        repo = g.get_repo(repo_name)
        pr = repo.get_pull(pr_id)
        review = pr.get_review(review_id)

        # Pending reviews are reported back to the caller rather than edited.
        if review.state == "PENDING":
            return {'status': 'error', 'message': 'Cannot update body of a pending review.'}

        review.edit(body=new_body)
        return {'status': 'success', 'review_url': review.html_url, 'review_id': review.id, 'updated_body': review.body}
    except Exception as e:
        logging.error(f"Error updating review body: {e}")
        # Chain the original exception so its traceback is not lost.
        raise RuntimeError(f"Failed to update review body: {e}") from e

# --- LLM and Parsing Functions ---

def generate_code_review_markdown(code_diff: str, file_contents: Dict[str, str]) -> str:
    """
    Ask the LLM for a Markdown code review of a diff.
    The system prompt requests a fixed set of numbered top-level sections and
    GitHub 'suggestion' code blocks so that the answer can be parsed afterwards.
    Args:
        code_diff (str): The diff text to review.
        file_contents (Dict[str, str]): Maps each file path to its full content;
                                        used as extra context for the model.
    Returns:
        str: The Markdown review, or a string starting with "Error:" when the
             LLM call raises.
    """

    # Render every file as a labelled fenced block; fall back to a fixed note
    # when there is nothing to show.
    if file_contents:
        context_text = "".join(
            f"--- Full Content of {path} ---\n```python\n{body}\n```\n\n"
            for path, body in file_contents.items()
        )
    else:
        context_text = "No full file contents provided for additional context."

    system_message = (
        "You are an expert Senior Software Engineer and a meticulous code reviewer.\n"
        "Your task is to review the provided code changes in a Pull Request.\n"
        "Analyze the `code_diff` for potential bugs, performance issues, security vulnerabilities, code style violations, maintainability concerns, and missing tests or documentation.\n"
        "Refer to the `full_file_contents` for additional context if the diff alone is insufficient to understand the changes or their implications.\n"
        "Provide a comprehensive, actionable, and constructive review.\n"
        "Format your review clearly using Markdown. Structure it with the following top-level sections:\n"
        "1. **Overall Impression:** A brief summary of the PR's purpose and overall quality.\n"
        "2. **Specific Observations and Suggestions:** Detailed feedback, grouped by file.\n"
        "   - Within each file's section, group related comments, ideally by function or logical block.\n"
        "   - For each observation/suggestion, include relevant line numbers from the *new* file for context (e.g., 'Line X-Y:').\n"
        "3. **Potential Issues and Edge Cases:** Discuss any missed scenarios or potential problems.\n"
        "4. **Security Implications:** Highlight any security concerns.\n"
        "5. **Adherence to Best Practices (PEP 8):** Comment on style and best practice compliance.\n"
        "6. **Performance Considerations:** Discuss performance aspects.\n"
        "7. **Unit Testing Suggestions:** Recommend additional tests.\n"
        "8. **Docstring/Comment Improvements:** Suggest documentation enhancements.\n"
        "9. **Clarity and Conciseness:** Feedback on code readability.\n"
        "10. **Summary:** A concise conclusion and recommended action (e.g., 'Approve', 'Request Changes', 'Comment').\n\n"
        "For code suggestions, use GitHub's Markdown code block with 'suggestion' annotation, like this:\n"
        "```suggestion\n"
        "your_suggested_code_here\n"
        "```\n"
        "Ensure file paths are correctly formatted (e.g., `src/utils/data_processor.py`)."
    )
    human_message = (
        "Here are the code changes (diff):\n"
        "```diff\n"
        "{code_diff}\n"
        "```\n\n"
        "Here are the full contents of the changed files (for additional context, use only if necessary to understand the diff):\n"
        "{full_contents_context}\n\n"
        "Please provide your structured code review in Markdown."
    )

    # Prompt template piped into the module-level chat model.
    chain = ChatPromptTemplate.from_messages(
        [("system", system_message), ("human", human_message)]
    ) | llm

    chain_inputs = {
        "code_diff": code_diff,
        "full_contents_context": context_text,
    }
    try:
        # Chat model results expose their text on the `content` attribute.
        return chain.invoke(chain_inputs).content
    except Exception as e:
        print(f"Error generating code review: {e}")
        return (
            f"Error: Could not generate code review. {e}\n\n"
            "Please check the LLM API call or token limits."
        )

# Helper to extract the suggestion block and clean the message.
def _extract_suggestion(text: str) -> Tuple[Optional[str], str]:
    """Split a comment into its first ```suggestion block and the remaining text.

    Args:
        text (str): Comment text that may contain fenced ```suggestion blocks.
    Returns:
        Tuple[Optional[str], str]:
            - The code of the first suggestion block, or None if none matched.
              Blank lines at the start and whitespace at the end are removed,
              but the indentation of the first code line is kept, because
              stripping it would change the meaning of indented code.
            - The text with every suggestion block removed and the surrounding
              whitespace stripped.
    """
    suggestion_match = re.search(r"```suggestion\n([\s\S]*?)\n```", text)
    suggestion_code = None
    if suggestion_match:
        raw_code = suggestion_match.group(1)
        # Drop whitespace-only lines at the start, then trailing whitespace.
        # A plain .strip() would also eat the first line's indentation.
        suggestion_code = re.sub(r"\A(?:[ \t\r]*\n)+", "", raw_code).rstrip()

    # Remove all suggestion blocks from the main message.
    cleaned_message = re.sub(r"```suggestion[\s\S]*?```", "", text).strip()
    return suggestion_code, cleaned_message

# Helper that turns the bullet points of a text block into ParsedComment objects.
def _parse_bullet_comments(text_block: str) -> List[ParsedComment]:
    """Return one ParsedComment per bullet item found in *text_block*.

    An item starts at a line whose first non-space character is '-' or '*'
    and runs until the next such line or the end of the text. Each item is
    passed through _extract_suggestion to separate its suggestion code from
    its message.
    """
    bullet_pattern = r"(^ *[-*]\s*[\s\S]*?)(?=\n *[-*]\s*|\Z)"
    parsed: List[ParsedComment] = []
    for bullet in re.finditer(bullet_pattern, text_block, re.MULTILINE | re.DOTALL):
        bullet_text = bullet.group(1).strip()
        if not bullet_text:
            continue
        code, message = _extract_suggestion(bullet_text)
        parsed.append(ParsedComment(message=message, suggestion=code))
    return parsed

def parse_llm_review_markdown(markdown_review: str) -> LLMReviewOutput:
    """
    Parses the LLM-generated Markdown review into a structured LLMReviewOutput Pydantic model.
    Section headers are located first; the text between one header and the
    next is that section's content.
    Args:
        markdown_review (str): The full Markdown string generated by the LLM.
    Returns:
        LLMReviewOutput: A Pydantic model containing structured review data.
            If no section header is recognized, the whole text is returned as
            the overall impression with a fixed "could not parse" summary.
    """
    temp_structured_data: Dict[str, Any] = {
        'overall_impression': None,
        'file_reviews': [],
        'general_sections': [],
        'summary': None,
        'approval_status': 'Comment'
    }

    # --- 1. Find all major section headers and their positions ---
    # Recognizes "## Section Title:" and "1. **Section Title:**", each alone on its line.
    section_header_pattern = re.compile(
        r"^(?:##\s+([\w\s/()]+):|(\d+)\.\s+\*\*([\w\s/()]+):\*\*)\s*$",
        re.MULTILINE
    )

    sections = []
    for match in section_header_pattern.finditer(markdown_review):
        # Title is group 1 (for '## Title:') or group 3 (for '1. **Title:**').
        title = match.group(1) or match.group(3)
        if title:
            sections.append({
                'title': title.strip(),
                'content_start': match.end(),
                'header_start': match.start()
            })

    if not sections:
        if markdown_review.strip():
            temp_structured_data['summary'] = "Could not parse the review markdown. The format was not recognized."
            temp_structured_data['overall_impression'] = markdown_review
        return LLMReviewOutput(**temp_structured_data)

    # Compiled once here instead of on every loop iteration.
    # Handles "- **`file.py`:**", "### file.py", and "**File: file.py**".
    file_header_line_pattern = re.compile(
        r"^\s*(?:-\s+\*\*(?:`?)([\w\/\.\-_]+\.\w+)(?:`?):\*\*|###\s*`?([\w\/\.\-_]+\.\w+)`?|\*\*File:\s*`?([\w\/\.\-_]+\.\w+)`?\*\*)\s*$",
        re.MULTILINE
    )

    # Canonical spellings of the recommended action. str.capitalize() would
    # lowercase the second word and produce "Request changes", which matches
    # neither the prompt's wording nor the style of the default 'Comment'.
    # NOTE(review): assumes LLMReviewOutput expects "Request Changes" - confirm.
    canonical_status = {
        'approve': 'Approve',
        'request changes': 'Request Changes',
        'comment': 'Comment',
    }

    # --- 2. Process each identified section by extracting content between headers ---
    for i, current_section in enumerate(sections):
        title = current_section['title']
        content_start = current_section['content_start']

        # The content ends where the next section's header begins.
        # For the last section, it ends at the end of the string.
        content_end = sections[i + 1]['header_start'] if i + 1 < len(sections) else len(markdown_review)

        content = markdown_review[content_start:content_end].strip()

        if "Overall Impression" in title:
            temp_structured_data['overall_impression'] = content

        elif "Specific Observations and Suggestions" in title:
            file_matches = list(file_header_line_pattern.finditer(content))

            for j, match in enumerate(file_matches):
                # Exactly one of the three alternatives captured the file name.
                file_name = next((g for g in match.groups() if g is not None), None)
                if not file_name:
                    continue

                file_name = file_name.strip().replace('`', '')

                # A file's block runs up to the next file header (or the end).
                start_idx = match.end()
                end_idx = file_matches[j + 1].start() if j + 1 < len(file_matches) else len(content)
                file_content_block = content[start_idx:end_idx].strip()

                parsed_comments = _parse_bullet_comments(file_content_block)

                # Files without any bullet comment are left out of the result.
                if parsed_comments:
                    temp_structured_data['file_reviews'].append(FileReviewComments(
                        file_path=file_name,
                        sections={"General_File_Comments": parsed_comments}
                    ))

        elif "Summary" in title:
            temp_structured_data['summary'] = content
            # Extract approval status from the summary
            approval_match = re.search(r"^\s*\*\*(?:Action|Recommended Action|Status):\*\*\s*(Approve|Request Changes|Comment)", content, re.IGNORECASE | re.MULTILINE)
            if approval_match:
                matched_status = approval_match.group(1).strip().lower()
                temp_structured_data['approval_status'] = canonical_status.get(matched_status, 'Comment')

        else:  # Any other section is treated as a general section
            if content:
                temp_structured_data['general_sections'].append(ParsedReviewSection(
                    title=title,
                    content=content
                ))

    # --- 3. Final fallbacks and cleanup ---
    if not temp_structured_data['summary']:
        temp_structured_data['summary'] = "Automated review completed."

    return LLMReviewOutput(**temp_structured_data)

# --- Graph Nodes ---

def code_retriever_node(state:PRReviewState):
    """Graph node: fetch the PR's diff and changed-file contents into the state.

    Args:
        state (PRReviewState): Current state; `repo_name` and `pr_id` are read.
    Returns:
        A copy of the state. On success, `code_diff` and `file_contents` are
        filled in and `review_status` is "code_fetched". If fetching reports
        an error, `review_status` is "error" and `last_error` holds the
        message, so a failed fetch is not recorded as a successful one.
    """
    repo_name = state.repo_name
    pull_req_id = state.pr_id

    print("code_retriever_node started")
    print(f"repo_name :{repo_name}-------- pull_req_id:{pull_req_id}")

    # The head commit SHA is returned by the fetch but not stored by this node.
    diff, contents, _head_commit_sha, error = fetch_pr_code_changes(repo_name, pull_req_id)

    if error:
        print(f"code_retriever_node failed: {error}")
        return state.model_copy(update={
            "review_status": "error",
            "last_error": error
        })

    # Nodes return an updated copy of the state rather than mutating it.
    updated_state = state.model_copy(update={
        "review_status": "code_fetched",
        "code_diff": diff,
        "file_contents": contents
    })
    return updated_state

def code_reviewer_node(state:PRReviewState):
    """Graph node: generate the LLM's Markdown review for the fetched changes.

    Reads `code_diff` and `file_contents` from the state, prints the raw
    Markdown for debugging, and returns a copy of the state with
    `llm_markdown_review` set and `review_status` equal to "code_reviewed".
    """
    diff_text = state.code_diff
    changed_files = state.file_contents

    print("code_reviewer_node started")

    review_markdown = generate_code_review_markdown(diff_text, changed_files)

    # Debug output: the untouched LLM answer between two separator lines.
    separator = "=" * 50
    print("\n" + separator)
    print("--- RAW LLM MARKDOWN OUTPUT ---")
    print(review_markdown)
    print(separator + "\n")

    # Nodes return an updated copy of the state rather than mutating it.
    changes = {
        "review_status": "code_reviewed",
        "llm_markdown_review": review_markdown,
    }
    return state.model_copy(update=changes)

def feedback_formatter_node(state: PRReviewState):
    """Graph node: parse the LLM's Markdown review into structured data.

    Returns a copy of the state with `parsed_llm_review_data` set to the
    result of parse_llm_review_markdown and `review_status` equal to
    "review_parsed".
    """
    print("feedback_formatter_node started")

    structured_review = parse_llm_review_markdown(state.llm_markdown_review)

    # Nodes return an updated copy of the state rather than mutating it.
    changes = {
        "review_status": "review_parsed",
        "parsed_llm_review_data": structured_review,
    }
    return state.model_copy(update=changes)

def post_code_review_node(state: PRReviewState) -> PRReviewState:
    """
    Graph node: post the parsed LLM review to the Pull Request on GitHub.

    The review is posted through post_review_comments_on_github using that
    helper's default review event. On success the returned state copy
    records the review id, URL and main comment body; on any exception it
    records the failure in `last_error` with `review_status` "error".

    Raises:
        ValueError: If `parsed_llm_review_data` is missing from the state.
    """
    print("--- NODE: post_code_review_node ---")
    if not state.parsed_llm_review_data:
        raise ValueError("Cannot post pending review: parsed_llm_review_data is missing.")

    target_repo = state.repo_name
    target_pr = state.pr_id
    review_data = state.parsed_llm_review_data

    try:
        result = post_review_comments_on_github(
            repo_name=target_repo,
            pr_id=target_pr,
            parsed_review_data=review_data,
            github_token=git_hub_token,
        )
        print(f"result from post_pending_review_node():result = {result}")

        success_changes = {
            "review_status": "initial_review_posted",
            "original_review_id": result['review_id'],
            "original_review_url": result['review_url'],
            "main_comment_body": result['main_comment_body'],
            "last_error": None,  # Clear previous errors
        }
        return state.model_copy(update=success_changes)
    except Exception as e:
        print(f"Error posting pending review: {e}")
        failure_changes = {
            "review_status": "error",
            "last_error": f"Failed to post pending review: {e}",
        }
        return state.model_copy(update=failure_changes)


def update_review_body_based_on_human_input_node(state: PRReviewState) -> PRReviewState:
    """
    Graph node: prepend the human reviewer's decision to the posted review body.

    The state is returned unchanged when `require_human_approval` is falsy or
    when `human_approval_status` is neither True nor False. Otherwise the
    review identified by `original_review_id` is updated with a body made of
    the human decision, the optional `human_feedback_message`, and the
    original `main_comment_body`.

    Returns:
        PRReviewState: On success, a copy with `review_status`
        "review_submitted", `final_review_id`, `final_review_url` and the
        updated `main_comment_body`. If the update fails or is refused, a
        copy with `review_status` "error" and `last_error` set.
    Raises:
        ValueError: If `main_comment_body` is missing from the state.
    """
    print("--- NODE: update_review_body_based_on_human_input_node ---")
    if not state.main_comment_body:
        raise ValueError("Cannot update submitted review body: main_comment_body is missing.")

    if not state.require_human_approval:
        print("require_human_approval is False so exiting this function")
        logging.info("require_human_approval is False so exiting this function")
        return state

    repo_name = state.repo_name
    pr_id = state.pr_id
    original_review_id = state.original_review_id
    main_comment_body = state.main_comment_body

    # Identity checks on purpose: None (no decision yet) must fall through to
    # the final `else` instead of being treated as a rejection.
    if state.human_approval_status is True:
        if state.human_feedback_message is not None:
            updated_review_body = f"""**Human Decision:** approved\n---\n\nHuman Feedback: {state.human_feedback_message}\n\n Please go ahead and incorporate these Automated Bots review comments\n\n{main_comment_body}"""
        else:
            updated_review_body = f"""**Human Decision:** approved\n---\n\n Please go ahead and incorporate these Automated Bots review comments\n\n{main_comment_body}"""

    elif state.human_approval_status is False:
        if state.human_feedback_message is not None:
            updated_review_body = f"""**Human Decision:** Reject\n---\n\nHuman Feedback: {state.human_feedback_message}\n\n Please IGNORE these Automated Bots review comments and wait for new review comments from your team\n\n{main_comment_body}"""
        else:
            updated_review_body = f"""**Human Decision:** Reject\n---\n\nPlease IGNORE these Automated Bots review comments and wait for new review comments from your team\n\n{main_comment_body}"""
    else:
        return state

    try:
        result = update_submitted_review_body(
            repo_name=repo_name,
            pr_id=pr_id,
            review_id=original_review_id,
            new_body=updated_review_body,
            github_token=git_hub_token
        )

        print(f"result from update_submitted_review_body_node():result = {result}")

        # The helper reports some failures (e.g. a still-pending review) as a
        # result dict with status 'error' and no review_id/review_url keys.
        # Handle that explicitly instead of failing with a KeyError below.
        if result.get('status') != 'success':
            failure_reason = result.get('message', 'Unknown error')
            print(f"Error updating review body: {failure_reason}")
            return state.model_copy(update={
                "review_status": "error",
                "last_error": f"Failed to update review body: {failure_reason}"
            })

        return state.model_copy(update={
            "review_status": "review_submitted",
            "final_review_id": result['review_id'],
            "final_review_url": result['review_url'],
            "main_comment_body": result['updated_body'],
            "last_error": None  # Clear previous errors
        })
    except Exception as e:
        print(f"Error updating review body: {e}")
        return state.model_copy(update={
            "review_status": "error",
            "last_error": f"Failed to update review body: {e}"
        })