Spaces:
Sleeping
Sleeping
Create langgraph_pr_review_bot.py
Browse files- langgraph_pr_review_bot.py +782 -0
langgraph_pr_review_bot.py
ADDED
|
@@ -0,0 +1,782 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, Any, Optional
|
| 2 |
+
from pydantic import BaseModel, Field
|
| 3 |
+
from uuid import uuid4
|
| 4 |
+
|
| 5 |
+
class PRReviewState(BaseModel):
    """Shared LangGraph state for a single PR review run.

    The mandatory identifiers are supplied at graph entry; every other field
    is filled in by the pipeline nodes as the review progresses.
    """

    # --- GitHub PR identification (mandatory) ---
    pr_id: int                  # pull request number
    repo_name: str              # full repository name, e.g. "owner/repo"

    # --- GitHub PR metadata (optional) ---
    diff_url: Optional[str] = None
    pr_title: Optional[str] = None
    pr_author: Optional[str] = None

    # --- Code content (optional, populated by the retriever node) ---
    code_diff: Optional[str] = None  # fetched raw diff (patch) content
    # Map of filename -> full file content used as extra LLM context.
    # default_factory makes the empty-dict default explicit rather than
    # relying on pydantic's implicit copying of a `{}` literal.
    file_contents: Dict[str, str] = Field(default_factory=dict)

    # --- LLM review outputs ---
    llm_markdown_review: Optional[str] = None  # raw Markdown output from the LLM
    parsed_llm_review_data: Optional[Dict[str, Any]] = None  # structured dict parsed from the Markdown

    # --- Human-in-the-loop (simplified for phase 1) ---
    require_human_approval: bool = False       # config flag, set at graph initialization
    human_approval_status: Optional[bool] = None   # True if approved, False if rejected
    human_feedback_message: Optional[str] = None   # any message from a human rejection

    # --- System status ---
    # e.g. "initiated", "fetching_code", "code_retrieved", "generating_llm_review",
    # "llm_review_generated", "parsing_llm_review", "review_parsed",
    # "awaiting_human_approval", "posting_review", "posted", "rejected", "failed"
    review_status: str = "initiated"
    last_error: Optional[str] = None           # last encountered error message
    review_id: Optional[int] = None            # pending GitHub pull-request-review id (approved/discarded via HIL)
    review_comment_url: Optional[str] = None   # URL of the main posted GitHub review comment
| 35 |
+
|
| 36 |
+
'''
|
| 37 |
+
from kaggle_secrets import UserSecretsClient
|
| 38 |
+
user_secrets = UserSecretsClient()
|
| 39 |
+
git_hub_token = user_secrets.get_secret("GITHUB_token_ID")
|
| 40 |
+
google_api_key = user_secrets.get_secret("GOOGLE_API_KEY")
|
| 41 |
+
'''
|
| 42 |
+
|
| 43 |
+
import os
|
| 44 |
+
from typing import Dict, Any, List, Optional, Tuple # Ensure Tuple is imported
|
| 45 |
+
from github import Github, PullRequest
|
| 46 |
+
from github.GithubException import GithubException, UnknownObjectException
|
| 47 |
+
import requests # Make sure requests is imported for patch_url
|
| 48 |
+
from dotenv import load_dotenv
|
| 49 |
+
|
| 50 |
+
# Local development may keep credentials in a .env file; deployed environments
# are expected to set them directly. Load the .env file only when GITHUB_TOKEN
# is absent, so already-configured runs (and production) skip the extra work.
if not os.getenv("GITHUB_TOKEN"):
    load_dotenv()
| 57 |
+
|
| 58 |
+
def fetch_pr_code_changes(repo_name: str, pr_id: int) -> Tuple[Optional[str], Optional[Dict[str, str]], Optional[str], Optional[str]]:
    """Fetch the raw diff, changed-file contents, and head commit SHA for a PR.

    Args:
        repo_name: Full repository name, e.g. "octocat/Spoon-Knife".
        pr_id: The pull request number.

    Returns:
        Tuple of:
        - raw_diff_content: the raw diff (patch) text of the PR, or None on failure.
        - file_contents: mapping of filename -> full content at the PR head,
          or None on failure. Deleted files map to "[FILE DELETED]".
        - head_commit_sha: SHA of the PR's head commit, or None on failure.
        - error_message: description of the failure, or None on success.
    """
    # FIX: rely purely on the environment for credentials. The previous
    # revision assigned `github_token = git_hub_token`, a global defined only
    # inside the commented-out Kaggle-secrets block, so this function raised
    # NameError outside that notebook environment.
    github_token = os.getenv("GITHUB_TOKEN")

    if not github_token:
        print("Error: GITHUB_TOKEN environment variable not set.")
        return None, None, None, "GitHub token not found in environment variables."

    try:
        g = Github(github_token)
        repo = g.get_repo(repo_name)
        pull_request = repo.get_pull(pr_id)

        # Head commit SHA is needed later to anchor review comments to the
        # exact revision that was reviewed.
        head_commit_sha = pull_request.head.sha
        print(f"Fetched PR {pr_id} head commit SHA: {head_commit_sha}")

        # 1. Fetch the raw diff (patch). Plain HTTP via `requests` is used
        # because PyGithub's patch accessor can be rate-limited differently.
        patch_url = pull_request.patch_url
        headers = {"Authorization": f"token {github_token}"}
        raw_diff_content = requests.get(patch_url, headers=headers).text

        # 2. Fetch the full content of every changed file at the PR head.
        file_contents: Dict[str, str] = {}
        for file in pull_request.get_files():
            # Deleted files have no content at the head commit.
            if file.status == 'deleted':
                file_contents[file.filename] = "[FILE DELETED]"
                continue

            try:
                # Pin to head.sha (not head.ref) so the content stays tied to
                # the exact commit even if the branch moves afterwards.
                file_content_obj = repo.get_contents(file.filename, ref=pull_request.head.sha)

                # get_contents returns a list when the path is a directory.
                if isinstance(file_content_obj, list):
                    print(f"Warning: '{file.filename}' is a directory or multiple files, skipping content retrieval for now.")
                    file_contents[file.filename] = "[DIRECTORY OR MULTIPLE FILES]"
                    continue

                file_contents[file.filename] = file_content_obj.decoded_content.decode('utf-8')

            except GithubException as e:
                # Per-file failures are recorded in-band so one bad file does
                # not abort the whole fetch.
                print(f"Warning: GitHub API error fetching content for {file.filename} (PR {pr_id}, Repo {repo_name}): {e.status} - {e.data.get('message', 'No message')}")
                file_contents[file.filename] = f"[ERROR: Could not fetch content. Status: {e.status}, Message: {e.data.get('message', 'No message')}]"
            except Exception as e:
                print(f"Unexpected error fetching content for {file.filename} (PR {pr_id}, Repo {repo_name}): {e}")
                file_contents[file.filename] = f"[ERROR: Unexpected error fetching content: {e}]"

        return raw_diff_content, file_contents, head_commit_sha, None  # no error message on success

    except UnknownObjectException as e:
        error_msg = f"GitHub object not found (repo or PR): {e.data.get('message', 'No message')}"
        print(f"Error in fetch_pr_code_changes: {error_msg}")
        return None, None, None, error_msg
    except GithubException as e:
        error_msg = f"GitHub API error for PR {pr_id} from {repo_name}: {e.status} - {e.data.get('message', 'No message')}"
        print(f"Error in fetch_pr_code_changes: {error_msg}")
        return None, None, None, error_msg
    except Exception as e:
        error_msg = f"An unexpected error occurred while fetching PR {pr_id} from {repo_name}: {e}"
        print(f"Error in fetch_pr_code_changes: {error_msg}")
        return None, None, None, error_msg
| 148 |
+
|
| 149 |
+
def code_retriever_node(state: PRReviewState) -> PRReviewState:
    """LangGraph node: fetch the PR diff and changed-file contents into state.

    FIX: the previous revision discarded the error returned by
    fetch_pr_code_changes and marked the run "code_retrieved" even when the
    fetch failed entirely (leaving code_diff=None for downstream nodes). On
    failure we now record the error and mark the run "failed".
    """
    repo_name = state.repo_name
    pull_req_id = state.pr_id

    print(f"repo_name :{repo_name}-------- pull_req_id:{pull_req_id}")

    diff, contents, head_commit_sha, error = fetch_pr_code_changes(repo_name, pull_req_id)

    if error is not None:
        # Surface the failure to downstream nodes / routing logic.
        return state.model_copy(update={
            "review_status": "failed",
            "last_error": error,
        })

    # Nodes must return an updated state; model_copy keeps the update immutable.
    return state.model_copy(update={
        "review_status": "code_retrieved",
        "code_diff": diff,
        "file_contents": contents,
    })
| 165 |
+
|
| 166 |
+
import os
|
| 167 |
+
from typing import Dict, Any
|
| 168 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 169 |
+
# Ensure you have your LLM provider installed, e.g., pip install langchain-google-genai
|
| 170 |
+
from langchain_google_genai import ChatGoogleGenerativeAI # Using Gemini as per your preference
|
| 171 |
+
|
| 172 |
+
# Initialize the LLM. Requires GOOGLE_API_KEY to be set in the environment.
# Other models (e.g. "gemini-1.5-flash", "gemini-1.5-pro") can be swapped in.
# temperature=0.0 keeps the review output as deterministic as possible.
# FIX: the previous revision passed the undefined global `google_api_key`
# (only defined inside the commented-out Kaggle bootstrap block); read the
# key from the environment instead, matching the GITHUB_TOKEN handling.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0.0,
    api_key=os.getenv("GOOGLE_API_KEY"),
)
|
| 175 |
+
|
| 176 |
+
def generate_code_review_markdown(code_diff: str, file_contents: Dict[str, str]) -> str:
    """Generate a detailed, human-readable code review in Markdown from the LLM.

    The prompt elicits structured Markdown that can later be parsed into
    GitHub PR comments, grouped by file and function.

    Args:
        code_diff: The string representation of the code diff.
        file_contents: Mapping of file path -> full file content.

    Returns:
        A Markdown string with the review, or an error-description string if
        the LLM call fails.
    """
    # Prepare the full-contents context, one fenced code block per file.
    full_contents_str = ""
    if file_contents:
        for filename, content in file_contents.items():
            # FIX: interpolate the filename into the header. The previous
            # revision never used the `filename` loop variable, so the LLM
            # could not tell which content belonged to which file.
            full_contents_str += f"--- Full Content of {filename} ---\n```python\n{content}\n```\n\n"
    else:
        full_contents_str = "No full file contents provided for additional context."

    # Construct the prompt template. The numbered section list below must stay
    # in sync with the header names that parse_llm_review_markdown expects.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system",
             "You are an expert Senior Software Engineer and a meticulous code reviewer.\n"
             "Your task is to review the provided code changes in a Pull Request.\n"
             "Analyze the `code_diff` for potential bugs, performance issues, security vulnerabilities, code style violations, maintainability concerns, and missing tests or documentation.\n"
             "Refer to the `full_file_contents` for additional context if the diff alone is insufficient to understand the changes or their implications.\n"
             "Provide a comprehensive, actionable, and constructive review.\n"
             "Format your review clearly using Markdown. Structure it with the following top-level sections:\n"
             "1. **Overall Impression:** A brief summary of the PR's purpose and overall quality.\n"
             "2. **Specific Observations and Suggestions:** Detailed feedback, grouped by file.\n"
             "   - Within each file's section, group related comments, ideally by function or logical block.\n"
             "   - For each observation/suggestion, include relevant line numbers from the *new* file for context (e.g., 'Line X-Y:').\n"
             "3. **Potential Issues and Edge Cases:** Discuss any missed scenarios or potential problems.\n"
             "4. **Security Implications:** Highlight any security concerns.\n"
             "5. **Adherence to Best Practices (PEP 8):** Comment on style and best practice compliance.\n"
             "6. **Performance Considerations:** Discuss performance aspects.\n"
             "7. **Unit Testing Suggestions:** Recommend additional tests.\n"
             "8. **Docstring/Comment Improvements:** Suggest documentation enhancements.\n"
             "9. **Clarity and Conciseness:** Feedback on code readability.\n"
             "10. **Summary:** A concise conclusion and recommended action (e.g., 'Approve', 'Request Changes', 'Comment').\n\n"
             "For code suggestions, use GitHub's Markdown code block with 'suggestion' annotation, like this:\n"
             "```suggestion\n"
             "your_suggested_code_here\n"
             "```\n"
             "Ensure file paths are correctly formatted (e.g., `src/utils/data_processor.py`)."
             ),
            ("human",
             "Here are the code changes (diff):\n"
             "```diff\n"
             "{code_diff}\n"
             "```\n\n"
             "Here are the full contents of the changed files (for additional context, use only if necessary to understand the diff):\n"
             "{full_contents_context}\n\n"
             "Please provide your structured code review in Markdown."
             ),
        ]
    )

    # Compose prompt and model into a single runnable chain.
    review_chain = prompt | llm

    # Invoke the chain; any provider/transport failure is reported in-band so
    # the calling node can decide how to proceed.
    try:
        review_markdown = review_chain.invoke({
            "code_diff": code_diff,
            "full_contents_context": full_contents_str
        }).content  # chat models return a message; .content holds the text
        return review_markdown
    except Exception as e:
        print(f"Error generating code review: {e}")
        return f"Error: Could not generate code review. {e}\n\n" \
               f"Please check the LLM API call or token limits."
|
| 255 |
+
|
| 256 |
+
def code_reviewer_node(state: PRReviewState) -> PRReviewState:
    """LangGraph node: run the LLM reviewer over the diff held in state."""
    review_markdown = generate_code_review_markdown(state.code_diff, state.file_contents)

    # Nodes must hand back an updated state; only the status and the raw
    # Markdown review change here.
    return state.model_copy(update={
        "review_status": "code_reviewed",
        "llm_markdown_review": review_markdown,
    })
| 269 |
+
|
| 270 |
+
|
| 271 |
+
import re
|
| 272 |
+
from typing import List, Dict, Tuple, Optional, Any
|
| 273 |
+
|
| 274 |
+
class ParsedComment:
    """A single review remark parsed from the LLM's Markdown output,
    optionally carrying a ```suggestion``` code block, ready for grouping
    by file/function.
    """

    def __init__(self, message: str, suggestion: Optional[str] = None):
        self.message = message
        self.suggestion = suggestion

    def __repr__(self):
        preview = self.message[:50]
        has_sugg = self.suggestion is not None
        return f"ParsedComment(msg='{preview}...', has_suggestion={has_sugg})"
| 285 |
+
|
| 286 |
+
class ParsedReviewSection:
    """A categorized top-level section of the review, e.g. 'Potential Issues'."""

    def __init__(self, title: str, content: str):
        self.title = title
        self.content = content

    def __repr__(self):
        snippet = self.content[:50]
        return f"ParsedReviewSection(title='{self.title}', content='{snippet}...')"
| 296 |
+
|
| 297 |
+
|
| 298 |
+
# Helper to extract suggestion block and clean message
|
| 299 |
+
def _extract_suggestion(text: str) -> Tuple[Optional[str], str]:
|
| 300 |
+
"""Helper to extract suggestion block and clean message."""
|
| 301 |
+
suggestion_match = re.search(r"```suggestion\n([\s\S]*?)\n```", text, re.MULTILINE)
|
| 302 |
+
suggestion_code = suggestion_match.group(1).strip() if suggestion_match else None
|
| 303 |
+
|
| 304 |
+
# Remove suggestion from the main message
|
| 305 |
+
cleaned_message = re.sub(r"```suggestion[\s\S]*?```", "", text).strip()
|
| 306 |
+
return suggestion_code, cleaned_message
|
| 307 |
+
|
| 308 |
+
def _parse_bullet_comments(text_block: str) -> List[ParsedComment]:
    """Parse bullet-point ('-' or '*') comments out of *text_block*.

    Each bullet is matched from its marker up to the start of the next bullet
    or the end of the block, so multi-line comments and embedded
    ```suggestion``` fences are captured whole.
    """
    matches = re.finditer(
        r"(^ *[-*]\s*[\s\S]*?)(?=\n *[-*]\s*|\Z)",
        text_block,
        re.MULTILINE | re.DOTALL,
    )

    parsed: List[ParsedComment] = []
    for match in matches:
        raw = match.group(1).strip()
        if not raw:
            continue
        suggestion, message = _extract_suggestion(raw)
        parsed.append(ParsedComment(message=message, suggestion=suggestion))
    return parsed
| 321 |
+
|
| 322 |
+
|
| 323 |
+
def parse_llm_review_markdown(markdown_review: str) -> Dict[str, Any]:
|
| 324 |
+
"""
|
| 325 |
+
Parses the LLM-generated Markdown review into a structured dictionary.
|
| 326 |
+
It extracts the overall summary, file-specific/function-specific comments,
|
| 327 |
+
and other general review sections.
|
| 328 |
+
|
| 329 |
+
Args:
|
| 330 |
+
markdown_review (str): The full Markdown string generated by the LLM.
|
| 331 |
+
|
| 332 |
+
Returns:
|
| 333 |
+
Dict[str, Any]: A dictionary containing structured review data:
|
| 334 |
+
- 'overall_impression': str
|
| 335 |
+
- 'file_comments': Dict[str, Dict[str, List[ParsedComment]]]
|
| 336 |
+
(file_path -> function_name -> List[ParsedComment])
|
| 337 |
+
- 'general_sections': List[ParsedReviewSection]
|
| 338 |
+
- 'summary': str
|
| 339 |
+
- 'approval_status': str (extracted from summary, if present)
|
| 340 |
+
"""
|
| 341 |
+
structured_review: Dict[str, Any] = {
|
| 342 |
+
'overall_impression': '',
|
| 343 |
+
'file_comments': {},
|
| 344 |
+
'general_sections': [],
|
| 345 |
+
'summary': '',
|
| 346 |
+
'approval_status': 'Comment' # Default status
|
| 347 |
+
}
|
| 348 |
+
|
| 349 |
+
# Helper to extract content between two headers.
|
| 350 |
+
# Now more flexible: allows optional numbering and variable header level for top sections
|
| 351 |
+
def extract_section_content(text: str, start_header_text: str, end_header_text: str) -> Optional[str]:
|
| 352 |
+
# Pattern to match headers with optional numbering and flexible spacing
|
| 353 |
+
start_pattern = r"^(?:##|###)\s*\d*\.?\s*" + re.escape(start_header_text) + r":\s*$"
|
| 354 |
+
end_pattern = r"^(?:##|###)\s*\d*\.?\s*" + re.escape(end_header_text) + r":\s*$"
|
| 355 |
+
|
| 356 |
+
# Use re.DOTALL to allow . to match newlines
|
| 357 |
+
match = re.search(f"{start_pattern}([\\s\\S]*?)(?={end_pattern}|\\Z)", text, re.MULTILINE | re.DOTALL)
|
| 358 |
+
if match:
|
| 359 |
+
return match.group(1).strip()
|
| 360 |
+
return None
|
| 361 |
+
|
| 362 |
+
# --- 1. Extract Overall Impression ---
|
| 363 |
+
overall_impression_content = extract_section_content(markdown_review, "Overall Impression", "Specific Observations and Suggestions")
|
| 364 |
+
if overall_impression_content:
|
| 365 |
+
structured_review['overall_impression'] = overall_impression_content
|
| 366 |
+
|
| 367 |
+
# --- 2. Extract Specific Observations and Suggestions (File/Function Comments) ---
|
| 368 |
+
specific_obs_section_content = extract_section_content(markdown_review, "Specific Observations and Suggestions", "Potential Issues and Edge Cases")
|
| 369 |
+
|
| 370 |
+
# Debug prints for specific_obs_section_content (kept for verification)
|
| 371 |
+
print(f"\n--- DEBUG: specific_obs_section_content (extracted from markdown_review) ---")
|
| 372 |
+
if specific_obs_section_content is None:
|
| 373 |
+
print("specific_obs_section_content is None")
|
| 374 |
+
elif not specific_obs_section_content.strip():
|
| 375 |
+
print("specific_obs_section_content is empty or only whitespace")
|
| 376 |
+
else:
|
| 377 |
+
print(specific_obs_section_content[:500] + "..." if len(specific_obs_section_content) > 500 else specific_obs_section_content)
|
| 378 |
+
print(f"--- END DEBUG: specific_obs_section_content ---\n")
|
| 379 |
+
|
| 380 |
+
print(f"\n--- DEBUG: Raw specific_obs_section_content (using repr()):")
|
| 381 |
+
if specific_obs_section_content is not None:
|
| 382 |
+
print(repr(specific_obs_section_content))
|
| 383 |
+
print(f"Length of specific_obs_section_content: {len(specific_obs_section_content)}")
|
| 384 |
+
print(f"Does it start with '### `data_processor.py`'? {specific_obs_section_content.startswith('### `data_processor.py`')}")
|
| 385 |
+
starts_as_file_header = False
|
| 386 |
+
if specific_obs_section_content.startswith('### `') or specific_obs_section_content.startswith('**File:'):
|
| 387 |
+
starts_as_file_header = True
|
| 388 |
+
print(f"Does it start with a common file header pattern? {starts_as_file_header}")
|
| 389 |
+
else:
|
| 390 |
+
print("specific_obs_section_content is None.")
|
| 391 |
+
print(f"--- END DEBUG: Raw specific_obs_section_content ---\n")
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
if specific_obs_section_content:
|
| 395 |
+
# NEW STRATEGY FOR FILE BLOCK PARSING:
|
| 396 |
+
# Step 1: Find all file header line matches first
|
| 397 |
+
file_header_line_pattern = re.compile(
|
| 398 |
+
r"^(?:\*\*File:\s*`?([\w\/\.\-_]+\.\w+)`?\*\*|###\s*`?([\w\/\.\-_]+\.\w+)`?)\s*$",
|
| 399 |
+
re.MULTILINE
|
| 400 |
+
)
|
| 401 |
+
|
| 402 |
+
header_matches = list(file_header_line_pattern.finditer(specific_obs_section_content))
|
| 403 |
+
|
| 404 |
+
print(f"--- DEBUG: Number of file_header_line_pattern matches found (New Strategy): {len(header_matches)} ---")
|
| 405 |
+
if not header_matches:
|
| 406 |
+
print("No file headers were found. Cannot parse file blocks.")
|
| 407 |
+
pass
|
| 408 |
+
else:
|
| 409 |
+
# Step 2: Iterate through header matches and extract content blocks
|
| 410 |
+
for i, header_match in enumerate(header_matches):
|
| 411 |
+
file_name = (header_match.group(1) or header_match.group(2)).strip().replace('`', '')
|
| 412 |
+
|
| 413 |
+
# Determine the start of the content block (after the header line)
|
| 414 |
+
content_start_index = header_match.end()
|
| 415 |
+
|
| 416 |
+
# Determine the end of the content block (start of next header or end of section content)
|
| 417 |
+
content_end_index = len(specific_obs_section_content)
|
| 418 |
+
if i + 1 < len(header_matches):
|
| 419 |
+
content_end_index = header_matches[i+1].start()
|
| 420 |
+
|
| 421 |
+
file_content_block = specific_obs_section_content[content_start_index:content_end_index].strip()
|
| 422 |
+
|
| 423 |
+
print(f"\n--- DEBUG: Processing file (new strategy): {file_name} ---")
|
| 424 |
+
print(f"File content block (first 200 chars):\n{file_content_block[:200]}..." if len(file_content_block) > 200 else file_content_block)
|
| 425 |
+
|
| 426 |
+
if not file_name: continue
|
| 427 |
+
|
| 428 |
+
structured_review['file_comments'][file_name] = {}
|
| 429 |
+
general_comments_for_file: List[ParsedComment] = []
|
| 430 |
+
|
| 431 |
+
# Refined split to capture general file comments and specific function/section comments
|
| 432 |
+
# Matches '#### Function: `func_name`' OR '#### Any other section title'
|
| 433 |
+
sub_section_header_pattern = re.compile(
|
| 434 |
+
r"^(####\s*(?:Function:\s*`?([\w_]+)`?|[\s\S]+?))\s*$",
|
| 435 |
+
re.MULTILINE
|
| 436 |
+
)
|
| 437 |
+
|
| 438 |
+
sub_section_matches_list = list(sub_section_header_pattern.finditer(file_content_block))
|
| 439 |
+
print(f"--- DEBUG: Number of sub-section (####) matches for {file_name}: {len(sub_section_matches_list)} ---")
|
| 440 |
+
if not sub_section_matches_list:
|
| 441 |
+
print(f"No '####' sub-sections were found in the block for {file_name}. All content will be general comments or missed.")
|
| 442 |
+
if file_content_block.strip():
|
| 443 |
+
parsed_general_comments = _parse_bullet_comments(file_content_block.strip())
|
| 444 |
+
structured_review['file_comments'][file_name]["General_File_Comments"] = parsed_general_comments
|
| 445 |
+
print(f" - DEBUG: Parsed {len(parsed_general_comments)} general comments for {file_name}.")
|
| 446 |
+
continue
|
| 447 |
+
|
| 448 |
+
# If sub-sections (#### headers) ARE found, process comments before the first sub-section header (these are file-level comments)
|
| 449 |
+
first_match_start_index = sub_section_matches_list[0].start()
|
| 450 |
+
pre_section_comments_content = file_content_block[:first_match_start_index].strip()
|
| 451 |
+
if pre_section_comments_content:
|
| 452 |
+
general_comments_for_file.extend(_parse_bullet_comments(pre_section_comments_content))
|
| 453 |
+
print(f" - DEBUG: Added {len(general_comments_for_file)} general comments (before first sub-section) for {file_name}.")
|
| 454 |
+
|
| 455 |
+
|
| 456 |
+
# Process each sub-section
|
| 457 |
+
for k, current_match in enumerate(sub_section_matches_list):
|
| 458 |
+
section_header_raw = current_match.group(1).strip()
|
| 459 |
+
func_name_from_group = current_match.group(2)
|
| 460 |
+
|
| 461 |
+
section_title_key = ""
|
| 462 |
+
if func_name_from_group:
|
| 463 |
+
section_title_key = func_name_from_group.replace('`', '')
|
| 464 |
+
else:
|
| 465 |
+
section_title_key = section_header_raw[section_header_raw.find('####') + 4:].strip().replace('`', '')
|
| 466 |
+
|
| 467 |
+
content_start_index = current_match.end()
|
| 468 |
+
content_end_index = (sub_section_matches_list[k+1].start()
|
| 469 |
+
if k + 1 < len(sub_section_matches_list)
|
| 470 |
+
else len(file_content_block))
|
| 471 |
+
|
| 472 |
+
sub_section_content = file_content_block[content_start_index:content_end_index].strip()
|
| 473 |
+
|
| 474 |
+
print(f" - DEBUG: Sub-section '{section_title_key}' content (first 100 chars): {sub_section_content[:100]}..." if len(sub_section_content) > 100 else sub_section_content)
|
| 475 |
+
if sub_section_content:
|
| 476 |
+
parsed_comments_for_section = _parse_bullet_comments(sub_section_content)
|
| 477 |
+
structured_review['file_comments'][file_name][section_title_key] = parsed_comments_for_section
|
| 478 |
+
print(f" - DEBUG: Parsed {len(parsed_comments_for_section)} comments for '{section_title_key}'.")
|
| 479 |
+
else:
|
| 480 |
+
structured_review['file_comments'][file_name][section_title_key] = []
|
| 481 |
+
print(f" - DEBUG: No content for sub-section '{section_title_key}'.")
|
| 482 |
+
|
| 483 |
+
if general_comments_for_file:
|
| 484 |
+
structured_review['file_comments'][file_name]["General_File_Comments"] = general_comments_for_file
|
| 485 |
+
|
| 486 |
+
|
| 487 |
+
# --- 3. Extract General Sections ---
|
| 488 |
+
general_section_headers = [
|
| 489 |
+
("Potential Issues and Edge Cases", "Potential Issues and Edge Cases"),
|
| 490 |
+
("Security Implications", "Security Implications"),
|
| 491 |
+
("Adherence to Best Practices (PEP 8)", "Adherence to Best Practices (PEP 8)"),
|
| 492 |
+
("Performance Considerations", "Performance Considerations"),
|
| 493 |
+
("Unit Testing Suggestions", "Unit Testing Suggestions"),
|
| 494 |
+
("Docstring/Comment Improvements", "Docstring/Comment Improvements"),
|
| 495 |
+
("Clarity and Conciseness", "Clarity and Conciseness"),
|
| 496 |
+
("Summary", "Summary"),
|
| 497 |
+
]
|
| 498 |
+
|
| 499 |
+
current_markdown_to_parse = markdown_review
|
| 500 |
+
|
| 501 |
+
start_parsing_from_match = re.search(r"^##\s*\d*\.?\s*Potential Issues and Edge Cases:\s*$", current_markdown_to_parse, re.MULTILINE)
|
| 502 |
+
if not start_parsing_from_match:
|
| 503 |
+
specific_obs_end_idx = 0
|
| 504 |
+
specific_obs_match = re.search(r"^##\s*\d*\.?\s*Specific Observations and Suggestions:\s*([\s\S]*?)(?=^##\s*\d*\.?\s*[\w\s\(\)\/]+:|\Z)", current_markdown_to_parse, re.MULTILINE | re.DOTALL)
|
| 505 |
+
if specific_obs_match:
|
| 506 |
+
current_markdown_to_parse = current_markdown_to_parse[specific_obs_match.end():].strip()
|
| 507 |
+
else:
|
| 508 |
+
pass
|
| 509 |
+
else:
|
| 510 |
+
current_markdown_to_parse = current_markdown_to_parse[start_parsing_from_match.start():].strip()
|
| 511 |
+
|
| 512 |
+
|
| 513 |
+
for i, (title, header_text) in enumerate(general_section_headers):
|
| 514 |
+
current_header_pattern = r"^##\s*\d*\.?\s*" + re.escape(header_text) + r":\s*$"
|
| 515 |
+
|
| 516 |
+
start_match = re.search(current_header_pattern, current_markdown_to_parse, re.MULTILINE)
|
| 517 |
+
if not start_match:
|
| 518 |
+
continue
|
| 519 |
+
|
| 520 |
+
section_start_idx = start_match.end()
|
| 521 |
+
|
| 522 |
+
section_end_idx = len(current_markdown_to_parse)
|
| 523 |
+
|
| 524 |
+
if i + 1 < len(general_section_headers):
|
| 525 |
+
next_header_text = general_section_headers[i+1][1]
|
| 526 |
+
next_header_pattern = r"^##\s*\d*\.?\s*" + re.escape(next_header_text) + r":\s*$"
|
| 527 |
+
next_match = re.search(next_header_pattern, current_markdown_to_parse[section_start_idx:], re.MULTILINE)
|
| 528 |
+
if next_match:
|
| 529 |
+
section_end_idx = section_start_idx + next_match.start()
|
| 530 |
+
|
| 531 |
+
content_raw = current_markdown_to_parse[section_start_idx:section_end_idx].strip()
|
| 532 |
+
|
| 533 |
+
if title == "Summary":
|
| 534 |
+
structured_review['summary'] = content_raw
|
| 535 |
+
structured_review['summary'] = re.sub(r'(`{3,})\s*$', '', structured_review['summary']).strip()
|
| 536 |
+
|
| 537 |
+
approval_match = re.search(r"^\s*\*\*(?:Action|Recommended Action|Status):\*\*\s*(Approve|Request Changes|Comment|No action required)", structured_review['summary'], re.IGNORECASE | re.MULTILINE)
|
| 538 |
+
if approval_match:
|
| 539 |
+
structured_review['approval_status'] = approval_match.group(1).strip().replace(' ', '').capitalize()
|
| 540 |
+
else:
|
| 541 |
+
structured_review['approval_status'] = 'Comment'
|
| 542 |
+
else:
|
| 543 |
+
structured_review['general_sections'].append(ParsedReviewSection(title=title, content=content_raw))
|
| 544 |
+
|
| 545 |
+
current_markdown_to_parse = current_markdown_to_parse[section_end_idx:].strip()
|
| 546 |
+
|
| 547 |
+
if not structured_review['summary']:
|
| 548 |
+
summary_match = re.search(r"^##\s*\d*\.?\s*Summary:\s*([\s\S]*)$", markdown_review, re.MULTILINE | re.DOTALL)
|
| 549 |
+
if summary_match:
|
| 550 |
+
structured_review['summary'] = summary_match.group(1).strip()
|
| 551 |
+
structured_review['summary'] = re.sub(r'(`{3,})\s*$', '', structured_review['summary']).strip()
|
| 552 |
+
|
| 553 |
+
approval_match = re.search(r"^\s*\*\*(?:Action|Recommended Action|Status):\*\*\s*(Approve|Request Changes|Comment|No action required)", structured_review['summary'], re.IGNORECASE | re.MULTILINE)
|
| 554 |
+
if approval_match:
|
| 555 |
+
structured_review['approval_status'] = approval_match.group(1).strip().replace(' ', '').capitalize()
|
| 556 |
+
else:
|
| 557 |
+
structured_review['approval_status'] = 'Comment'
|
| 558 |
+
else:
|
| 559 |
+
structured_review['summary'] = "Automated review completed."
|
| 560 |
+
|
| 561 |
+
return structured_review
|
| 562 |
+
|
| 563 |
+
def feedback_formatter_node(state: PRReviewState):
    """Parse the LLM's markdown review into structured data and attach it to the state.

    LangGraph nodes must always return a (possibly updated) state object; this
    node returns a copy of ``state`` with the parsed review data recorded and
    the workflow status advanced to ``"review_parsed"``.
    """
    parsed_review = parse_llm_review_markdown(state.llm_markdown_review)

    return state.model_copy(
        update={
            "review_status": "review_parsed",
            "parsed_llm_review_data": parsed_review,
        }
    )
|
| 575 |
+
|
| 576 |
+
|
| 577 |
+
from github import Github, PullRequest
|
| 578 |
+
from github.GithubException import GithubException, UnknownObjectException
|
| 579 |
+
from github.Commit import Commit # Import Commit type for clarity and correctness
|
| 580 |
+
from typing import Dict, Any, List, Optional
|
| 581 |
+
import os
|
| 582 |
+
import re
|
| 583 |
+
import logging
|
| 584 |
+
|
| 585 |
+
# IMPORTANT: These classes should be imported from src.utils.markdown_parser
|
| 586 |
+
# For standalone execution or if import paths are complex, ensure they are correctly defined or imported.
|
| 587 |
+
class ParsedComment:
    """A single review comment, optionally paired with a code suggestion."""

    def __init__(self, message: str, suggestion: Optional[str] = None):
        # Human-readable comment text.
        self.message = message
        # Optional replacement code snippet; None when no suggestion was parsed.
        self.suggestion = suggestion

    def __repr__(self):
        preview = self.message[:50]
        has_fix = self.suggestion is not None
        return "ParsedComment(msg='{}...', has_suggestion={})".format(preview, has_fix)
|
| 593 |
+
|
| 594 |
+
class ParsedReviewSection:
    """A titled, free-form section of the overall review body."""

    def __init__(self, title: str, content: str):
        # Section heading as it appeared in the review markdown.
        self.title = title
        # Raw markdown content belonging to this section.
        self.content = content

    def __repr__(self):
        snippet = self.content[:50]
        return "ParsedReviewSection(title='{}', content='{}...')".format(self.title, snippet)
|
| 600 |
+
|
| 601 |
+
|
| 602 |
+
# Configure logging (optional, but good practice)
# NOTE(review): module-level basicConfig configures the root logger for the
# whole process; a module-scoped logger (logging.getLogger(__name__)) would be
# less invasive if this file is imported as a library.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 604 |
+
|
| 605 |
+
|
| 606 |
+
def post_review_comments_on_github(
    repo_name: str,
    pr_id: int,
    parsed_review_data: Dict[str, Any],
    github_token: Optional[str] = None
) -> Dict[str, Any]:
    """
    Posts a structured code review to a GitHub Pull Request.

    Args:
        repo_name (str): The full name of the repository (e.g., "owner/repo").
        pr_id (int): The Pull Request number.
        parsed_review_data (Dict[str, Any]): The structured review data
                                             as returned by parse_llm_review_markdown.
        github_token (str, optional): GitHub Personal Access Token.
                                      If None, tries to read from GITHUB_TOKEN env var.

    Returns:
        Dict[str, Any]: Details of the posted review, e.g.
                        {'status': 'success', 'review_url': '...', 'review_id': ...,
                         'main_comment_body': '...'}.

    Raises:
        ValueError: If no token is available, or the repo/PR does not exist.
        RuntimeError: On a GitHub API error or any other unexpected failure.
    """
    if github_token is None:
        github_token = os.getenv("GITHUB_TOKEN")
    if github_token is None:
        logging.error("GitHub token not provided and GITHUB_TOKEN environment variable not set.")
        raise ValueError("GitHub token not provided and GITHUB_TOKEN environment variable not set.")

    try:
        g = Github(github_token)
        repo = g.get_repo(repo_name)
        pr = repo.get_pull(pr_id)
        logging.info(f"Connected to GitHub repo '{repo_name}', PR #{pr_id}.")

        # --- 1. Prepare the Main Review Body ---
        overall_impression = parsed_review_data.get('overall_impression', '')
        general_sections = parsed_review_data.get('general_sections', [])
        summary = parsed_review_data.get('summary', '')
        approval_status = parsed_review_data.get('approval_status', 'COMMENT').upper()

        main_review_body = "### 🤖 Automated Code Review\n\n"

        if overall_impression.strip():
            main_review_body += f"**Overall Impression:**\n{overall_impression}\n\n---\n\n"

        # Each non-empty general section becomes its own markdown heading.
        for section in general_sections:
            if section.content.strip():
                main_review_body += f"### {section.title}\n{section.content}\n\n---\n\n"

        if summary.strip():
            main_review_body += f"### Summary\n{summary}\n\n"

        main_review_body += f"**Recommended Action:** {approval_status}\n"

        # Map the parsed status onto a GitHub review event.
        # BUG FIX: parse_llm_review_markdown stores statuses with spaces
        # stripped (e.g. "Request Changes" -> "Requestchanges"), so the old
        # literal comparison against "REQUEST CHANGES" could never match and
        # change requests were silently posted as plain comments. Normalize
        # spaces and underscores away before comparing.
        normalized_status = approval_status.replace(" ", "").replace("_", "")
        github_event = "COMMENT"
        if normalized_status == "APPROVE":
            github_event = "APPROVE"
        elif normalized_status == "REQUESTCHANGES":
            github_event = "REQUEST_CHANGES"

        logging.info(f"Calculated GitHub review event: {github_event}")

        # --- 2. Prepare Line/File Comments ---
        github_comments = []
        file_comments_data = parsed_review_data.get('file_comments', {})

        head_commit_sha = pr.head.sha
        # PyGithub's create_review expects a Commit object, not a bare SHA string.
        pr_commit_obj = repo.get_commit(head_commit_sha)
        logging.info(f"Using head commit SHA: {head_commit_sha} (as Commit object)")

        if file_comments_data:
            logging.info(f"Preparing {len(file_comments_data)} file-specific comments.")
            for file_path, functions_data in file_comments_data.items():
                consolidated_file_comment_body = f"### Review for `{file_path}`\n\n"

                # General file comments sort first, then function names alphabetically.
                sorted_func_names = sorted(
                    functions_data.keys(),
                    key=lambda x: (0 if x == "General_File_Comments" else 1, x)
                )

                for func_name in sorted_func_names:
                    comments_for_func = functions_data[func_name]

                    if not comments_for_func:
                        continue

                    if func_name != "General_File_Comments":
                        consolidated_file_comment_body += f"#### ⚙️ Function: `{func_name}`\n\n"
                    else:
                        # (The original guard here was always true in this
                        # branch, so the heading is now added unconditionally.)
                        consolidated_file_comment_body += "#### 📄 General File Comments\n\n"

                    for comment in comments_for_func:
                        consolidated_file_comment_body += f"{comment.message}\n"
                        if comment.suggestion:
                            consolidated_file_comment_body += f"\n```suggestion\n{comment.suggestion}\n```\n\n"
                    consolidated_file_comment_body += "\n---\n\n"

                # Skip files whose body is still just the bare header
                # (i.e. every comment list for this file was empty).
                if consolidated_file_comment_body.strip() != f"### Review for `{file_path}`":
                    github_comments.append({
                        "path": file_path,
                        # NOTE(review): position=1 anchors every comment at the
                        # first diff position of the file; GitHub rejects
                        # positions not present in the PR diff — TODO confirm
                        # this holds for all changed files.
                        "position": 1,
                        "body": consolidated_file_comment_body.strip(),
                    })

        # --- 3. Submit the Review ---
        review = pr.create_review(
            commit=pr_commit_obj,
            body=main_review_body,
            event=github_event,
            comments=github_comments
        )

        logging.info(f"Successfully posted GitHub review. URL: {review.html_url}")
        return {
            'status': 'success',
            'review_url': review.html_url,
            'review_id': review.id,
            'main_comment_body': main_review_body
        }

    except UnknownObjectException as e:
        logging.error(f"GitHub object not found (repo or PR): {e}")
        raise ValueError(f"GitHub object not found (repo or PR): {e}")
    except GithubException as e:
        logging.error(f"GitHub API error: {e}")
        raise RuntimeError(f"GitHub API error: {e}")
    except Exception as e:
        logging.critical(f"An unexpected error occurred while posting review: {e}", exc_info=True)
        raise RuntimeError(f"An unexpected error occurred while posting review: {e}")
|
| 738 |
+
|
| 739 |
+
|
| 740 |
+
def post_review_coments_on_github_node(state: PRReviewState):
    """Post the parsed review to GitHub and record the outcome on the state.

    NOTE(review): ``git_hub_token`` is resolved from module scope — it is not
    defined inside this function; confirm it is assigned earlier in the file.
    LangGraph nodes must return an updated state, so a modified copy is returned.
    """
    outcome = post_review_comments_on_github(
        state.repo_name,
        state.pr_id,
        state.parsed_llm_review_data,
        git_hub_token,
    )

    return state.model_copy(
        update={
            "review_status": "posted",
            "review_comment_url": outcome['review_url'],
            "review_id": outcome['review_id'],
            # TODO: 'last_error' currently holds the success status string;
            # change this field later (intent carried over from original).
            "last_error": outcome['status'],
        }
    )
|
| 757 |
+
|
| 758 |
+
|
| 759 |
+
from IPython.display import Image, display
|
| 760 |
+
from langgraph.graph import StateGraph, START, END
|
| 761 |
+
|
| 762 |
+
# Build graph
# Assemble the LangGraph pipeline: retrieve PR code -> LLM review ->
# parse/format the review -> post comments back to GitHub.
builder = StateGraph(PRReviewState)
builder.add_node("code_retriever_node", code_retriever_node)
builder.add_node("code_reviewer_node", code_reviewer_node)
builder.add_node("feedback_formatter_node", feedback_formatter_node)
builder.add_node("post_review_coments_on_github_node", post_review_coments_on_github_node)

# Logic
# Strictly linear flow: each node feeds the next; no branching or retries yet.
builder.add_edge(START, "code_retriever_node")
builder.add_edge("code_retriever_node", "code_reviewer_node")
builder.add_edge("code_reviewer_node", "feedback_formatter_node")
builder.add_edge("feedback_formatter_node", "post_review_coments_on_github_node")
builder.add_edge("post_review_coments_on_github_node", END)

# need to fix ParsedComment serializable error
# (checkpointing is disabled until ParsedComment/ParsedReviewSection are
# serializable — presumably a pydantic/msgpack issue; TODO confirm)
#graph = builder.compile(checkpointer=memory)
graph = builder.compile()


# View
#display(Image(graph.get_graph().draw_mermaid_png()))
|